Advanced Computing Platform for Theoretical Physics

Commit bf3ede9d authored by mikeaclark
Browse files

Updated blas_test with better timer

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@490 be54200a-260c-0410-bdd7-ce6af2a381ab
parent a7974dba
......@@ -847,6 +847,7 @@ void caxpbyCuda(double2 a, ParitySpinor x, double2 b, ParitySpinor y) {
dim3 dimBlock(REDUCE_THREADS, 1, 1);
dim3 dimGrid(blocks, 1, 1);
blas_quda_bytes += 3*x.length*sizeof(x.precision);
blas_quda_flops += 7*x.length;
if (x.precision == QUDA_DOUBLE_PRECISION) {
caxpbyKernel<<<dimGrid, dimBlock>>>(a, (double2*)x.spinor, b, (double2*)y.spinor, length);
} else if (x.precision == QUDA_SINGLE_PRECISION) {
......@@ -863,7 +864,6 @@ void caxpbyCuda(double2 a, ParitySpinor x, double2 b, ParitySpinor y) {
float2 bf2 = make_float2((float)b.x, (float)b.y);
caxpbyHKernel<<<dimGrid, dimBlock>>>(af2, bf2, (short4*)y.spinor, (float*)y.spinorNorm, y.length/spinorSiteSize);
}
blas_quda_flops += 7*x.length;
}
template <typename Float2>
......
......@@ -13,7 +13,7 @@ QudaInvertParam inv_param;
ParitySpinor x, y, z, w, v;
int nIters = 1000;
int nIters;
void init() {
......@@ -58,9 +58,10 @@ double benchmark(int kernel) {
double a, b;
double2 a2, b2;
//printf("Executing %d kernel loops...", nIters);
//fflush(stdout);
stopwatchStart();
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
cudaEventRecord(start, 0);
for (int i=0; i < nIters; ++i) {
switch (kernel) {
......@@ -158,7 +159,11 @@ double benchmark(int kernel) {
}
}
double secs = stopwatchReadSeconds() / nIters;
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float runTime;
cudaEventElapsedTime(&runTime, start, end);
double secs = runTime / 1000;
return secs;
}
......@@ -202,10 +207,10 @@ int main(int argc, char** argv) {
blas_quda_flops = 0;
blas_quda_bytes = 0;
double secs = benchmark(kernels[i]);
double flops = blas_quda_flops / (double)nIters;
double bytes = blas_quda_bytes / (double)nIters;
double flops = blas_quda_flops;
double bytes = blas_quda_bytes;
printf("%s %f s, flops = %e, Gflops/s = %f, GiB/s = %f\n\n",
names[i], secs, flops, flops/secs*1e-9, bytes/(secs*(1<<30)));
names[i], secs, flops, (flops*1e-9)/(secs), bytes/(secs*(1<<30)));
//printf("Bandwidth: %f GiB/s\n\n", GiB / secs);
}
}
......
......@@ -29,7 +29,7 @@ void *spinorGPU, *spinorGPUEven, *spinorGPUOdd;
double kappa = 1.0;
int ODD_BIT = 1;
int DAGGER_BIT = 0;
int TRANSFER = 1; // include transfer time in the benchmark?
int TRANSFER = 0; // include transfer time in the benchmark?
void init() {
......@@ -45,7 +45,7 @@ void init() {
gaugeParam.t_boundary = QUDA_ANTI_PERIODIC_T;
gaugeParam.cpu_prec = QUDA_DOUBLE_PRECISION;
gaugeParam.cuda_prec = QUDA_DOUBLE_PRECISION;
gaugeParam.cuda_prec = QUDA_HALF_PRECISION;
gaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
gaugeParam.reconstruct_sloppy = gaugeParam.reconstruct;
gaugeParam.cuda_prec_sloppy = gaugeParam.cuda_prec;
......@@ -64,14 +64,14 @@ void init() {
inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
inv_param.cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_HALF_PRECISION;
if (test_type == 2) inv_param.dirac_order = QUDA_DIRAC_ORDER;
else inv_param.dirac_order = QUDA_DIRAC_ORDER;
if (clover_yes) {
inv_param.clover_cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec = QUDA_HALF_PRECISION;
inv_param.clover_cuda_prec_sloppy = inv_param.clover_cuda_prec;
inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
}
......@@ -308,8 +308,8 @@ void dslashTest() {
printf("%d Test %s\n", i, (1 == res) ? "PASSED" : "FAILED");
//if (test_type < 2) strong_check(spinorRef, spinorOdd, Vh, inv_param.cpu_prec);
//else strong_check(spinorRef, spinorGPU, V, inv_param.cpu_prec);
if (test_type < 2) strong_check(spinorRef, spinorOdd, Vh, inv_param.cpu_prec);
else strong_check(spinorRef, spinorGPU, V, inv_param.cpu_prec);
}
end();
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment