changes to complete quda clover inverter (accidentally left out of last

commit) git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@464 be54200a-260c-0410-bdd7-ce6af2a381ab

changes to complete quda clover inverter (accidentally left out of last
commit) git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@464 be54200a-260c-0410-bdd7-ce6af2a381ab
5a093817 · rbabich · 5bf45c2a · 5a093817 · 5a093817 · 5a093817
Commit 5a093817 authored Sep 22, 2009 by rbabich
--- a/dslash_test.c
+++ b/dslash_test.c
@@ -116,7 +116,7 @@ void init() {
  construct_spinor_field(spinor, 1, 0, 0, 0, inv_param.cpu_prec);

  if (clover_yes) {
-    double norm = 1.0; // random components range between -norm and norm
+    double norm = 0; // random components range between -norm and norm
    double diag = 1.0; // constant added to the diagonal

    if (test_type == 2) {

--- a/inv_bicgstab_quda.cpp
+++ b/inv_bicgstab_quda.cpp
@@ -89,8 +89,13 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
      cxpaypbzCuda(r_sloppy, beta_omega, v, beta, p); // 8
    }

-    MatPCCuda(v, cudaGaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
-    
+    if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
+      MatPCCuda(v, cudaGaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
+    } else {
+      cloverMatPCCuda(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param->kappa, tmp_sloppy,
+		      invert_param->matpc_type, dag_type);
+    }
+
    // rv = (r0,v)
    rv = cDotProductCuda(src_sloppy, v);

@@ -101,7 +106,12 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
    caxpyCuda(alpha, v, r_sloppy); // 4
    alpha.x *= -1.0; alpha.y *= -1.0;

-    MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
+    if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
+      MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
+    } else {
+      cloverMatPCCuda(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param->kappa, tmp_sloppy,
+		      invert_param->matpc_type,dag_type);
+    }

    // omega = (t, r) / (t, t)
    omega_t2 = cDotProductNormACuda(t, r_sloppy); // 6
@@ -121,7 +131,12 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
    if (updateR) {
      if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);

+    if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
      MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
+    } else {
+      cloverMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa, tmp,
+		      invert_param->matpc_type, dag_type);
+    }

      r2 = xmyNormCuda(b, r);
      if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
@@ -162,13 +177,22 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
  float gflops = (1.0e-9*x.volume)*(2*(2*1320+48)*k + (32*k + 8*(k-1))*spinorSiteSize);
  gflops += 1.0e-9*x.volume*rUpdate*((2*1320+48) + 3*spinorSiteSize);
  gflops += 1.0e-9*x.volume*xUpdate*spinorSiteSize;
+  if (invert_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
+    gflops += (1.0e-9*x.volume)*4*504*k;
+    gflops += (1.0e-9*x.volume)*rUpdate*2*504;
+  }
  //printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
  invert_param->gflops += gflops;
  invert_param->iter += k;

 #if 0
  // Calculate the true residual
-  MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
+  if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
+    MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
+  } else {
+    cloverMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa, tmp,
+		    invert_param->matpc_type, dag_type);
+  }
  double true_res = xmyNormCuda(src, r);
  
  printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", k, sqrt(r2/b2), sqrt(true_res / b2));

--- a/inv_cg_quda.cpp
+++ b/inv_cg_quda.cpp
@@ -62,7 +62,13 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
    printf("%d iterations, r2 = %e\n", k, r2);
  stopwatchStart();
  while (r2 > stop && k<perf->maxiter) {
-    MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
+
+    if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
+      MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
+    } else {
+      cloverMatPCDagMatPCCuda(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, perf->kappa,
+			      tmp_sloppy, perf->matpc_type);
+    }

    pAp = reDotProductCuda(p, Ap);

@@ -85,8 +91,12 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
      
      if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);

-      MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, 
-			tmp, invert_param->matpc_type);
+      if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
+	MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type);
+      } else {
+	cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa,
+				tmp, invert_param->matpc_type);
+      }

      r2 = xmyNormCuda(b, r);
      if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
@@ -126,13 +136,19 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
    printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);

  float gflops = k*(1.0e-9*x.volume)*(2*(2*1320+48) + 10*spinorSiteSize);
+  if (invert_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) gflops += k*(1.0e-9*x.volume)*4*504;
  //printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
  perf->gflops = gflops;
  perf->iter = k;

 #if 0
  // Calculate the true residual
-  MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
+  if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
+    MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
+  } else {
+    cloverMatPCDagMatPCCuda(Ap, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, perf->kappa,
+			    tmp, perf->matpc_type);
+  }
  copyCuda(r, b);
  mxpyCuda(Ap, r);
  double true_res = normCuda(r);

--- a/invert_test.c
+++ b/invert_test.c
@@ -64,7 +64,7 @@ int main(int argc, char **argv)

  if (clover_yes) {
    inv_param.clover_cpu_prec = QUDA_DOUBLE_PRECISION;
-    inv_param.clover_cuda_prec = QUDA_DOUBLE_PRECISION;
+    inv_param.clover_cuda_prec = QUDA_SINGLE_PRECISION;
    inv_param.clover_cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
    inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
  }
@@ -79,7 +79,7 @@ int main(int argc, char **argv)
  construct_gauge_field(gauge, 1, Gauge_param.cpu_prec);

  if (clover_yes) {
-    double norm = 1.0; // random components range between -norm and norm
+    double norm = 0.1; // random components range between -norm and norm
    double diag = 1.0; // constant added to the diagonal

    size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
@@ -107,9 +107,10 @@ int main(int argc, char **argv)
  time0 += clock(); // stop the timer
  time0 /= CLOCKS_PER_SEC;

-  printf("Cuda Space Required. Spinor:%f + Gauge:%f GiB\n", 
+  printf("Cuda Space Required:\n   Spinor: %f GiB\n    Gauge: %f GiB\n", 
 	 inv_param.spinorGiB, Gauge_param.gaugeGiB);
-  printf("done: %i iter / %g secs = %g gflops, total time = %g secs\n", 
+  if (clover_yes) printf("   Clover: %f GiB\n", inv_param.cloverGiB);
+  printf("\nDone: %i iter / %g secs = %g gflops, total time = %g secs\n", 
 	 inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);

  mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, inv_param.cpu_prec, Gauge_param.cpu_prec);