changes to quda: fixed half-precision clover, changed definition of mass

normalization for MatPC solution type, removed "chroma" gauge order since chroma uses same ordering as QDP git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@473 be54200a-260c-0410-bdd7-ce6af2a381ab

changes to quda: fixed half-precision clover, changed definition of mass
normalization for MatPC solution type, removed "chroma" gauge order since chroma uses same ordering as QDP git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@473 be54200a-260c-0410-bdd7-ce6af2a381ab
9ad9699e · rbabich · a95457b8 · 9ad9699e · 9ad9699e · 9ad9699e
Commit 9ad9699e authored Sep 28, 2009 by rbabich
--- a/clover_quda.cpp
+++ b/clover_quda.cpp
@@ -133,7 +133,7 @@ static inline void packCloverMatrixHalf(short4 *res, float *norm, Float *clover,
      res[i*Vh].z = (short) (c * clover[4*i+2]);
      res[i*Vh].w = (short) (c * clover[4*i+3]);
    }
-    norm[chi*Vh] = half/c;
+    norm[chi*Vh] = half*max;
    res += 9*Vh;
    clover += 36;
  }

--- a/dslash_quda.cu
+++ b/dslash_quda.cu
@@ -1817,7 +1817,7 @@ void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullC
  if (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
      cloverDslashCuda(tmp, gauge, cloverInv, in, 1, dagger);
      cloverCuda(out, gauge, clover, in, 0);
-      dslashXpayCuda(out, gauge, tmp, 0, dagger, out, kappa2);
+      dslashXpayCuda(out, gauge, tmp, 0, dagger, out, kappa2); // safe since out is not read after writing
  } else if (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
      cloverDslashCuda(tmp, gauge, cloverInv, in, 0, dagger);
      cloverCuda(out, gauge, clover, in, 1);

--- a/dslash_test.c
+++ b/dslash_test.c
@@ -71,7 +71,7 @@ void init() {

  if (clover_yes) {
    inv_param.clover_cpu_prec = QUDA_DOUBLE_PRECISION;
-    inv_param.clover_cuda_prec = QUDA_SINGLE_PRECISION;
+    inv_param.clover_cuda_prec = QUDA_HALF_PRECISION;
    inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
  }
  inv_param.verbosity = QUDA_VERBOSE;
@@ -234,7 +234,7 @@ double dslashCUDA() {

 void dslashRef() {

-  // to be removed once reference clover is finished
+  // FIXME: remove once reference clover is finished
  if (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
    inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
  } else if (inv_param.matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {

--- a/enum_quda.h
+++ b/enum_quda.h
@@ -6,8 +6,7 @@ extern "C" {
 #endif

  typedef enum QudaGaugeFieldOrder_s {
-    QUDA_QDP_GAUGE_ORDER, // expect *gauge[mu], even-odd, row-column colour
-    QUDA_CHROMA_GAUGE_ORDER, // expect *gauge[mu], even-odd, column-row colour
+    QUDA_QDP_GAUGE_ORDER, // expect *gauge[4], even-odd, row-column colour
    QUDA_CPS_WILSON_GAUGE_ORDER, // expect *gauge, even-odd, mu inside, column-row colour
  } QudaGaugeFieldOrder;


--- a/gauge_quda.cpp
+++ b/gauge_quda.cpp
@@ -113,10 +113,11 @@ void packQDPGaugeField(FloatN *res, Float **gauge, int oddBit, ReconstructType r
  }
 }

-// Assume the gauge field is "chroma" ordered (takes an array of pointers
+#if 0 // unused ordering
+// Assume the gauge field is "QDP transposed" ordered (takes an array of pointers
 // to the four directions): even-odd space-time with column-row ordering
 template <typename Float, typename FloatN>
-void packChromaGaugeField(FloatN *res, Float **gauge, int oddBit, ReconstructType reconstruct, int V) {
+void packTransposedGaugeField(FloatN *res, Float **gauge, int oddBit, ReconstructType reconstruct, int V) {
  Float gT[18];
  if (reconstruct == QUDA_RECONSTRUCT_12) {
    for (int dir = 0; dir < 4; dir++) {
@@ -140,6 +141,7 @@ void packChromaGaugeField(FloatN *res, Float **gauge, int oddBit, ReconstructTyp
    }
  }
 }
+#endif

 // Assume the gauge field is "Wilson" ordered: directions inside of
 // space-time, column-row ordering, even-odd space-time
@@ -228,13 +230,9 @@ void loadGaugeField(FloatN *even, FloatN *odd, Float *cpuGauge, ReconstructType
  if (gauge_param->gauge_order == QUDA_QDP_GAUGE_ORDER) {
    packQDPGaugeField(packedEven, (Float**)cpuGauge, 0, reconstruct, Vh);
    packQDPGaugeField(packedOdd,  (Float**)cpuGauge, 1, reconstruct, Vh);
-  } else if (gauge_param->gauge_order == QUDA_CHROMA_GAUGE_ORDER) {
-    packChromaGaugeField(packedEven, (Float**)cpuGauge, 0, reconstruct, Vh);
-    packChromaGaugeField(packedOdd,  (Float**)cpuGauge, 1, reconstruct, Vh);
  } else if (gauge_param->gauge_order == QUDA_CPS_WILSON_GAUGE_ORDER) {
    packCPSGaugeField(packedEven, (Float*)cpuGauge, 0, reconstruct, Vh);
-    packCPSGaugeField(packedOdd,  (Float*)cpuGauge, 1, reconstruct, Vh);
-    
+    packCPSGaugeField(packedOdd,  (Float*)cpuGauge, 1, reconstruct, Vh);    
  } else {
    printf("Sorry, %d GaugeFieldOrder not supported\n", gauge_param->gauge_order);
    exit(-1);

--- a/invert_quda.cpp
+++ b/invert_quda.cpp
@@ -391,22 +391,20 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
 	cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, aux, 0);
      } else if (param->matpc_type == QUDA_MATPC_ODD_ODD) {
 	// in = A_oo^-1 (b_o + k D_oe A_ee^-1 b_e)
-	ParitySpinor aux = tmp; // aliases b.odd when PRESERVE_SOURCE_NO is set
+	ParitySpinor aux = out; // aliases b.even when PRESERVE_SOURCE_NO is set
 	cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.even, 0);
 	dslashXpayCuda(aux, cudaGaugePrecise, in, 1, 0, b.odd, kappa);
 	cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, aux, 1);
      } else if (param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
 	// in = b_e + k D_eo A_oo^-1 b_o
-	ParitySpinor aux = out; // aliases b.even when PRESERVE_SOURCE_NO is set
-	cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.odd, 1);
-	dslashXpayCuda(aux, cudaGaugePrecise, in, 0, 0, b.even, kappa);
-	copyCuda(in, aux);
+	ParitySpinor aux = tmp; // aliases b.odd when PRESERVE_SOURCE_NO is set
+	cloverCuda(aux, cudaGaugePrecise, cudaCloverInvPrecise, b.odd, 1); // safe even when aux = b.odd
+	dslashXpayCuda(in, cudaGaugePrecise, aux, 0, 0, b.even, kappa);
      } else if (param->matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
 	// in = b_o + k D_oe A_ee^-1 b_e
 	ParitySpinor aux = out; // aliases b.even when PRESERVE_SOURCE_NO is set
-	cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.even, 0);
-	dslashXpayCuda(aux, cudaGaugePrecise, in, 1, 0, b.odd, kappa);
-	copyCuda(in, aux);
+	cloverCuda(aux, cudaGaugePrecise, cudaCloverInvPrecise, b.even, 0); // safe even when aux = b.even
+	dslashXpayCuda(in, cudaGaugePrecise, aux, 1, 0, b.odd, kappa);
      } else {
 	printf("QUDA error: invalid matpc_type\n");
 	exit(-1);
@@ -422,10 +420,11 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)

    // multiply the source to get the mass normalization
    if (param->mass_normalization == QUDA_MASS_NORMALIZATION)
-      if (param->solution_type == QUDA_MATPC_SOLUTION) 
+      if (param->solution_type == QUDA_MATPC_SOLUTION)  {
+	axCuda(2.0*kappa, in);
+      } else {
 	axCuda(4.0*kappa*kappa, in);
-      else
-	axCuda(16.0*pow(kappa,4), in);
+      }

    // cps uses a different anisotropy normalization
    if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)

--- a/invert_test.c
+++ b/invert_test.c
@@ -37,7 +37,7 @@ int main(int argc, char **argv)

  gauge_param = &Gauge_param;

-  int clover_yes = 0; // 0 for plain Wilson, 1 for clover
+  int clover_yes = 1; // 0 for plain Wilson, 1 for clover
  
  if (clover_yes) {
    inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;