Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

inv_cg_quda.cpp 4.99 KB
Newer Older
mikeaclark's avatar
mikeaclark committed
1 2 3 4 5 6
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <quda.h>
#include <util_quda.h>
7 8
#include <spinor_quda.h>
#include <gauge_quda.h>
mikeaclark's avatar
mikeaclark committed
9

rbabich's avatar
rbabich committed
10
void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInvertParam *perf)
mikeaclark's avatar
mikeaclark committed
11
{
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
  ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
  ParitySpinor Ap = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
  ParitySpinor y = allocateParitySpinor(x.X, x.precision);
  ParitySpinor r = allocateParitySpinor(x.X, x.precision);

  ParitySpinor b;
  if (perf->preserve_source == QUDA_PRESERVE_SOURCE_YES) {
    b = allocateParitySpinor(x.X, x.precision);
    copyCuda(b, source);
  } else {
    b = source;
  }

  ParitySpinor x_sloppy, r_sloppy, tmp_sloppy;
  if (invert_param->cuda_prec_sloppy == x.precision) {
    x_sloppy = x;
    r_sloppy = r;
    tmp_sloppy = tmp;
  } else {
    x_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
    r_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
    tmp_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
  }

  copyCuda(r, b);
  if (r_sloppy.precision != r.precision) copyCuda(r_sloppy, r);
  copyCuda(p, r_sloppy);
  zeroCuda(x_sloppy);
  zeroCuda(y);
mikeaclark's avatar
mikeaclark committed
41

42 43 44
  double kappa = invert_param->kappa;
  if (invert_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= cudaGaugePrecise.anisotropy;

45
  double b2 = 0.0;
46
  b2 = normCuda(b);
47

48 49 50
  double r2 = b2;
  double r2_old;
  double stop = r2*perf->tol*perf->tol; // stopping condition of solver
mikeaclark's avatar
mikeaclark committed
51

52
  double alpha, beta;
53
  double pAp;
mikeaclark's avatar
mikeaclark committed
54

55 56 57 58 59
  double rNorm = sqrt(r2);
  double r0Norm = rNorm;
  double maxrx = rNorm;
  double maxrr = rNorm;
  double delta = invert_param->reliable_delta;
mikeaclark's avatar
mikeaclark committed
60 61

  int k=0;
62 63
  int xUpdate = 0, rUpdate = 0;

rbabich's avatar
rbabich committed
64 65
  if (invert_param->verbosity >= QUDA_VERBOSE)
    printf("%d iterations, r2 = %e\n", k, r2);
66 67 68

  blas_quda_flops = 0;

mikeaclark's avatar
mikeaclark committed
69 70
  stopwatchStart();
  while (r2 > stop && k<perf->maxiter) {
71 72

    if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
73
      MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, kappa, tmp_sloppy, perf->matpc_type);
74
    } else {
75
      cloverMatPCDagMatPCCuda(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, kappa,
76 77
			      tmp_sloppy, perf->matpc_type);
    }
mikeaclark's avatar
mikeaclark committed
78

79
    pAp = reDotProductCuda(p, Ap);
mikeaclark's avatar
mikeaclark committed
80

81
    alpha = r2 / pAp;        
mikeaclark's avatar
mikeaclark committed
82
    r2_old = r2;
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
    r2 = axpyNormCuda(-alpha, Ap, r_sloppy);

    // reliable update conditions
    rNorm = sqrt(r2);
    if (rNorm > maxrx) maxrx = rNorm;
    if (rNorm > maxrr) maxrr = rNorm;
    int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
    int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;

    if (!updateR) {
      beta = r2 / r2_old;
      axpyZpbxCuda(alpha, p, x_sloppy, r_sloppy, beta);
    } else {
      axpyCuda(alpha, p, x_sloppy);
      
      if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);

100
      if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
101
      	MatPCDagMatPCCuda(r, cudaGaugePrecise, x, kappa, tmp, invert_param->matpc_type);
102
      } else {
103
	cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, kappa,
104 105
				tmp, invert_param->matpc_type);
      }
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126

      r2 = xmyNormCuda(b, r);
      if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
      rNorm = sqrt(r2);

      maxrr = rNorm;
      rUpdate++;
      
      if (updateX) {
	xpyCuda(x, y);
	zeroCuda(x_sloppy);
	copyCuda(b, r);
	r0Norm = rNorm;

	maxrx = rNorm;
	xUpdate++;
      }

      beta = r2 / r2_old;
      xpayCuda(r_sloppy, beta, p);
    }
mikeaclark's avatar
mikeaclark committed
127 128

    k++;
rbabich's avatar
rbabich committed
129 130
    if (invert_param->verbosity >= QUDA_VERBOSE)
      printf("%d iterations, r2 = %e\n", k, r2);
mikeaclark's avatar
mikeaclark committed
131
  }
132 133 134 135

  if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
  xpyCuda(y, x);

mikeaclark's avatar
mikeaclark committed
136
  perf->secs = stopwatchReadSeconds();
137
  
mikeaclark's avatar
mikeaclark committed
138

139 140 141
  if (k==invert_param->maxiter) 
    printf("Exceeded maximum iterations %d\n", invert_param->maxiter);

rbabich's avatar
rbabich committed
142 143
  if (invert_param->verbosity >= QUDA_SUMMARIZE)
    printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
mikeaclark's avatar
mikeaclark committed
144

145 146
  float gflops = (blas_quda_flops + dslash_quda_flops)*1e-9;
  //  printf("%f gflops\n", gflops / stopwatchReadSeconds());
mikeaclark's avatar
mikeaclark committed
147 148 149
  perf->gflops = gflops;
  perf->iter = k;

150 151 152
  blas_quda_flops = 0;
  dslash_quda_flops = 0;

mikeaclark's avatar
mikeaclark committed
153 154
#if 0
  // Calculate the true residual
155
  if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
156
    MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, kappa, tmp, perf->matpc_type);
157
  } else {
158
    cloverMatPCDagMatPCCuda(Ap, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, kappa,
159 160
			    tmp, perf->matpc_type);
  }
161 162 163
  copyCuda(r, b);
  mxpyCuda(Ap, r);
  double true_res = normCuda(r);
mikeaclark's avatar
mikeaclark committed
164 165 166 167 168
  
  printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", 
	 k, r2, true_res / b2);
#endif

169 170 171 172 173 174 175 176
  if (invert_param->cuda_prec_sloppy != x.precision) {
    freeParitySpinor(tmp_sloppy);
    freeParitySpinor(r_sloppy);
    freeParitySpinor(x_sloppy);
  }

  if (perf->preserve_source == QUDA_PRESERVE_SOURCE_YES) freeParitySpinor(b);
  freeParitySpinor(r);
mikeaclark's avatar
mikeaclark committed
177 178 179
  freeParitySpinor(p);
  freeParitySpinor(Ap);

180 181 182
  freeParitySpinor(b);
  freeParitySpinor(y);

mikeaclark's avatar
mikeaclark committed
183 184
  return;
}