Advanced Computing Platform for Theoretical Physics

inv_cg_quda.cpp 4.19 KB
Newer Older
mikeaclark's avatar
mikeaclark committed
1
2
3
4
5
6
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <quda.h>
#include <util_quda.h>
7
8
#include <spinor_quda.h>
#include <gauge_quda.h>
mikeaclark's avatar
mikeaclark committed
9

rbabich's avatar
rbabich committed
10
void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInvertParam *perf)
mikeaclark's avatar
mikeaclark committed
11
{
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
  ParitySpinor Ap = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
  ParitySpinor y = allocateParitySpinor(x.X, x.precision);
  ParitySpinor r = allocateParitySpinor(x.X, x.precision);

  ParitySpinor b;
  if (perf->preserve_source == QUDA_PRESERVE_SOURCE_YES) {
    b = allocateParitySpinor(x.X, x.precision);
    copyCuda(b, source);
  } else {
    b = source;
  }

  ParitySpinor x_sloppy, r_sloppy, tmp_sloppy;
  if (invert_param->cuda_prec_sloppy == x.precision) {
    x_sloppy = x;
    r_sloppy = r;
    tmp_sloppy = tmp;
  } else {
    x_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
    r_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
    tmp_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
  }

  copyCuda(r, b);
  if (r_sloppy.precision != r.precision) copyCuda(r_sloppy, r);
  copyCuda(p, r_sloppy);
  zeroCuda(x_sloppy);
  zeroCuda(y);
mikeaclark's avatar
mikeaclark committed
41

42
  double b2 = 0.0;
43
  b2 = normCuda(b);
44

45
46
47
  double r2 = b2;
  double r2_old;
  double stop = r2*perf->tol*perf->tol; // stopping condition of solver
mikeaclark's avatar
mikeaclark committed
48

49
  double alpha, beta;
50
  double pAp;
mikeaclark's avatar
mikeaclark committed
51

52
53
54
55
56
  double rNorm = sqrt(r2);
  double r0Norm = rNorm;
  double maxrx = rNorm;
  double maxrr = rNorm;
  double delta = invert_param->reliable_delta;
mikeaclark's avatar
mikeaclark committed
57
58

  int k=0;
59
60
  int xUpdate = 0, rUpdate = 0;

rbabich's avatar
rbabich committed
61
62
  if (invert_param->verbosity >= QUDA_VERBOSE)
    printf("%d iterations, r2 = %e\n", k, r2);
mikeaclark's avatar
mikeaclark committed
63
64
  stopwatchStart();
  while (r2 > stop && k<perf->maxiter) {
rbabich's avatar
rbabich committed
65
    MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
mikeaclark's avatar
mikeaclark committed
66

67
    pAp = reDotProductCuda(p, Ap);
mikeaclark's avatar
mikeaclark committed
68

69
    alpha = r2 / pAp;        
mikeaclark's avatar
mikeaclark committed
70
    r2_old = r2;
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
    r2 = axpyNormCuda(-alpha, Ap, r_sloppy);

    // reliable update conditions
    rNorm = sqrt(r2);
    if (rNorm > maxrx) maxrx = rNorm;
    if (rNorm > maxrr) maxrr = rNorm;
    int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
    int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;

    if (!updateR) {
      beta = r2 / r2_old;
      axpyZpbxCuda(alpha, p, x_sloppy, r_sloppy, beta);
    } else {
      axpyCuda(alpha, p, x_sloppy);
      
      if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);

rbabich's avatar
rbabich committed
88
      MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, 
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
			tmp, invert_param->matpc_type);

      r2 = xmyNormCuda(b, r);
      if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
      rNorm = sqrt(r2);

      maxrr = rNorm;
      rUpdate++;
      
      if (updateX) {
	xpyCuda(x, y);
	zeroCuda(x_sloppy);
	copyCuda(b, r);
	r0Norm = rNorm;

	maxrx = rNorm;
	xUpdate++;
      }

      beta = r2 / r2_old;
      xpayCuda(r_sloppy, beta, p);
    }
mikeaclark's avatar
mikeaclark committed
111
112

    k++;
rbabich's avatar
rbabich committed
113
114
    if (invert_param->verbosity >= QUDA_VERBOSE)
      printf("%d iterations, r2 = %e\n", k, r2);
mikeaclark's avatar
mikeaclark committed
115
  }
116
117
118
119

  if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
  xpyCuda(y, x);

mikeaclark's avatar
mikeaclark committed
120
121
  perf->secs = stopwatchReadSeconds();

122
123
124
  if (k==invert_param->maxiter) 
    printf("Exceeded maximum iterations %d\n", invert_param->maxiter);

rbabich's avatar
rbabich committed
125
126
  if (invert_param->verbosity >= QUDA_SUMMARIZE)
    printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
mikeaclark's avatar
mikeaclark committed
127

128
  float gflops = k*(1.0e-9*x.volume)*(2*(2*1320+48) + 10*spinorSiteSize);
mikeaclark's avatar
mikeaclark committed
129
130
131
132
133
134
  //printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
  perf->gflops = gflops;
  perf->iter = k;

#if 0
  // Calculate the true residual
rbabich's avatar
rbabich committed
135
  MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
136
137
138
  copyCuda(r, b);
  mxpyCuda(Ap, r);
  double true_res = normCuda(r);
mikeaclark's avatar
mikeaclark committed
139
140
141
142
143
  
  printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", 
	 k, r2, true_res / b2);
#endif

144
145
146
147
148
149
150
151
  if (invert_param->cuda_prec_sloppy != x.precision) {
    freeParitySpinor(tmp_sloppy);
    freeParitySpinor(r_sloppy);
    freeParitySpinor(x_sloppy);
  }

  if (perf->preserve_source == QUDA_PRESERVE_SOURCE_YES) freeParitySpinor(b);
  freeParitySpinor(r);
mikeaclark's avatar
mikeaclark committed
152
153
154
  freeParitySpinor(p);
  freeParitySpinor(Ap);

155
156
157
  freeParitySpinor(b);
  freeParitySpinor(y);

mikeaclark's avatar
mikeaclark committed
158
159
  return;
}