Advanced Computing Platform for Theoretical Physics

Commit 7ee1d3d5 authored by mikeaclark's avatar mikeaclark
Browse files

Reduced memory requirement for CG and BiCGstab

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@518 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 2bbcd340
...@@ -22,34 +22,30 @@ void MatVec(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cl ...@@ -22,34 +22,30 @@ void MatVec(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cl
} }
} }
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor r,
QudaInvertParam *invert_param, DagType dag_type) QudaInvertParam *invert_param, DagType dag_type)
{ {
ParitySpinor r = allocateParitySpinor(x.X, x.precision); ParitySpinor y = allocateParitySpinor(x.X, x.precision);
ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor v = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); ParitySpinor v = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor t = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); ParitySpinor t = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor tmp = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor y = allocateParitySpinor(x.X, x.precision); ParitySpinor x_sloppy, r_sloppy, r0;
ParitySpinor b = allocateParitySpinor(x.X, x.precision);
ParitySpinor x_sloppy, r_sloppy, tmp_sloppy, src_sloppy;
if (invert_param->cuda_prec_sloppy == x.precision) { if (invert_param->cuda_prec_sloppy == x.precision) {
x_sloppy = x; x_sloppy = x;
r_sloppy = r; r_sloppy = r;
tmp_sloppy = tmp; r0 = b;
src_sloppy = src;
} else { } else {
x_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); x_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
r_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); r_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
src_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); r0 = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
tmp_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); copyCuda(r0, b);
copyCuda(src_sloppy, src);
} }
zeroCuda(x_sloppy); zeroCuda(x_sloppy);
copyCuda(b, src); copyCuda(r_sloppy, b);
copyCuda(r_sloppy, src);
zeroCuda(y); zeroCuda(y);
...@@ -59,7 +55,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -59,7 +55,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
double delta = invert_param->reliable_delta; double delta = invert_param->reliable_delta;
int k = 0; int k = 0;
int xUpdate = 0, rUpdate = 0; int rUpdate = 0;
cuDoubleComplex rho = make_cuDoubleComplex(1.0, 0.0); cuDoubleComplex rho = make_cuDoubleComplex(1.0, 0.0);
cuDoubleComplex rho0 = rho; cuDoubleComplex rho0 = rho;
...@@ -77,8 +73,8 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -77,8 +73,8 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
double rNorm = sqrt(r2); double rNorm = sqrt(r2);
double r0Norm = rNorm; double r0Norm = rNorm;
double maxrx = rNorm;
double maxrr = rNorm; double maxrr = rNorm;
double maxrx = rNorm;
if (invert_param->verbosity >= QUDA_VERBOSE) printf("%d iterations, r2 = %e\n", k, r2); if (invert_param->verbosity >= QUDA_VERBOSE) printf("%d iterations, r2 = %e\n", k, r2);
...@@ -90,7 +86,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -90,7 +86,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
while (r2 > stop && k<invert_param->maxiter) { while (r2 > stop && k<invert_param->maxiter) {
if (k==0) { if (k==0) {
rho = make_cuDoubleComplex(r2, 0.0); // cDotProductCuda(src_sloppy, r_sloppy); // BiCRstab rho = make_cuDoubleComplex(r2, 0.0); // cDotProductCuda(r0, r_sloppy); // BiCRstab
copyCuda(p, r_sloppy); copyCuda(p, r_sloppy);
} else { } else {
alpha_omega = cuCdiv(alpha, omega); alpha_omega = cuCdiv(alpha, omega);
...@@ -102,9 +98,9 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -102,9 +98,9 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
cxpaypbzCuda(r_sloppy, beta_omega, v, beta, p); // 8 cxpaypbzCuda(r_sloppy, beta_omega, v, beta, p); // 8
} }
MatVec(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param, tmp_sloppy, dag_type); MatVec(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param, tmp, dag_type);
// rv = (r0,v) // rv = (r0,v)
rv = cDotProductCuda(src_sloppy, v); rv = cDotProductCuda(r0, v);
alpha = cuCdiv(rho, rv); alpha = cuCdiv(rho, rv);
...@@ -113,44 +109,38 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -113,44 +109,38 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
caxpyCuda(alpha, v, r_sloppy); // 4 caxpyCuda(alpha, v, r_sloppy); // 4
alpha.x *= -1.0; alpha.y *= -1.0; alpha.x *= -1.0; alpha.y *= -1.0;
MatVec(t, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param, tmp_sloppy, dag_type); MatVec(t, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param, tmp, dag_type);
// omega = (t, r) / (t, t) // omega = (t, r) / (t, t)
omega_t2 = cDotProductNormACuda(t, r_sloppy); // 6 omega_t2 = cDotProductNormACuda(t, r_sloppy); // 6
omega.x = omega_t2.x / omega_t2.z; omega.y = omega_t2.y/omega_t2.z; omega.x = omega_t2.x / omega_t2.z; omega.y = omega_t2.y/omega_t2.z;
//x += alpha*p + omega*r, r -= omega*t, r2 = (r,r), rho = (r0, r) //x += alpha*p + omega*r, r -= omega*t, r2 = (r,r), rho = (r0, r)
rho_r2 = caxpbypzYmbwcDotProductWYNormYQuda(alpha, p, omega, r_sloppy, x_sloppy, t, src_sloppy); rho_r2 = caxpbypzYmbwcDotProductWYNormYQuda(alpha, p, omega, r_sloppy, x_sloppy, t, r0);
rho0 = rho; rho.x = rho_r2.x; rho.y = rho_r2.y; r2 = rho_r2.z; rho0 = rho; rho.x = rho_r2.x; rho.y = rho_r2.y; r2 = rho_r2.z;
// reliable updates // reliable updates
rNorm = sqrt(r2); rNorm = sqrt(r2);
if (rNorm > maxrx) maxrx = rNorm; if (rNorm > maxrx) maxrx = rNorm;
if (rNorm > maxrr) maxrr = rNorm; if (rNorm > maxrr) maxrr = rNorm;
int updateR = (rNorm < delta*maxrr && r0Norm <= maxrr) ? 1 : 0;
int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0; int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
if (updateR) { if (updateR || updateX) {
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy); if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param, tmp, dag_type); xpyCuda(x, y);
MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, y, invert_param, x, dag_type);
r2 = xmyNormCuda(b, r); r2 = xmyNormCuda(b, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
zeroCuda(x_sloppy);
rNorm = sqrt(r2); rNorm = sqrt(r2);
maxrr = rNorm; maxrr = rNorm;
maxrx = rNorm;
r0Norm = rNorm;
rUpdate++; rUpdate++;
if (updateX) {
xpyCuda(x, y);
zeroCuda(x_sloppy);
copyCuda(b, r);
r0Norm = rNorm;
maxrx = rNorm;
xUpdate++;
}
} }
k++; k++;
...@@ -162,8 +152,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -162,8 +152,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
if (k==invert_param->maxiter) printf("Exceeded maximum iterations %d\n", invert_param->maxiter); if (k==invert_param->maxiter) printf("Exceeded maximum iterations %d\n", invert_param->maxiter);
if (invert_param->verbosity >= QUDA_VERBOSE) if (invert_param->verbosity >= QUDA_VERBOSE) printf("Reliable updates = %d\n", rUpdate);
printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
invert_param->secs += stopwatchReadSeconds(); invert_param->secs += stopwatchReadSeconds();
...@@ -174,7 +163,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -174,7 +163,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
#if 0 #if 0
// Calculate the true residual // Calculate the true residual
MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param, tmp, dag_type); MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param, y, dag_type);
double true_res = xmyNormCuda(src, r); double true_res = xmyNormCuda(src, r);
copyCuda(b, src); copyCuda(b, src);
...@@ -183,18 +172,17 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp, ...@@ -183,18 +172,17 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
#endif #endif
if (invert_param->cuda_prec_sloppy != x.precision) { if (invert_param->cuda_prec_sloppy != x.precision) {
freeParitySpinor(src_sloppy); freeParitySpinor(r0);
freeParitySpinor(tmp_sloppy);
freeParitySpinor(r_sloppy); freeParitySpinor(r_sloppy);
freeParitySpinor(x_sloppy); freeParitySpinor(x_sloppy);
} }
freeParitySpinor(b); freeParitySpinor(tmp);
freeParitySpinor(y);
freeParitySpinor(r);
freeParitySpinor(v); freeParitySpinor(v);
freeParitySpinor(t); freeParitySpinor(t);
freeParitySpinor(p); freeParitySpinor(p);
freeParitySpinor(y);
return; return;
} }
...@@ -7,30 +7,34 @@ ...@@ -7,30 +7,34 @@
#include <spinor_quda.h> #include <spinor_quda.h>
#include <gauge_quda.h> #include <gauge_quda.h>
void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInvertParam *perf) void MatVec(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cloverInv, ParitySpinor in,
QudaInvertParam *invert_param, ParitySpinor tmp) {
double kappa = invert_param->kappa;
if (invert_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)
kappa *= cudaGaugePrecise.anisotropy;
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(out, gauge, in, kappa, tmp, invert_param->matpc_type);
} else {
cloverMatPCDagMatPCCuda(out, gauge, clover, cloverInv, in, kappa, tmp, invert_param->matpc_type);
}
}
void invertCgCuda(ParitySpinor x, ParitySpinor b, ParitySpinor r, QudaInvertParam *invert_param)
{ {
ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor Ap = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor y = allocateParitySpinor(x.X, x.precision); ParitySpinor y = allocateParitySpinor(x.X, x.precision);
ParitySpinor r = allocateParitySpinor(x.X, x.precision);
ParitySpinor b; ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
if (perf->preserve_source == QUDA_PRESERVE_SOURCE_YES) { ParitySpinor Ap = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
b = allocateParitySpinor(x.X, x.precision); ParitySpinor tmp = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
copyCuda(b, source);
} else {
b = source;
}
ParitySpinor x_sloppy, r_sloppy, tmp_sloppy; ParitySpinor x_sloppy, r_sloppy;
if (invert_param->cuda_prec_sloppy == x.precision) { if (invert_param->cuda_prec_sloppy == x.precision) {
x_sloppy = x; x_sloppy = x;
r_sloppy = r; r_sloppy = r;
tmp_sloppy = tmp;
} else { } else {
x_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); x_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
r_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy); r_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
tmp_sloppy = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
} }
copyCuda(r, b); copyCuda(r, b);
...@@ -39,15 +43,12 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv ...@@ -39,15 +43,12 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
zeroCuda(x_sloppy); zeroCuda(x_sloppy);
zeroCuda(y); zeroCuda(y);
double kappa = invert_param->kappa;
if (invert_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= cudaGaugePrecise.anisotropy;
double b2 = 0.0; double b2 = 0.0;
b2 = normCuda(b); b2 = normCuda(b);
double r2 = b2; double r2 = b2;
double r2_old; double r2_old;
double stop = r2*perf->tol*perf->tol; // stopping condition of solver double stop = r2*invert_param->tol*invert_param->tol; // stopping condition of solver
double alpha, beta; double alpha, beta;
double pAp; double pAp;
...@@ -59,22 +60,16 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv ...@@ -59,22 +60,16 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
double delta = invert_param->reliable_delta; double delta = invert_param->reliable_delta;
int k=0; int k=0;
int xUpdate = 0, rUpdate = 0; int rUpdate = 0;
if (invert_param->verbosity >= QUDA_VERBOSE) if (invert_param->verbosity >= QUDA_VERBOSE) printf("%d iterations, r2 = %e\n", k, r2);
printf("%d iterations, r2 = %e\n", k, r2);
blas_quda_flops = 0; blas_quda_flops = 0;
stopwatchStart(); stopwatchStart();
while (r2 > stop && k<perf->maxiter) { while (r2 > stop && k<invert_param->maxiter) {
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) { MatVec(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param, tmp);
MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, kappa, tmp_sloppy, perf->matpc_type);
} else {
cloverMatPCDagMatPCCuda(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, kappa,
tmp_sloppy, perf->matpc_type);
}
pAp = reDotProductCuda(p, Ap); pAp = reDotProductCuda(p, Ap);
...@@ -89,37 +84,25 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv ...@@ -89,37 +84,25 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0; int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0; int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
if (!updateR) { if (!(updateR || updateX)) {
beta = r2 / r2_old; beta = r2 / r2_old;
axpyZpbxCuda(alpha, p, x_sloppy, r_sloppy, beta); axpyZpbxCuda(alpha, p, x_sloppy, r_sloppy, beta);
} else { } else {
axpyCuda(alpha, p, x_sloppy); axpyCuda(alpha, p, x_sloppy);
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy); if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) { xpyCuda(x, y);
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, kappa, tmp, invert_param->matpc_type); MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, y, invert_param, x);
} else {
cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, kappa,
tmp, invert_param->matpc_type);
}
r2 = xmyNormCuda(b, r); r2 = xmyNormCuda(b, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r); if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
rNorm = sqrt(r2); zeroCuda(x_sloppy);
rNorm = sqrt(r2);
maxrr = rNorm; maxrr = rNorm;
maxrx = rNorm;
r0Norm = rNorm;
rUpdate++; rUpdate++;
if (updateX) {
xpyCuda(x, y);
zeroCuda(x_sloppy);
copyCuda(b, r);
r0Norm = rNorm;
maxrx = rNorm;
xUpdate++;
}
beta = r2 / r2_old; beta = r2 / r2_old;
xpayCuda(r_sloppy, beta, p); xpayCuda(r_sloppy, beta, p);
...@@ -133,49 +116,40 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv ...@@ -133,49 +116,40 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy); if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
xpyCuda(y, x); xpyCuda(y, x);
perf->secs = stopwatchReadSeconds(); invert_param->secs = stopwatchReadSeconds();
if (k==invert_param->maxiter) if (k==invert_param->maxiter)
printf("Exceeded maximum iterations %d\n", invert_param->maxiter); printf("Exceeded maximum iterations %d\n", invert_param->maxiter);
if (invert_param->verbosity >= QUDA_SUMMARIZE) if (invert_param->verbosity >= QUDA_SUMMARIZE)
printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate); printf("Reliable updates = %d\n", rUpdate);
float gflops = (blas_quda_flops + dslash_quda_flops)*1e-9; float gflops = (blas_quda_flops + dslash_quda_flops)*1e-9;
// printf("%f gflops\n", gflops / stopwatchReadSeconds()); // printf("%f gflops\n", gflops / stopwatchReadSeconds());
perf->gflops = gflops; invert_param->gflops = gflops;
perf->iter = k; invert_param->iter = k;
blas_quda_flops = 0; blas_quda_flops = 0;
dslash_quda_flops = 0; dslash_quda_flops = 0;
#if 0 #if 0
// Calculate the true residual // Calculate the true residual
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) { MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, y);
MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, kappa, tmp, perf->matpc_type); double true_res = xmyNormCuda(b, r);
} else {
cloverMatPCDagMatPCCuda(Ap, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, kappa,
tmp, perf->matpc_type);
}
copyCuda(r, b);
mxpyCuda(Ap, r);
double true_res = normCuda(r);
printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n",
k, r2, true_res / b2); k, r2, true_res / b2);
#endif #endif
if (invert_param->cuda_prec_sloppy != x.precision) { if (invert_param->cuda_prec_sloppy != x.precision) {
freeParitySpinor(tmp_sloppy);
freeParitySpinor(r_sloppy); freeParitySpinor(r_sloppy);
freeParitySpinor(x_sloppy); freeParitySpinor(x_sloppy);
} }
if (perf->preserve_source == QUDA_PRESERVE_SOURCE_YES) freeParitySpinor(b);
freeParitySpinor(r);
freeParitySpinor(p); freeParitySpinor(p);
freeParitySpinor(Ap); freeParitySpinor(Ap);
freeParitySpinor(tmp);
freeParitySpinor(y); freeParitySpinor(y);
......
...@@ -18,7 +18,7 @@ int main(int argc, char **argv) ...@@ -18,7 +18,7 @@ int main(int argc, char **argv)
Gauge_param.X[0] = 24; Gauge_param.X[0] = 24;
Gauge_param.X[1] = 24; Gauge_param.X[1] = 24;
Gauge_param.X[2] = 24; Gauge_param.X[2] = 24;
Gauge_param.X[3] = 24; Gauge_param.X[3] = 128;
setDims(Gauge_param.X); setDims(Gauge_param.X);
Gauge_param.anisotropy = 1.0; Gauge_param.anisotropy = 1.0;
...@@ -26,9 +26,9 @@ int main(int argc, char **argv) ...@@ -26,9 +26,9 @@ int main(int argc, char **argv)
Gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; Gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
Gauge_param.cpu_prec = QUDA_DOUBLE_PRECISION; Gauge_param.cpu_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.cuda_prec = QUDA_DOUBLE_PRECISION; Gauge_param.cuda_prec = QUDA_SINGLE_PRECISION;
Gauge_param.reconstruct = QUDA_RECONSTRUCT_12; Gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
Gauge_param.cuda_prec_sloppy = QUDA_HALF_PRECISION; Gauge_param.cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
Gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; Gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
Gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; Gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
...@@ -44,22 +44,22 @@ int main(int argc, char **argv) ...@@ -44,22 +44,22 @@ int main(int argc, char **argv)
} else { } else {
inv_param.dslash_type = QUDA_WILSON_DSLASH; inv_param.dslash_type = QUDA_WILSON_DSLASH;
} }
inv_param.inv_type = QUDA_BICGSTAB_INVERTER; inv_param.inv_type = QUDA_CG_INVERTER;
double mass = -0.95; double mass = -0.94;
inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param->anisotropy + mass)); inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param->anisotropy + mass));
inv_param.tol = 1e-12; inv_param.tol = 5e-7;
inv_param.maxiter = 1000; inv_param.maxiter = 10000;
inv_param.reliable_delta = 1e-1; inv_param.reliable_delta = 1e-3;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solution_type = QUDA_MAT_SOLUTION;
inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
inv_param.cpu_prec = QUDA_DOUBLE_PRECISION; inv_param.cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_DOUBLE_PRECISION; inv_param.cuda_prec = QUDA_SINGLE_PRECISION;
inv_param.cuda_prec_sloppy = QUDA_HALF_PRECISION; inv_param.cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
inv_param.preserve_source = QUDA_PRESERVE_SOURCE_YES; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
inv_param.dirac_order = QUDA_DIRAC_ORDER; inv_param.dirac_order = QUDA_DIRAC_ORDER;
if (clover_yes) { if (clover_yes) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment