Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit 08aea0d0 authored by mikeaclark's avatar mikeaclark
Browse files

Fixed CPS anisotropy and created dslash flops counter

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@494 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 5d0defb3
......@@ -95,6 +95,9 @@ __constant__ double t_boundary;
static int initDslash = 0;
unsigned long long dslash_quda_flops;
unsigned long long dslash_quda_bytes;
//#include <dslash_def.h> // Dslash kernel definitions
// kludge to avoid '#include nested too deeply' error
......@@ -306,6 +309,7 @@ void dslashCuda(ParitySpinor out, FullGauge gauge, ParitySpinor in, int parity,
} else if (in.precision == QUDA_HALF_PRECISION) {
dslashHCuda(out, gauge, in, parity, dagger);
}
dslash_quda_flops += 1320*in.volume;
}
......@@ -508,6 +512,8 @@ void dslashXpayCuda(ParitySpinor out, FullGauge gauge, ParitySpinor in, int pari
} else if (in.precision == QUDA_HALF_PRECISION) {
dslashXpayHCuda(out, gauge, in, parity, dagger, x, a);
}
dslash_quda_flops += (1320+48)*in.volume;
}
......@@ -768,6 +774,8 @@ void cloverDslashCuda(ParitySpinor out, FullGauge gauge, FullClover cloverInv,
} else if (in.precision == QUDA_HALF_PRECISION) {
cloverDslashHCuda(out, gauge, cloverInv, in, parity, dagger);
}
dslash_quda_flops += (1320+504)*in.volume;
}
void cloverDslashDCuda(ParitySpinor res, FullGauge gauge, FullClover cloverInv,
......@@ -1272,6 +1280,8 @@ void cloverDslashXpayCuda(ParitySpinor out, FullGauge gauge, FullClover cloverIn
} else if (in.precision == QUDA_HALF_PRECISION) {
cloverDslashXpayHCuda(out, gauge, cloverInv, in, parity, dagger, x, a);
}
dslash_quda_flops += (1320+504+48)*in.volume;
}
......@@ -1887,6 +1897,8 @@ void cloverCuda(ParitySpinor out, FullGauge gauge, FullClover clover,
} else if (in.precision == QUDA_HALF_PRECISION) {
cloverHCuda(out, gauge, clover, in, parity);
}
dslash_quda_flops += 504*in.volume;
}
void cloverDCuda(ParitySpinor res, FullGauge gauge, FullClover clover,
......
......@@ -122,6 +122,9 @@ extern "C" {
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
QudaInvertParam *param, DagType dag_type);
extern unsigned long long dslash_quda_flops;
extern unsigned long long dslash_quda_bytes;
#ifdef __cplusplus
}
#endif
......
......@@ -36,7 +36,7 @@ void init() {
gaugeParam.X[0] = 24;
gaugeParam.X[1] = 24;
gaugeParam.X[2] = 24;
gaugeParam.X[3] = 24;
gaugeParam.X[3] = 48;
setDims(gaugeParam.X);
gaugeParam.anisotropy = 2.3;
......@@ -45,7 +45,7 @@ void init() {
gaugeParam.t_boundary = QUDA_ANTI_PERIODIC_T;
gaugeParam.cpu_prec = QUDA_DOUBLE_PRECISION;
gaugeParam.cuda_prec = QUDA_HALF_PRECISION;
gaugeParam.cuda_prec = QUDA_SINGLE_PRECISION;
gaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
gaugeParam.reconstruct_sloppy = gaugeParam.reconstruct;
gaugeParam.cuda_prec_sloppy = gaugeParam.cuda_prec;
......@@ -64,7 +64,7 @@ void init() {
inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
inv_param.cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_HALF_PRECISION;
inv_param.cuda_prec = QUDA_SINGLE_PRECISION;
if (test_type == 2) inv_param.dirac_order = QUDA_DIRAC_ORDER;
else inv_param.dirac_order = QUDA_DIRAC_ORDER;
......@@ -308,8 +308,8 @@ void dslashTest() {
printf("%d Test %s\n", i, (1 == res) ? "PASSED" : "FAILED");
if (test_type < 2) strong_check(spinorRef, spinorOdd, Vh, inv_param.cpu_prec);
else strong_check(spinorRef, spinorGPU, V, inv_param.cpu_prec);
//if (test_type < 2) strong_check(spinorRef, spinorOdd, Vh, inv_param.cpu_prec);
//else strong_check(spinorRef, spinorGPU, V, inv_param.cpu_prec);
}
end();
}
......
......@@ -8,6 +8,20 @@
#include <spinor_quda.h>
#include <gauge_quda.h>
void MatVec(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cloverInv, ParitySpinor in,
QudaInvertParam *invert_param, ParitySpinor tmp, DagType dag_type) {
double kappa = invert_param->kappa;
if (invert_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)
kappa *= cudaGaugePrecise.anisotropy;
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(out, gauge, in, kappa, tmp, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(out, gauge, clover, cloverInv, in, kappa, tmp,
invert_param->matpc_type, dag_type);
}
}
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
QudaInvertParam *invert_param, DagType dag_type)
{
......@@ -37,45 +51,44 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
copyCuda(b, src);
copyCuda(r_sloppy, src);
/*MatPCDagCuda(y, cudaGaugePrecise, src, invert_param->kappa, tmp, invert_param->matpc_type);
copyCuda(src_sloppy, y);*/ // uncomment for BiCRstab
zeroCuda(y);
double b2 = normCuda(b);
double r2 = b2;
double stop = b2*invert_param->tol*invert_param->tol; // stopping condition of solver
double delta = invert_param->reliable_delta;
int k = 0;
int xUpdate = 0, rUpdate = 0;
cuDoubleComplex rho = make_cuDoubleComplex(1.0, 0.0);
cuDoubleComplex rho0 = rho;
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
cuDoubleComplex omega = make_cuDoubleComplex(1.0, 0.0);
cuDoubleComplex beta;
cuDoubleComplex rv;
cuDoubleComplex rho_rho0;
cuDoubleComplex alpha_omega;
cuDoubleComplex beta_omega;
cuDoubleComplex one = make_cuDoubleComplex(1.0, 0.0);
double3 rho_r2;
double3 omega_t2;
double rNorm = sqrt(r2);
double r0Norm = rNorm;
double maxrx = rNorm;
double maxrr = rNorm;
double delta = invert_param->reliable_delta;
int k=0;
int xUpdate = 0, rUpdate = 0;
if (invert_param->verbosity >= QUDA_VERBOSE) printf("%d iterations, r2 = %e\n", k, r2);
blas_quda_flops = 0;
dslash_quda_flops = 0;
if (invert_param->verbosity >= QUDA_VERBOSE)
printf("%d iterations, r2 = %e\n", k, r2);
stopwatchStart();
while (r2 > stop && k<invert_param->maxiter) {
while (r2 > stop && k<invert_param->maxiter) {
if (k==0) {
rho = make_cuDoubleComplex(r2, 0.0); // cDotProductCuda(src_sloppy, r_sloppy); // BiCRstab
copyCuda(p, r_sloppy);
......@@ -83,119 +96,90 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
alpha_omega = cuCdiv(alpha, omega);
rho_rho0 = cuCdiv(rho, rho0);
beta = cuCmul(rho_rho0, alpha_omega);
// p = r - beta*omega*v + beta*(p)
beta_omega = cuCmul(beta, omega); beta_omega.x *= -1.0; beta_omega.y *= -1.0;
cxpaypbzCuda(r_sloppy, beta_omega, v, beta, p); // 8
}
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(v, cudaGaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param->kappa, tmp_sloppy,
invert_param->matpc_type, dag_type);
}
MatVec(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param, tmp_sloppy, dag_type);
// rv = (r0,v)
rv = cDotProductCuda(src_sloppy, v);
alpha = cuCdiv(rho, rv);
// r -= alpha*v
alpha.x *= -1.0; alpha.y *= -1.0;
caxpyCuda(alpha, v, r_sloppy); // 4
alpha.x *= -1.0; alpha.y *= -1.0;
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(t, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param->kappa, tmp_sloppy,
invert_param->matpc_type,dag_type);
}
MatVec(t, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param, tmp_sloppy, dag_type);
// omega = (t, r) / (t, t)
omega_t2 = cDotProductNormACuda(t, r_sloppy); // 6
omega.x = omega_t2.x / omega_t2.z; omega.y = omega_t2.y/omega_t2.z;
//x += alpha*p + omega*r, r -= omega*t, r2 = (r,r), rho = (r0, r)
rho_r2 = caxpbypzYmbwcDotProductWYNormYQuda(alpha, p, omega, r_sloppy, x_sloppy, t, src_sloppy);
rho0 = rho; rho.x = rho_r2.x; rho.y = rho_r2.y; r2 = rho_r2.z;
// reliable updates
rNorm = sqrt(r2);
if (rNorm > maxrx) maxrx = rNorm;
if (rNorm > maxrr) maxrr = rNorm;
int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
if (updateR) {
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa, tmp,
invert_param->matpc_type, dag_type);
}
MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param, tmp, dag_type);
r2 = xmyNormCuda(b, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
rNorm = sqrt(r2);
maxrr = rNorm;
rUpdate++;
if (updateX) {
xpyCuda(x, y);
zeroCuda(x_sloppy);
copyCuda(b, r);
r0Norm = rNorm;
maxrx = rNorm;
xUpdate++;
}
}
k++;
if (invert_param->verbosity >= QUDA_VERBOSE)
printf("%d iterations, r2 = %e\n", k, r2);
if (invert_param->verbosity >= QUDA_VERBOSE) printf("%d iterations, r2 = %e\n", k, r2);
}
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
xpyCuda(y, x);
if (k==invert_param->maxiter) printf("Exceeded maximum iterations %d\n", invert_param->maxiter);
invert_param->secs += stopwatchReadSeconds();
if (k==invert_param->maxiter)
printf("Exceeded maximum iterations %d\n", invert_param->maxiter);
if (invert_param->verbosity >= QUDA_SUMMARIZE)
if (invert_param->verbosity >= QUDA_VERBOSE)
printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
float gflops = (1.0e-9*x.volume)*(2*(2*1320+48)*k + (32*k + 8*(k-1))*spinorSiteSize);
gflops += 1.0e-9*x.volume*rUpdate*((2*1320+48) + 3*spinorSiteSize);
gflops += 1.0e-9*x.volume*xUpdate*spinorSiteSize;
if (invert_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
gflops += (1.0e-9*x.volume)*4*504*k;
gflops += (1.0e-9*x.volume)*rUpdate*2*504;
}
//printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
invert_param->secs += stopwatchReadSeconds();
float gflops = (blas_quda_flops + dslash_quda_flops)*1e-9;
// printf("%f gflops\n", gflops / stopwatchReadSeconds());
invert_param->gflops += gflops;
invert_param->iter += k;
#if 0
// Calculate the true residual
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa, tmp,
invert_param->matpc_type, dag_type);
}
MatVec(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param, tmp, dag_type);
double true_res = xmyNormCuda(src, r);
printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", k, sqrt(r2/b2), sqrt(true_res / b2));
copyCuda(b, src);
if (invert_param->verbosity >= QUDA_SUMMARIZE)
printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", k, sqrt(r2/b2), sqrt(true_res / b2));
#endif
if (invert_param->cuda_prec_sloppy != x.precision) {
......
......@@ -39,6 +39,9 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
zeroCuda(x_sloppy);
zeroCuda(y);
double kappa = invert_param->kappa;
if (invert_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= cudaGaugePrecise.anisotropy;
double b2 = 0.0;
b2 = normCuda(b);
......@@ -60,13 +63,16 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
if (invert_param->verbosity >= QUDA_VERBOSE)
printf("%d iterations, r2 = %e\n", k, r2);
blas_quda_flops = 0;
stopwatchStart();
while (r2 > stop && k<perf->maxiter) {
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, kappa, tmp_sloppy, perf->matpc_type);
} else {
cloverMatPCDagMatPCCuda(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, perf->kappa,
cloverMatPCDagMatPCCuda(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, kappa,
tmp_sloppy, perf->matpc_type);
}
......@@ -92,9 +98,9 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type);
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, kappa, tmp, invert_param->matpc_type);
} else {
cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa,
cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, kappa,
tmp, invert_param->matpc_type);
}
......@@ -128,6 +134,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
xpyCuda(y, x);
perf->secs = stopwatchReadSeconds();
if (k==invert_param->maxiter)
printf("Exceeded maximum iterations %d\n", invert_param->maxiter);
......@@ -135,18 +142,20 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
if (invert_param->verbosity >= QUDA_SUMMARIZE)
printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
float gflops = k*(1.0e-9*x.volume)*(2*(2*1320+48) + 10*spinorSiteSize);
if (invert_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) gflops += k*(1.0e-9*x.volume)*4*504;
//printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
float gflops = (blas_quda_flops + dslash_quda_flops)*1e-9;
// printf("%f gflops\n", gflops / stopwatchReadSeconds());
perf->gflops = gflops;
perf->iter = k;
blas_quda_flops = 0;
dslash_quda_flops = 0;
#if 0
// Calculate the true residual
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, kappa, tmp, perf->matpc_type);
} else {
cloverMatPCDagMatPCCuda(Ap, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, perf->kappa,
cloverMatPCDagMatPCCuda(Ap, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, kappa,
tmp, perf->matpc_type);
}
copyCuda(r, b);
......
......@@ -227,6 +227,12 @@ void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int parity,
ParitySpinor out = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
loadParitySpinor(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
parity = (parity+1)%2;
axCuda(gauge_param->anisotropy, in);
}
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
dslashCuda(out, cudaGaugePrecise, in, parity, dagger);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
......@@ -248,12 +254,17 @@ void MatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger)
ParitySpinor in = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
ParitySpinor out = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
ParitySpinor tmp = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
loadParitySpinor(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
double kappa = inv_param->kappa;
if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)
kappa *= cudaGaugePrecise.anisotropy;
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(out, cudaGaugePrecise, in, inv_param->kappa, tmp, inv_param->matpc_type, dagger);
MatPCCuda(out, cudaGaugePrecise, in, kappa, tmp, inv_param->matpc_type, dagger);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
cloverMatPCCuda(out, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, in, inv_param->kappa,
cloverMatPCCuda(out, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, in, kappa,
tmp, inv_param->matpc_type, dagger);
} else {
printf("QUDA error: unsupported dslash_type\n");
......@@ -275,10 +286,15 @@ void MatPCDagMatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
ParitySpinor tmp = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
loadParitySpinor(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
double kappa = inv_param->kappa;
if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)
kappa *= cudaGaugePrecise.anisotropy;
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(out, cudaGaugePrecise, in, inv_param->kappa, tmp, inv_param->matpc_type);
MatPCDagMatPCCuda(out, cudaGaugePrecise, in, kappa, tmp, inv_param->matpc_type);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
cloverMatPCDagMatPCCuda(out, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, in, inv_param->kappa,
cloverMatPCDagMatPCCuda(out, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, in, kappa,
tmp, inv_param->matpc_type);
} else {
printf("QUDA error: unsupported dslash_type\n");
......@@ -299,11 +315,15 @@ void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger) {
loadSpinorField(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
double kappa = inv_param->kappa;
if (inv_param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)
kappa *= cudaGaugePrecise.anisotropy;
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
MatCuda(out, cudaGaugePrecise, in, -inv_param->kappa, dagger);
MatCuda(out, cudaGaugePrecise, in, -kappa, dagger);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
ParitySpinor tmp = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
cloverMatCuda(out, cudaGaugePrecise, cudaCloverPrecise, in, inv_param->kappa, tmp, dagger);
cloverMatCuda(out, cudaGaugePrecise, cudaCloverPrecise, in, kappa, tmp, dagger);
freeParitySpinor(tmp);
} else {
printf("QUDA error: unsupported dslash_type\n");
......@@ -333,7 +353,7 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
param->iter = 0;
double kappa = param->kappa;
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa /= cudaGaugePrecise.anisotropy;
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa *= cudaGaugePrecise.anisotropy;
FullSpinor b, x;
ParitySpinor in = allocateParitySpinor(cudaGaugePrecise.X, invert_param->cuda_prec); // source vector
......@@ -366,12 +386,6 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
axCuda(2.0*kappa, b.odd);
}
// cps uses a different anisotropy normalization
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
axCuda(1.0/gauge_param->anisotropy, b.even);
axCuda(1.0/gauge_param->anisotropy, b.odd);
}
if (param->dslash_type == QUDA_WILSON_DSLASH) {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
// in = b_e + k D_eo b_o
......@@ -433,16 +447,9 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
axCuda(4.0*kappa*kappa, in);
}
}
// cps uses a different anisotropy normalization
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
if (param->solution_type == QUDA_MATPC_SOLUTION)
axCuda(pow(1.0/gauge_param->anisotropy, 2), in);
else
axCuda(pow(1.0/gauge_param->anisotropy, 4), in);
}
}
switch (param->inv_type) {
case QUDA_CG_INVERTER:
if (param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) {
......@@ -467,7 +474,7 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
printf("Inverter type %d not implemented\n", param->inv_type);
exit(-1);
}
if (param->solution_type == QUDA_MAT_SOLUTION) {
if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) {
......
......@@ -18,7 +18,7 @@ int main(int argc, char **argv)
Gauge_param.X[0] = 24;
Gauge_param.X[1] = 24;
Gauge_param.X[2] = 24;
Gauge_param.X[3] = 48;
Gauge_param.X[3] = 24;
setDims(Gauge_param.X);
Gauge_param.anisotropy = 1.0;
......@@ -28,29 +28,29 @@ int main(int argc, char **argv)
Gauge_param.cpu_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.cuda_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
Gauge_param.cuda_prec_sloppy = QUDA_DOUBLE_PRECISION;
Gauge_param.cuda_prec_sloppy = QUDA_HALF_PRECISION;
Gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
Gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
Gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
Gauge_param.blockDim = 64;
Gauge_param.blockDim_sloppy = 64;
gauge_param = &Gauge_param;
int clover_yes = 1; // 0 for plain Wilson, 1 for clover
int clover_yes = 0; // 0 for plain Wilson, 1 for clover
if (clover_yes) {
inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
} else {
inv_param.dslash_type = QUDA_WILSON_DSLASH;
}
inv_param.inv_type = QUDA_CG_INVERTER;
inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
double mass = -0.95;
inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param->anisotropy + mass));
inv_param.tol = 1e-12;
inv_param.maxiter = 200;
inv_param.reliable_delta = 1e-13;
inv_param.maxiter = 1000;
inv_param.reliable_delta = 1e-1;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.solution_type = QUDA_MAT_SOLUTION;
......@@ -58,7 +58,7 @@ int main(int argc, char **argv)
inv_param.cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec_sloppy = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec_sloppy = QUDA_HALF_PRECISION;
inv_param.preserve_source = QUDA_PRESERVE_SOURCE_YES;
inv_param.dirac_order = QUDA_DIRAC_ORDER;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment