Advanced Computing Platform for Theoretical Physics

Commit 5a093817 authored by rbabich's avatar rbabich
Browse files

changes to complete quda clover inverter (accidentally left out of last

commit)


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@464 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 5bf45c2a
......@@ -116,7 +116,7 @@ void init() {
construct_spinor_field(spinor, 1, 0, 0, 0, inv_param.cpu_prec);
if (clover_yes) {
double norm = 1.0; // random components range between -norm and norm
double norm = 0; // random components range between -norm and norm
double diag = 1.0; // constant added to the diagonal
if (test_type == 2) {
......
......@@ -89,8 +89,13 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
cxpaypbzCuda(r_sloppy, beta_omega, v, beta, p); // 8
}
MatPCCuda(v, cudaGaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(v, cudaGaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, invert_param->kappa, tmp_sloppy,
invert_param->matpc_type, dag_type);
}
// rv = (r0,v)
rv = cDotProductCuda(src_sloppy, v);
......@@ -101,7 +106,12 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
caxpyCuda(alpha, v, r_sloppy); // 4
alpha.x *= -1.0; alpha.y *= -1.0;
MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param->kappa, tmp_sloppy,
invert_param->matpc_type,dag_type);
}
// omega = (t, r) / (t, t)
omega_t2 = cDotProductNormACuda(t, r_sloppy); // 6
......@@ -121,7 +131,12 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
if (updateR) {
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa, tmp,
invert_param->matpc_type, dag_type);
}
r2 = xmyNormCuda(b, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
......@@ -162,13 +177,22 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
float gflops = (1.0e-9*x.volume)*(2*(2*1320+48)*k + (32*k + 8*(k-1))*spinorSiteSize);
gflops += 1.0e-9*x.volume*rUpdate*((2*1320+48) + 3*spinorSiteSize);
gflops += 1.0e-9*x.volume*xUpdate*spinorSiteSize;
if (invert_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
gflops += (1.0e-9*x.volume)*4*504*k;
gflops += (1.0e-9*x.volume)*rUpdate*2*504;
}
//printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
invert_param->gflops += gflops;
invert_param->iter += k;
#if 0
// Calculate the true residual
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa, tmp,
invert_param->matpc_type, dag_type);
}
double true_res = xmyNormCuda(src, r);
printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", k, sqrt(r2/b2), sqrt(true_res / b2));
......
......@@ -62,7 +62,13 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
printf("%d iterations, r2 = %e\n", k, r2);
stopwatchStart();
while (r2 > stop && k<perf->maxiter) {
MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
} else {
cloverMatPCDagMatPCCuda(Ap, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, p, perf->kappa,
tmp_sloppy, perf->matpc_type);
}
pAp = reDotProductCuda(p, Ap);
......@@ -85,8 +91,12 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa,
tmp, invert_param->matpc_type);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type);
} else {
cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa,
tmp, invert_param->matpc_type);
}
r2 = xmyNormCuda(b, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
......@@ -126,13 +136,19 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
float gflops = k*(1.0e-9*x.volume)*(2*(2*1320+48) + 10*spinorSiteSize);
if (invert_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) gflops += k*(1.0e-9*x.volume)*4*504;
//printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
perf->gflops = gflops;
perf->iter = k;
#if 0
// Calculate the true residual
MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
} else {
cloverMatPCDagMatPCCuda(Ap, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, perf->kappa,
tmp, perf->matpc_type);
}
copyCuda(r, b);
mxpyCuda(Ap, r);
double true_res = normCuda(r);
......
......@@ -64,7 +64,7 @@ int main(int argc, char **argv)
if (clover_yes) {
inv_param.clover_cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec = QUDA_SINGLE_PRECISION;
inv_param.clover_cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
}
......@@ -79,7 +79,7 @@ int main(int argc, char **argv)
construct_gauge_field(gauge, 1, Gauge_param.cpu_prec);
if (clover_yes) {
double norm = 1.0; // random components range between -norm and norm
double norm = 0.1; // random components range between -norm and norm
double diag = 1.0; // constant added to the diagonal
size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
......@@ -107,9 +107,10 @@ int main(int argc, char **argv)
time0 += clock(); // stop the timer
time0 /= CLOCKS_PER_SEC;
printf("Cuda Space Required. Spinor:%f + Gauge:%f GiB\n",
printf("Cuda Space Required:\n Spinor: %f GiB\n Gauge: %f GiB\n",
inv_param.spinorGiB, Gauge_param.gaugeGiB);
printf("done: %i iter / %g secs = %g gflops, total time = %g secs\n",
if (clover_yes) printf(" Clover: %f GiB\n", inv_param.cloverGiB);
printf("\nDone: %i iter / %g secs = %g gflops, total time = %g secs\n",
inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);
mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, inv_param.cpu_prec, Gauge_param.cpu_prec);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment