Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit 610ecbb7 authored by mikeaclark's avatar mikeaclark
Browse files

Added support for cps anisotropy

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@417 be54200a-260c-0410-bdd7-ce6af2a381ab
parent e5e5e289
......@@ -10,6 +10,8 @@
#define SCALE_FLOAT (SHORT_LENGTH-1) / 2.f
#define SHIFT_FLOAT -1.f / (SHORT_LENGTH-1)
double Anisotropy;
template <typename Float>
inline short FloatToShort(Float a) {
return (short)((a+SHIFT_FLOAT)*SCALE_FLOAT);
......@@ -111,9 +113,9 @@ void packCPSGaugeField(FloatN *res, Float *gauge, int oddBit, ReconstructType re
for (int dir = 0; dir < 4; dir++) {
Float *g = gauge + (oddBit*V*4+dir)*18;
for (int i = 0; i < V; i++) {
// Must reorder rows-columns
// Must reorder rows-columns and divide by anisotropy
for (int ic=0; ic<2; ic++) for (int jc=0; jc<3; jc++) for (int r=0; r<2; r++)
gT[(ic*3+jc)*2+r] = g[4*i*18 + (jc*3+ic)*2+r];
gT[(ic*3+jc)*2+r] = g[4*i*18 + (jc*3+ic)*2+r] / Anisotropy;
pack12(res+i, gT, dir, V);
}
}
......@@ -121,9 +123,9 @@ void packCPSGaugeField(FloatN *res, Float *gauge, int oddBit, ReconstructType re
for (int dir = 0; dir < 4; dir++) {
Float *g = gauge + (oddBit*V*4+dir)*18;
for (int i = 0; i < V; i++) {
// Must reorder rows-columns
// Must reorder rows-columns and divide by anisotropy
for (int ic=0; ic<3; ic++) for (int jc=0; jc<3; jc++) for (int r=0; r<2; r++)
gT[(ic*3+jc)*2+r] = g[4*i*18 + (jc*3+ic)*2+r];
gT[(ic*3+jc)*2+r] = g[4*i*18 + (jc*3+ic)*2+r] / Anisotropy;
pack8(res+i, gT, dir, V);
}
}
......@@ -191,6 +193,7 @@ void loadGaugeField(FloatN *even, FloatN *odd, Float *cpuGauge, ReconstructType
} else if (gauge_param->gauge_order == QUDA_CPS_WILSON_GAUGE_ORDER) {
packCPSGaugeField(packedEven, (Float*)cpuGauge, 0, reconstruct, Vh);
packCPSGaugeField(packedOdd, (Float*)cpuGauge, 1, reconstruct, Vh);
} else {
printf("Sorry, %d GaugeFieldOrder not supported\n", gauge_param->gauge_order);
exit(-1);
......@@ -210,7 +213,7 @@ void loadGaugeField(FloatN *even, FloatN *odd, Float *cpuGauge, ReconstructType
}
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, ReconstructType reconstruct,
Precision precision, int *X) {
Precision precision, int *X, double anisotropy) {
if (gauge_param->cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
......@@ -227,6 +230,9 @@ void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, ReconstructType reco
exit(-1);
}
Anisotropy = anisotropy;
cudaGauge->anisotropy = anisotropy;
cudaGauge->volume = 1;
for (int d=0; d<4; d++) {
cudaGauge->X[d] = X[d];
......
......@@ -8,8 +8,8 @@
extern "C" {
#endif
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge,
ReconstructType reconstruct, Precision precision, int *X);
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, ReconstructType reconstruct,
Precision precision, int *X, double anisotropy);
void freeGaugeField(FullGauge *cudaCauge);
#ifdef __cplusplus
......
......@@ -111,7 +111,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise
rho_r2 = caxpbypzYmbwcDotProductWYNormYQuda(alpha, p, omega, r_sloppy, x_sloppy, t, src_sloppy);
rho0 = rho; rho.x = rho_r2.x; rho.y = rho_r2.y; r2 = rho_r2.z;
// reliable updates (ideally should be double precision)
// reliable updates
rNorm = sqrt(r2);
if (rNorm > maxrx) maxrx = rNorm;
if (rNorm > maxrr) maxrr = rNorm;
......
......@@ -59,7 +59,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
int k=0;
int xUpdate = 0, rUpdate = 0;
//printf("%d iterations, r2 = %e\n", k, r2);
printf("%d iterations, r2 = %e\n", k, r2);
stopwatchStart();
while (r2 > stop && k<perf->maxiter) {
MatPCDagMatPCCuda(Ap, gaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
......@@ -110,7 +110,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
}
k++;
//printf("%d iterations, r2 = %e\n", k, r2);
printf("%d iterations, r2 = %e\n", k, r2);
}
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
......@@ -121,7 +121,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
if (k==invert_param->maxiter)
printf("Exceeded maximum iterations %d\n", invert_param->maxiter);
//printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
printf("Residual updates = %d, Solution updates = %d\n", rUpdate, xUpdate);
float gflops = k*(1.0e-9*x.volume)*(2*(2*1320+48) + 10*spinorSiteSize);
//printf("%f gflops\n", k*gflops / stopwatchReadSeconds());
......
......@@ -93,20 +93,15 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
{
gauge_param = param;
/* if (gauge_param->X != L1 || gauge_param->Y != L2 || gauge_param->Z != L3 || gauge_param->T != L4) {
printf("QUDA error: dimensions do not match: %d=%d, %d=%d, %d=%d, %d=%d\n",
gauge_param->X, L1, gauge_param->Y, L2, gauge_param->Z, L3, gauge_param->T, L4);
exit(-1);
}*/
gauge_param->packed_size = (gauge_param->reconstruct == QUDA_RECONSTRUCT_8) ? 8 : 12;
createGaugeField(&cudaGaugePrecise, h_gauge, gauge_param->reconstruct,
gauge_param->cuda_prec, gauge_param->X);
gauge_param->cuda_prec, gauge_param->X, gauge_param->anisotropy);
gauge_param->gaugeGiB = 2.0*cudaGaugePrecise.bytes/ (1 << 30);
if (gauge_param->cuda_prec_sloppy != gauge_param->cuda_prec ||
gauge_param->reconstruct_sloppy != gauge_param->reconstruct) {
createGaugeField(&cudaGaugeSloppy, h_gauge, gauge_param->reconstruct_sloppy,
gauge_param->cuda_prec_sloppy, gauge_param->X);
gauge_param->cuda_prec_sloppy, gauge_param->X, gauge_param->anisotropy);
gauge_param->gaugeGiB += 2.0*cudaGaugeSloppy.bytes/ (1 << 30);
} else {
cudaGaugeSloppy = cudaGaugePrecise;
......@@ -212,6 +207,7 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
param->iter = 0;
double kappa = param->kappa;
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) kappa /= cudaGaugePrecise.anisotropy;
FullSpinor b, x;
ParitySpinor in = allocateParitySpinor(cudaGaugePrecise.X, invert_param->cuda_prec); // source vector
......@@ -237,6 +233,12 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
axCuda(2.0*kappa, b.odd);
}
// cps uses a different anisotropy normalization
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
axCuda(1.0/gauge_param->anisotropy, b.even);
axCuda(1.0/gauge_param->anisotropy, b.even);
}
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
dslashXpayCuda(in, cudaGaugePrecise, b.odd, 0, 0, b.even, kappa);
} else {
......@@ -253,6 +255,14 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
axCuda(4.0*kappa*kappa, in);
else
axCuda(16.0*pow(kappa,4), in);
// cps uses a different anisotropy normalization
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER)
if (param->solution_type == QUDA_MATPC_SOLUTION)
axCuda(pow(1.0/gauge_param->anisotropy, 2), in);
else
axCuda(pow(1.0/gauge_param->anisotropy, 4), in);
}
switch (param->inv_type) {
......
......@@ -97,13 +97,13 @@ void packTest() {
stopwatchStart();
param.gauge_order = QUDA_CPS_WILSON_GAUGE_ORDER;
createGaugeField(&cudaGaugePrecise, cpsGauge, param.reconstruct, param.cuda_prec, param.X);
createGaugeField(&cudaGaugePrecise, cpsGauge, param.reconstruct, param.cuda_prec, param.X, 1.0);
double cpsGtime = stopwatchReadSeconds();
printf("CPS Gauge send time = %e seconds\n", cpsGtime);
stopwatchStart();
param.gauge_order = QUDA_QDP_GAUGE_ORDER;
createGaugeField(&cudaGaugePrecise, qdpGauge, param.reconstruct, param.cuda_prec, param.X);
createGaugeField(&cudaGaugePrecise, qdpGauge, param.reconstruct, param.cuda_prec, param.X, 1.0);
double qdpGtime = stopwatchReadSeconds();
printf("QDP Gauge send time = %e seconds\n", qdpGtime);
......
......@@ -46,6 +46,7 @@ extern "C" {
ReconstructType reconstruct;
ParityGauge odd;
ParityGauge even;
double anisotropy;
} FullGauge;
typedef struct {
......
// this will evaluate wrong for (1/L^3) < 5e-7, i.e., L=100
#define FAST_INT_DIVIDE(a, b) \
((int)(__fdividef((float)a, (float)b)+0.0000005f))
#define FAST_INT_MOD(a, b) (a - FAST_INT_DIVIDE(a,b)*b)
// Performs complex addition
#define COMPLEX_ADD_TO(a, b) \
a##_re += b##_re, \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment