Advanced Computing Platform for Theoretical Physics

Commit 33f0a6b7 authored by mikeaclark's avatar mikeaclark
Browse files

Further fixes to unpacking

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@509 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 11cf54f6
...@@ -80,26 +80,22 @@ void init() { ...@@ -80,26 +80,22 @@ void init() {
gauge_param = &gaugeParam; gauge_param = &gaugeParam;
invert_param = &inv_param; invert_param = &inv_param;
size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
// construct input fields // construct input fields
for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gSize); for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gaugeParam.cpu_prec);
if (clover_yes) { if (clover_yes) {
size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
if (test_type > 0) { if (test_type > 0) {
hostClover = malloc(V*cloverSiteSize*cSize); hostClover = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
hostCloverInv = hostClover; // fake it hostCloverInv = hostClover; // fake it
} else { } else {
hostClover = NULL; hostClover = NULL;
hostCloverInv = malloc(V*cloverSiteSize*cSize); hostCloverInv = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
} }
} }
spinor = malloc(V*spinorSiteSize*sSize); spinor = malloc(V*spinorSiteSize*inv_param.cpu_prec);
spinorRef = malloc(V*spinorSiteSize*sSize); spinorRef = malloc(V*spinorSiteSize*inv_param.cpu_prec);
spinorGPU = malloc(V*spinorSiteSize*sSize); spinorGPU = malloc(V*spinorSiteSize*inv_param.cpu_prec);
spinorEven = spinor; spinorEven = spinor;
spinorRefEven = spinorRef; spinorRefEven = spinorRef;
spinorGPUEven = spinorGPU; spinorGPUEven = spinorGPU;
......
...@@ -177,7 +177,6 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv ...@@ -177,7 +177,6 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
freeParitySpinor(p); freeParitySpinor(p);
freeParitySpinor(Ap); freeParitySpinor(Ap);
freeParitySpinor(b);
freeParitySpinor(y); freeParitySpinor(y);
return; return;
......
...@@ -144,6 +144,17 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param) ...@@ -144,6 +144,17 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
} }
} }
/*
Save the gauge field into the caller-supplied host buffer h_gauge by
calling restoreGaugeField on cudaGaugePrecise.
NOTE(review): presumably this copies the device-resident "precise" gauge
field back to the host -- confirm against restoreGaugeField.

Very limited functionality here
- currently assumes that the precision of the cpu field is the same as before
- no ability to dump the sloppy gauge field
- really exposes how crap the current api is
*/
void saveGaugeQuda(void *h_gauge)
{
restoreGaugeField(h_gauge, &cudaGaugePrecise);
}
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param) void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
{ {
if (!h_clover && !h_clovinv) { if (!h_clover && !h_clovinv) {
......
...@@ -76,6 +76,7 @@ extern "C" { ...@@ -76,6 +76,7 @@ extern "C" {
// Interface functions // Interface functions
void initQuda(int dev); void initQuda(int dev);
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param); void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);
void saveGaugeQuda(void *h_gauge);
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param); void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param);
void discardCloverQuda(QudaInvertParam *inv_param); void discardCloverQuda(QudaInvertParam *inv_param);
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param); void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
......
...@@ -21,96 +21,62 @@ inline short doubleToShort(double a) { ...@@ -21,96 +21,62 @@ inline short doubleToShort(double a) {
return (short)((a+SHIFT_FLOAT)*SCALE_FLOAT); return (short)((a+SHIFT_FLOAT)*SCALE_FLOAT);
} }
// CPU only test of SU(3) accuracy, tests 8 and 12 component reconstruction QudaGaugeParam param;
void SU3Test() { void *gauge[4], *new_gauge[4];
Precision gauge_precision = QUDA_DOUBLE_PRECISION; void init() {
// construct input fields param.blockDim = 64;
double *gauge[4];
for (int dir = 0; dir < 4; dir++) gauge[dir] = (double*)malloc(V*gaugeSiteSize*sizeof(double)); param.cpu_prec = QUDA_SINGLE_PRECISION;
param.cuda_prec = QUDA_SINGLE_PRECISION;
QudaGaugeParam param; param.reconstruct = QUDA_RECONSTRUCT_8;
gauge_param = &param; param.cuda_prec_sloppy = param.cuda_prec;
param.reconstruct_sloppy = param.reconstruct;
param.X[0] = 4;
param.X[1] = 4; param.X[0] = 24;
param.X[2] = 4; param.X[1] = 24;
param.X[3] = 4; param.X[2] = 24;
param.X[3] = 24;
setDims(param.X); setDims(param.X);
param.anisotropy = 2.0; param.anisotropy = 2.3;
param.t_boundary = QUDA_ANTI_PERIODIC_T; param.t_boundary = QUDA_ANTI_PERIODIC_T;
param.gauge_fix = QUDA_GAUGE_FIXED_NO;
printf("Randomizing fields..."); gauge_param = &param;
construct_gauge_field((void**)gauge, 1, gauge_precision);
printf("done.\n");
int fail_check = 17; // construct gauge fields
int fail8[fail_check], fail12[fail_check]; for (int dir = 0; dir < 4; dir++) {
for (int f=0; f<fail_check; f++) { gauge[dir] = malloc(V*gaugeSiteSize*param.cpu_prec);
fail8[f] = 0; new_gauge[dir] = malloc(V*gaugeSiteSize*param.cpu_prec);
fail12[f] = 0;
} }
int iter8[18], iter12[18]; int dev = 0;
for (int i=0; i<18; i++) { cudaSetDevice(dev);
iter8[i] = 0; }
iter12[i] = 0;
}
for (int eo=0; eo<2; eo++) { void end() {
for (int i=0; i<Vh; i++) { // release memory
int ga_idx = (eo*Vh+i); for (int dir = 0; dir < 4; dir++) {
for (int d=0; d<4; d++) { free(gauge[dir]);
double gauge8[18], gauge12[18]; free(new_gauge[dir]);
for (int j=0; j<18; j++) {
gauge8[j] = gauge[d][ga_idx*18+j];
gauge12[j] = gauge[d][ga_idx*18+j];
}
su3_construct(gauge8, QUDA_RECONSTRUCT_8, gauge_precision);
su3_reconstruct(gauge8, d, i, QUDA_RECONSTRUCT_8, gauge_precision);
su3_construct(gauge12, QUDA_RECONSTRUCT_12, gauge_precision);
su3_reconstruct(gauge12, d, i, QUDA_RECONSTRUCT_12, gauge_precision);
if (fabs(gauge8[8] - gauge[d][ga_idx*18+8]) > 1e-1) {
printGaugeElement(gauge[d]+ga_idx*18, 0, gauge_precision);printf("\n");
printGaugeElement(gauge8, 0, gauge_precision);printf("\n");
printGaugeElement(gauge12, 0, gauge_precision);
exit(0);
}
for (int j=0; j<18; j++) {
double diff8 = fabs(gauge8[j] - gauge[d][ga_idx*18+j]);
double diff12 = fabs(gauge12[j] - gauge[d][ga_idx*18+j]);
for (int f=0; f<fail_check; f++) {
if (diff8 > pow(10,-(f+1))) fail8[f]++;
if (diff12 > pow(10,-(f+1))) fail12[f]++;
}
if (diff8 > 1e-3) {
iter8[j]++;
}
if (diff12 > 1e-3) {
iter12[j]++;
}
}
}
}
} }
}
for (int i=0; i<18; i++) printf("%d 12 fails = %d, 8 fails = %d\n", i, iter12[i], iter8[i]); void SU3Test() {
for (int f=0; f<fail_check; f++) { init();
printf("%e Failures: 12 component = %d / %d = %e, 8 component = %d / %d = %e\n",
pow(10,-(f+1)), fail12[f], V*4*18, fail12[f] / (double)(4*V*18), printf("Randomizing fields...");
fail8[f], V*4*18, fail8[f] / (double)(4*V*18)); construct_gauge_field((void**)gauge, 1, param.cpu_prec);
} printf("done.\n");
// release memory loadGaugeQuda(gauge, &param);
for (int dir = 0; dir < 4; dir++) free(gauge[dir]); saveGaugeQuda(new_gauge);
check_gauge(gauge, new_gauge, param.cpu_prec);
end();
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
......
...@@ -552,3 +552,41 @@ void strong_check(void *spinorRef, void *spinorGPU, int len, Precision prec) { ...@@ -552,3 +552,41 @@ void strong_check(void *spinorRef, void *spinorGPU, int len, Precision prec) {
compare_spinor(spinorRef, spinorGPU, len, prec); compare_spinor(spinorRef, spinorGPU, len, prec);
} }
/*
  Element-by-element comparison of two host gauge fields.

  oldG, newG : arrays of 4 pointers (one per direction d), each indexed as
               [site*18 + j] with site running over 0 .. 2*Vh-1.

  For every entry the absolute difference |newG - oldG| is computed and
  tallied in two ways:
  - fail[f] counts the entries whose difference exceeds 10^-(f+1),
    for f = 0 .. 16
  - iter[j] counts, per component j, the entries whose difference
    exceeds 1e-3

  Both tallies are printed to stdout; nothing is returned.
  V and Vh are globals defined elsewhere in the file
  (presumably the lattice volume and half of it -- confirm).
*/
template <typename Float>
void checkGauge(Float **oldG, Float **newG) {
  // Constant bounds, so that the tally arrays are ordinary fixed-size arrays
  // instead of variable-length arrays (a compiler extension in C++).
  const int fail_check = 17; // number of tolerance levels 1e-1 .. 1e-17
  const int link_size = 18;  // entries stored per site and direction

  int fail[fail_check];
  int iter[link_size];
  for (int f=0; f<fail_check; f++) fail[f] = 0;
  for (int j=0; j<link_size; j++) iter[j] = 0;

  for (int eo=0; eo<2; eo++) {
    for (int i=0; i<Vh; i++) {
      // site index: the second pass of the outer loop is offset by Vh
      int ga_idx = (eo*Vh+i);
      for (int d=0; d<4; d++) {
        for (int j=0; j<link_size; j++) {
          double diff = fabs(newG[d][ga_idx*link_size+j] - oldG[d][ga_idx*link_size+j]);

          // one counter per tolerance level
          for (int f=0; f<fail_check; f++) {
            if (diff > pow(10.0,-(f+1))) fail[f]++;
          }

          // per-component counter at a fixed 1e-3 tolerance
          if (diff > 1e-3) iter[j]++;
        }
      }
    }
  }

  for (int j=0; j<link_size; j++) printf("%d fails = %d\n", j, iter[j]);

  for (int f=0; f<fail_check; f++) {
    printf("%e Failures = %d / %d = %e\n", pow(10.0,-(f+1)), fail[f], V*4*18, fail[f] / (double)(4*V*18));
  }
}
void check_gauge(void *oldG, void *newG, QudaPrecision precision) {
if (precision == QUDA_SINGLE_PRECISION)
checkGauge((double**)oldG, (double**)newG);
else
checkGauge((float**)oldG, (float**)newG);
}
...@@ -28,6 +28,9 @@ extern "C" { ...@@ -28,6 +28,9 @@ extern "C" {
void compare_spinor(void *spinor_cpu, void *spinor_gpu, int len, Precision precision); void compare_spinor(void *spinor_cpu, void *spinor_gpu, int len, Precision precision);
void strong_check(void *spinor, void *spinorGPU, int len, Precision precision); void strong_check(void *spinor, void *spinorGPU, int len, Precision precision);
int compare_floats(void *a, void *b, int len, double epsilon, Precision precision); int compare_floats(void *a, void *b, int len, double epsilon, Precision precision);
void check_gauge(void *, void *, Precision precision);
// ---------- gauge_read.cpp ---------- // ---------- gauge_read.cpp ----------
void readGaugeField(char *filename, float *gauge[], int argc, char *argv[]); void readGaugeField(char *filename, float *gauge[], int argc, char *argv[]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment