Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit 33f0a6b7 authored by mikeaclark's avatar mikeaclark
Browse files

Further fixes to unpacking

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@509 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 11cf54f6
......@@ -80,26 +80,22 @@ void init() {
gauge_param = &gaugeParam;
invert_param = &inv_param;
size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
// construct input fields
for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gSize);
for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gaugeParam.cpu_prec);
if (clover_yes) {
size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
if (test_type > 0) {
hostClover = malloc(V*cloverSiteSize*cSize);
hostClover = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
hostCloverInv = hostClover; // fake it
} else {
hostClover = NULL;
hostCloverInv = malloc(V*cloverSiteSize*cSize);
hostCloverInv = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
}
}
spinor = malloc(V*spinorSiteSize*sSize);
spinorRef = malloc(V*spinorSiteSize*sSize);
spinorGPU = malloc(V*spinorSiteSize*sSize);
spinor = malloc(V*spinorSiteSize*inv_param.cpu_prec);
spinorRef = malloc(V*spinorSiteSize*inv_param.cpu_prec);
spinorGPU = malloc(V*spinorSiteSize*inv_param.cpu_prec);
spinorEven = spinor;
spinorRefEven = spinorRef;
spinorGPUEven = spinorGPU;
......
......@@ -177,7 +177,6 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
freeParitySpinor(p);
freeParitySpinor(Ap);
freeParitySpinor(b);
freeParitySpinor(y);
return;
......
......@@ -144,6 +144,17 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
}
}
/*
Save the gauge field into the caller-supplied host buffer h_gauge.

This simply hands h_gauge and the precise gauge field (cudaGaugePrecise)
to restoreGaugeField(); presumably that routine copies the device field
back into host memory -- confirm against restoreGaugeField().

Very limited functionality here
- currently assumes that the precision of the cpu field is the same as before
- no ability to dump the sloppy gauge field
- really exposes how crap the current api is
*/
void saveGaugeQuda(void *h_gauge)
{
restoreGaugeField(h_gauge, &cudaGaugePrecise);
}
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
{
if (!h_clover && !h_clovinv) {
......
......@@ -76,6 +76,7 @@ extern "C" {
// Interface functions
void initQuda(int dev);
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);
void saveGaugeQuda(void *h_gauge);
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param);
void discardCloverQuda(QudaInvertParam *inv_param);
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
......
......@@ -21,96 +21,62 @@ inline short doubleToShort(double a) {
return (short)((a+SHIFT_FLOAT)*SCALE_FLOAT);
}
// CPU only test of SU(3) accuracy, tests 8 and 12 component reconstruction
void SU3Test() {
Precision gauge_precision = QUDA_DOUBLE_PRECISION;
// construct input fields
double *gauge[4];
for (int dir = 0; dir < 4; dir++) gauge[dir] = (double*)malloc(V*gaugeSiteSize*sizeof(double));
QudaGaugeParam param;
gauge_param = &param;
param.X[0] = 4;
param.X[1] = 4;
param.X[2] = 4;
param.X[3] = 4;
QudaGaugeParam param;
void *gauge[4], *new_gauge[4];
void init() {
param.blockDim = 64;
param.cpu_prec = QUDA_SINGLE_PRECISION;
param.cuda_prec = QUDA_SINGLE_PRECISION;
param.reconstruct = QUDA_RECONSTRUCT_8;
param.cuda_prec_sloppy = param.cuda_prec;
param.reconstruct_sloppy = param.reconstruct;
param.X[0] = 24;
param.X[1] = 24;
param.X[2] = 24;
param.X[3] = 24;
setDims(param.X);
param.anisotropy = 2.0;
param.anisotropy = 2.3;
param.t_boundary = QUDA_ANTI_PERIODIC_T;
printf("Randomizing fields...");
construct_gauge_field((void**)gauge, 1, gauge_precision);
printf("done.\n");
param.gauge_fix = QUDA_GAUGE_FIXED_NO;
gauge_param = &param;
int fail_check = 17;
int fail8[fail_check], fail12[fail_check];
for (int f=0; f<fail_check; f++) {
fail8[f] = 0;
fail12[f] = 0;
// construct gauge fields
for (int dir = 0; dir < 4; dir++) {
gauge[dir] = malloc(V*gaugeSiteSize*param.cpu_prec);
new_gauge[dir] = malloc(V*gaugeSiteSize*param.cpu_prec);
}
int iter8[18], iter12[18];
for (int i=0; i<18; i++) {
iter8[i] = 0;
iter12[i] = 0;
}
int dev = 0;
cudaSetDevice(dev);
}
for (int eo=0; eo<2; eo++) {
for (int i=0; i<Vh; i++) {
int ga_idx = (eo*Vh+i);
for (int d=0; d<4; d++) {
double gauge8[18], gauge12[18];
for (int j=0; j<18; j++) {
gauge8[j] = gauge[d][ga_idx*18+j];
gauge12[j] = gauge[d][ga_idx*18+j];
}
su3_construct(gauge8, QUDA_RECONSTRUCT_8, gauge_precision);
su3_reconstruct(gauge8, d, i, QUDA_RECONSTRUCT_8, gauge_precision);
su3_construct(gauge12, QUDA_RECONSTRUCT_12, gauge_precision);
su3_reconstruct(gauge12, d, i, QUDA_RECONSTRUCT_12, gauge_precision);
if (fabs(gauge8[8] - gauge[d][ga_idx*18+8]) > 1e-1) {
printGaugeElement(gauge[d]+ga_idx*18, 0, gauge_precision);printf("\n");
printGaugeElement(gauge8, 0, gauge_precision);printf("\n");
printGaugeElement(gauge12, 0, gauge_precision);
exit(0);
}
for (int j=0; j<18; j++) {
double diff8 = fabs(gauge8[j] - gauge[d][ga_idx*18+j]);
double diff12 = fabs(gauge12[j] - gauge[d][ga_idx*18+j]);
for (int f=0; f<fail_check; f++) {
if (diff8 > pow(10,-(f+1))) fail8[f]++;
if (diff12 > pow(10,-(f+1))) fail12[f]++;
}
if (diff8 > 1e-3) {
iter8[j]++;
}
if (diff12 > 1e-3) {
iter12[j]++;
}
}
}
}
// Tear-down counterpart of the test set-up: frees the host buffers stored in
// gauge[] and new_gauge[] for each of the four directions.
void end() {
  int mu = 0;
  while (mu < 4) {
    // free both buffers of a direction before moving on to the next one
    free(gauge[mu]);
    free(new_gauge[mu]);
    ++mu;
  }
}
for (int i=0; i<18; i++) printf("%d 12 fails = %d, 8 fails = %d\n", i, iter12[i], iter8[i]);
void SU3Test() {
for (int f=0; f<fail_check; f++) {
printf("%e Failures: 12 component = %d / %d = %e, 8 component = %d / %d = %e\n",
pow(10,-(f+1)), fail12[f], V*4*18, fail12[f] / (double)(4*V*18),
fail8[f], V*4*18, fail8[f] / (double)(4*V*18));
}
init();
printf("Randomizing fields...");
construct_gauge_field((void**)gauge, 1, param.cpu_prec);
printf("done.\n");
// release memory
for (int dir = 0; dir < 4; dir++) free(gauge[dir]);
loadGaugeQuda(gauge, &param);
saveGaugeQuda(new_gauge);
check_gauge(gauge, new_gauge, param.cpu_prec);
end();
}
int main(int argc, char **argv) {
......
......@@ -552,3 +552,41 @@ void strong_check(void *spinorRef, void *spinorGPU, int len, Precision prec) {
compare_spinor(spinorRef, spinorGPU, len, prec);
}
/*
  Compare two host gauge fields component by component and print a summary
  of how much they differ.

  oldG, newG : arrays of 4 pointers (one per direction d = 0..3); each
               pointer is indexed as [site*18 + j] with site in [0, 2*Vh)
               and j in [0, 18).  Vh and V are globals defined elsewhere.

  Output (stdout):
   - for each of the 18 components j, the number of (site, direction)
     pairs whose absolute difference exceeds 1e-3;
   - for each tolerance 1e-1, 1e-2, ..., 1e-17, the number of components
     whose absolute difference exceeds it, and that count as a fraction
     of V*4*18.

  Nothing is returned; the function only reports.
*/
template <typename Float>
void checkGauge(Float **oldG, Float **newG) {
  // Compile-time bounds: a non-const bound would make these arrays
  // variable-length arrays, which are not standard C++.
  const int fail_check = 17;  // number of decade tolerances checked
  const int n_comp = 18;      // real components stored per site and direction

  int fail[fail_check];
  int iter[n_comp];
  double tol[fail_check];

  for (int f = 0; f < fail_check; f++) {
    fail[f] = 0;
    // tolerance f is 10^-(f+1); computed once instead of per component
    tol[f] = pow(10.0, -(f+1));
  }
  for (int j = 0; j < n_comp; j++) iter[j] = 0;

  for (int eo = 0; eo < 2; eo++) {
    for (int i = 0; i < Vh; i++) {
      // the eo = 0 block of Vh sites is followed by the eo = 1 block
      int ga_idx = (eo*Vh+i);
      for (int d = 0; d < 4; d++) {
        for (int j = 0; j < n_comp; j++) {
          double diff = fabs(newG[d][ga_idx*n_comp+j] - oldG[d][ga_idx*n_comp+j]);

          for (int f = 0; f < fail_check; f++) {
            if (diff > tol[f]) fail[f]++;
          }

          if (diff > 1e-3) iter[j]++;
        }
      }
    }
  }

  for (int j = 0; j < n_comp; j++) printf("%d fails = %d\n", j, iter[j]);

  for (int f = 0; f < fail_check; f++) {
    printf("%e Failures = %d / %d = %e\n", tol[f], fail[f], V*4*18, fail[f] / (double)(4*V*18));
  }
}
void check_gauge(void *oldG, void *newG, QudaPrecision precision) {
if (precision == QUDA_SINGLE_PRECISION)
checkGauge((double**)oldG, (double**)newG);
else
checkGauge((float**)oldG, (float**)newG);
}
......@@ -28,6 +28,9 @@ extern "C" {
void compare_spinor(void *spinor_cpu, void *spinor_gpu, int len, Precision precision);
void strong_check(void *spinor, void *spinorGPU, int len, Precision precision);
int compare_floats(void *a, void *b, int len, double epsilon, Precision precision);
void check_gauge(void *, void *, Precision precision);
// ---------- gauge_read.cpp ----------
void readGaugeField(char *filename, float *gauge[], int argc, char *argv[]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment