Advanced Computing Platform for Theoretical Physics

Commit 11cf54f6 authored by mikeaclark
Browse files

Fixed bugs related to unpacking gauge field

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@508 be54200a-260c-0410-bdd7-ce6af2a381ab
parent dfe423a8
......@@ -233,7 +233,7 @@ inline void unpack8(Float *h_gauge, double2 *d_gauge, int dir, int V, int idx) {
template <typename Float>
inline void unpack8(Float *h_gauge, float4 *d_gauge, int dir, int V, int idx) {
float4 *dg = d_gauge + dir*4*V;
float4 *dg = d_gauge + dir*2*V;
h_gauge[0] = dg[0].x;
h_gauge[1] = dg[0].y;
h_gauge[2] = dg[0].z;
......@@ -247,7 +247,7 @@ inline void unpack8(Float *h_gauge, float4 *d_gauge, int dir, int V, int idx) {
template <typename Float>
inline void unpack8(Float *h_gauge, short4 *d_gauge, int dir, int V, int idx) {
short4 *dg = d_gauge + dir*4*V;
short4 *dg = d_gauge + dir*2*V;
ShortToFloat(h_gauge[0], dg[0].x);
ShortToFloat(h_gauge[1], dg[0].y);
ShortToFloat(h_gauge[2], dg[0].z);
......@@ -583,19 +583,14 @@ void retrieveGaugeField(Float *cpuGauge, FloatN *even, FloatN *odd, ReconstructT
}
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, ReconstructType reconstruct,
Tboundary t_boundary, Precision precision, int *XX, double anisotropy, int blockDim) {
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, Precision precision, ReconstructType reconstruct,
Tboundary t_boundary, int *XX, double anisotropy, int blockDim) {
if (precision == QUDA_HALF_PRECISION) {
if (gauge_param->cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
exit(-1);
}
if (cudaGauge->precision == QUDA_DOUBLE_PRECISION && precision != QUDA_DOUBLE_PRECISION) {
printf("Error: can only create a double GPU gauge field from a double CPU gauge field\n");
exit(-1);
}
Anisotropy = anisotropy;
tBoundary = t_boundary;
......@@ -614,53 +609,68 @@ void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, ReconstructType reco
allocateGaugeField(cudaGauge, reconstruct, precision);
if (precision == QUDA_DOUBLE_PRECISION) {
if (gauge_param->cpu_prec == QUDA_DOUBLE_PRECISION)
loadGaugeField((double2*)(cudaGauge->even), (double2*)(cudaGauge->odd), (double*)cpuGauge,
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
else if (gauge_param->cpu_prec == QUDA_SINGLE_PRECISION)
loadGaugeField((double2*)(cudaGauge->even), (double2*)(cudaGauge->odd), (float*)cpuGauge,
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
} else if (precision == QUDA_SINGLE_PRECISION) {
if (precision == QUDA_DOUBLE_PRECISION)
if (gauge_param->cpu_prec == QUDA_DOUBLE_PRECISION)
loadGaugeField((float4*)(cudaGauge->even), (float4*)(cudaGauge->odd), (double*)cpuGauge,
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
else if (precision == QUDA_SINGLE_PRECISION)
else if (gauge_param->cpu_prec == QUDA_SINGLE_PRECISION)
loadGaugeField((float4*)(cudaGauge->even), (float4*)(cudaGauge->odd), (float*)cpuGauge,
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
} else if (precision == QUDA_HALF_PRECISION) {
if (precision == QUDA_DOUBLE_PRECISION)
if (gauge_param->cpu_prec == QUDA_DOUBLE_PRECISION)
loadGaugeField((short4*)(cudaGauge->even), (short4*)(cudaGauge->odd), (double*)cpuGauge,
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
else if (precision == QUDA_SINGLE_PRECISION)
else if (gauge_param->cpu_prec == QUDA_SINGLE_PRECISION)
loadGaugeField((short4*)(cudaGauge->even), (short4*)(cudaGauge->odd), (float*)cpuGauge,
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
}
}
void restoreGaugeField(void *cpuGauge, FullGauge *cudaGauge, Precision precision) {
void restoreGaugeField(void *cpuGauge, FullGauge *cudaGauge) {
if (precision == QUDA_HALF_PRECISION) {
if (gauge_param->cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
exit(-1);
}
if (cudaGauge->precision == QUDA_DOUBLE_PRECISION && precision != QUDA_DOUBLE_PRECISION) {
printf("Error: can only create a double GPU gauge field from a double CPU gauge field\n");
exit(-1);
}
if (cudaGauge->precision == QUDA_DOUBLE_PRECISION) {
if (precision == QUDA_DOUBLE_PRECISION) {
if (gauge_param->cpu_prec == QUDA_DOUBLE_PRECISION)
retrieveGaugeField((double*)cpuGauge, (double2*)(cudaGauge->even), (double2*)(cudaGauge->odd),
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
} else if (precision == QUDA_SINGLE_PRECISION) {
if (precision == QUDA_DOUBLE_PRECISION)
else if (gauge_param->cpu_prec == QUDA_SINGLE_PRECISION)
retrieveGaugeField((float*)cpuGauge, (double2*)(cudaGauge->even), (double2*)(cudaGauge->odd),
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
} else if (cudaGauge->precision == QUDA_SINGLE_PRECISION) {
if (gauge_param->cpu_prec == QUDA_DOUBLE_PRECISION)
retrieveGaugeField((double*)cpuGauge, (float4*)(cudaGauge->even), (float4*)(cudaGauge->odd),
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
else if (precision == QUDA_SINGLE_PRECISION)
else if (gauge_param->cpu_prec == QUDA_SINGLE_PRECISION)
retrieveGaugeField((float*)cpuGauge, (float4*)(cudaGauge->even), (float4*)(cudaGauge->odd),
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
} else if (precision == QUDA_HALF_PRECISION) {
if (precision == QUDA_DOUBLE_PRECISION)
} else if (cudaGauge->precision == QUDA_HALF_PRECISION) {
if (gauge_param->cpu_prec == QUDA_DOUBLE_PRECISION)
retrieveGaugeField((double*)cpuGauge, (short4*)(cudaGauge->even), (short4*)(cudaGauge->odd),
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
else if (precision == QUDA_SINGLE_PRECISION)
else if (gauge_param->cpu_prec == QUDA_SINGLE_PRECISION)
retrieveGaugeField((float*)cpuGauge, (short4*)(cudaGauge->even), (short4*)(cudaGauge->odd),
cudaGauge->reconstruct, cudaGauge->bytes, cudaGauge->volume);
}
}
......@@ -8,8 +8,12 @@
extern "C" {
#endif
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, ReconstructType reconstruct, Tboundary t_boundary,
Precision precision, int *X, double anisotropy, int blockDim);
void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, Precision precision,
ReconstructType reconstruct, Tboundary t_boundary,
int *X, double anisotropy, int blockDim);
void restoreGaugeField(void *cpuGauge, FullGauge *cudaGauge);
void freeGaugeField(FullGauge *cudaCauge);
#ifdef __cplusplus
......
......@@ -67,6 +67,13 @@ void printInvertParam(QudaInvertParam *param) {
printf("verbosity = %d\n", param->verbosity);
}
// Terminates the program when the supplied precision is half precision,
// which the error message reports as unsupported on the cpu. Any other
// precision value returns normally.
void checkPrecision(QudaPrecision precision) {
  // Nothing to do unless half precision was requested.
  if (precision != QUDA_HALF_PRECISION) return;

  printf("Half precision not supported on cpu\n");
  exit(-1);
}
void initQuda(int dev)
{
int deviceCount;
......@@ -122,13 +129,14 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
gauge_param->packed_size = (gauge_param->reconstruct == QUDA_RECONSTRUCT_8) ? 8 : 12;
createGaugeField(&cudaGaugePrecise, h_gauge, gauge_param->reconstruct, gauge_param->t_boundary,
gauge_param->cuda_prec, gauge_param->X, gauge_param->anisotropy, gauge_param->blockDim);
createGaugeField(&cudaGaugePrecise, h_gauge, gauge_param->cuda_prec, gauge_param->reconstruct,
gauge_param->t_boundary, gauge_param->X, gauge_param->anisotropy, gauge_param->blockDim);
gauge_param->gaugeGiB = 2.0*cudaGaugePrecise.bytes/ (1 << 30);
if (gauge_param->cuda_prec_sloppy != gauge_param->cuda_prec ||
gauge_param->reconstruct_sloppy != gauge_param->reconstruct) {
createGaugeField(&cudaGaugeSloppy, h_gauge, gauge_param->reconstruct_sloppy, gauge_param->t_boundary,
gauge_param->cuda_prec_sloppy, gauge_param->X, gauge_param->anisotropy,
createGaugeField(&cudaGaugeSloppy, h_gauge, gauge_param->cuda_prec_sloppy,
gauge_param->reconstruct_sloppy, gauge_param->t_boundary,
gauge_param->X, gauge_param->anisotropy,
gauge_param->blockDim_sloppy);
gauge_param->gaugeGiB += 2.0*cudaGaugeSloppy.bytes/ (1 << 30);
} else {
......@@ -212,16 +220,9 @@ void endQuda(void)
if (cudaCloverInvSloppy.even.clover) freeCloverField(&cudaCloverInvSloppy);
}
// Terminates the program when the inverter parameters request half
// precision for the cpu-side data (param->cpu_prec); otherwise returns.
void checkPrecision(QudaInvertParam *param) {
  const bool cpuIsHalf = (param->cpu_prec == QUDA_HALF_PRECISION);
  if (!cpuIsHalf) return;

  printf("Half precision not supported on cpu\n");
  exit(-1);
}
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int parity, int dagger)
{
checkPrecision(inv_param);
checkPrecision(inv_param->cpu_prec);
ParitySpinor in = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
ParitySpinor out = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
......@@ -249,7 +250,7 @@ void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int parity,
void MatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger)
{
checkPrecision(inv_param);
checkPrecision(inv_param->cpu_prec);
ParitySpinor in = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
ParitySpinor out = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
......@@ -279,7 +280,7 @@ void MatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger)
void MatPCDagMatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
{
checkPrecision(inv_param);
checkPrecision(inv_param->cpu_prec);
ParitySpinor in = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
ParitySpinor out = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
......@@ -308,7 +309,7 @@ void MatPCDagMatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
}
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger) {
checkPrecision(inv_param);
checkPrecision(inv_param->cpu_prec);
FullSpinor in = allocateSpinorField(cudaGaugePrecise.X, inv_param->cuda_prec);
FullSpinor out = allocateSpinorField(cudaGaugePrecise.X, inv_param->cuda_prec);
......@@ -339,7 +340,7 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
{
invert_param = param;
checkPrecision(param);
checkPrecision(param->cpu_prec);
int slenh = cudaGaugePrecise.volume*spinorSiteSize;
param->spinorGiB = (double)slenh*(param->cuda_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double): sizeof(float);
......
......@@ -44,14 +44,14 @@ void init() {
param.cpu_prec = QUDA_SINGLE_PRECISION;
param.cuda_prec = QUDA_SINGLE_PRECISION;
param.reconstruct = QUDA_RECONSTRUCT_12;
param.reconstruct = QUDA_RECONSTRUCT_8;
param.cuda_prec_sloppy = param.cuda_prec;
param.reconstruct_sloppy = param.reconstruct;
param.X[0] = 4;
param.X[1] = 4;
param.X[2] = 4;
param.X[3] = 4;
param.X[0] = 24;
param.X[1] = 24;
param.X[2] = 24;
param.X[3] = 24;
setDims(param.X);
param.anisotropy = 2.3;
......@@ -99,16 +99,28 @@ void packTest() {
stopwatchStart();
param.gauge_order = QUDA_CPS_WILSON_GAUGE_ORDER;
createGaugeField(&cudaGaugePrecise, cpsGauge, param.reconstruct, param.t_boundary, param.cuda_prec, param.X, 1.0, param.blockDim);
createGaugeField(&cudaGaugePrecise, cpsGauge, param.cuda_prec, param.reconstruct,
param.t_boundary, param.X, 1.0, param.blockDim);
double cpsGtime = stopwatchReadSeconds();
printf("CPS Gauge send time = %e seconds\n", cpsGtime);
stopwatchStart();
restoreGaugeField(cpsGauge, &cudaGaugePrecise);
double cpsGRtime = stopwatchReadSeconds();
printf("CPS Gauge restore time = %e seconds\n", cpsGRtime);
stopwatchStart();
param.gauge_order = QUDA_QDP_GAUGE_ORDER;
createGaugeField(&cudaGaugePrecise, qdpGauge, param.reconstruct, param.t_boundary, param.cuda_prec, param.X, 1.0, param.blockDim);
createGaugeField(&cudaGaugePrecise, qdpGauge, param.cpu_prec, param.reconstruct,
param.t_boundary, param.X, 1.0, param.blockDim);
double qdpGtime = stopwatchReadSeconds();
printf("QDP Gauge send time = %e seconds\n", qdpGtime);
stopwatchStart();
restoreGaugeField(qdpGauge, &cudaGaugePrecise);
double qdpGRtime = stopwatchReadSeconds();
printf("QDP Gauge restore time = %e seconds\n", qdpGRtime);
stopwatchStart();
loadSpinorField(cudaFullSpinor, (void*)spinor, QUDA_SINGLE_PRECISION, QUDA_DIRAC_ORDER);
double sSendTime = stopwatchReadSeconds();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment