Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit e11b7242 authored by rbabich's avatar rbabich
Browse files

bugfixes to quda clover (half precision still broken)


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@465 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 5a093817
......@@ -134,7 +134,7 @@ static inline void packCloverMatrixHalf(short4 *res, float *norm, Float *clover,
res[i*Vh].w = (short) (c * clover[4*i+3]);
}
norm[chi*Vh] = half/c;
res += 9;
res += 9*Vh;
clover += 36;
}
}
......@@ -265,10 +265,10 @@ void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
}
} else {
if (cpu_prec == QUDA_DOUBLE_PRECISION) {
packFullCloverHalf((short4 *)packedEven, (float *) packedEvenNorm, (short4 *)packedOdd,
packFullCloverHalf((short4 *)packedEven, (float *)packedEvenNorm, (short4 *)packedOdd,
(float *) packedOddNorm, (double *)clover, ret.even.X);
} else {
packFullCloverHalf((short4 *)packedEven, (float *) packedEvenNorm, (short4 *)packedOdd,
packFullCloverHalf((short4 *)packedEven, (float *)packedEvenNorm, (short4 *)packedOdd,
(float * )packedOddNorm, (float *)clover, ret.even.X);
}
}
......
......@@ -2,6 +2,7 @@
#include <stdio.h>
#include <dslash_quda.h>
#include <spinor_quda.h> // not needed once call to allocateParitySpinor() is removed
#if (__CUDA_ARCH__ == 130)
static __inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i)
......@@ -1855,8 +1856,10 @@ void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullC
void cloverMatPCDagMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cloverInv, ParitySpinor in,
double kappa, ParitySpinor tmp, MatPCType matpc_type)
{
cloverMatPCCuda(out, gauge, clover, cloverInv, in, kappa, tmp, matpc_type, 0);
cloverMatPCCuda(out, gauge, clover, cloverInv, out, kappa, tmp, matpc_type, 1);
ParitySpinor aux = allocateParitySpinor(out.X, out.precision); // FIXME: eliminate aux
cloverMatPCCuda(aux, gauge, clover, cloverInv, in, kappa, tmp, matpc_type, 0);
cloverMatPCCuda(out, gauge, clover, cloverInv, aux, kappa, tmp, matpc_type, 1);
freeParitySpinor(aux);
}
// Apply the full operator (FIXME: create kernel to eliminate tmp)
......
......@@ -10,7 +10,7 @@
// What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
int test_type = 1;
// clover-improved? (0 = plain Wilson, 1 = clover)
int clover_yes = 0;
int clover_yes = 1;
QudaGaugeParam gaugeParam;
QudaInvertParam inv_param;
......@@ -116,7 +116,7 @@ void init() {
construct_spinor_field(spinor, 1, 0, 0, 0, inv_param.cpu_prec);
if (clover_yes) {
double norm = 0; // random components range between -norm and norm
double norm = 0; // clover components are random numbers in the range (-norm, norm)
double diag = 1.0; // constant added to the diagonal
if (test_type == 2) {
......
......@@ -94,9 +94,9 @@ extern "C" {
} QudaTboundary;
typedef enum QudaVerbosity_s {
QUDA_SILENT = 0,
QUDA_SUMMARIZE = 1,
QUDA_VERBOSE = 2
QUDA_SILENT,
QUDA_SUMMARIZE,
QUDA_VERBOSE
} QudaVerbosity;
typedef struct double3_s {
......
......@@ -109,7 +109,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
} else {
cloverMatPCCuda(v, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param->kappa, tmp_sloppy,
cloverMatPCCuda(t, cudaGaugeSloppy, cudaCloverSloppy, cudaCloverInvSloppy, r_sloppy, invert_param->kappa, tmp_sloppy,
invert_param->matpc_type,dag_type);
}
......
......@@ -92,7 +92,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInv
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
if (invert_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type);
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type);
} else {
cloverMatPCDagMatPCCuda(r, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, x, invert_param->kappa,
tmp, invert_param->matpc_type);
......
......@@ -138,10 +138,6 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
{
if (!cudaGaugePrecise.even) {
printf("QUDA error: loadGaugeQuda() must precede call to loadCloverQuda()\n");
exit(-1);
}
if (!h_clover && !h_clovinv) {
printf("QUDA error: loadCloverQuda() called with neither clover term nor inverse\n");
exit(-1);
......@@ -161,14 +157,14 @@ void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
if (h_clover) {
cudaCloverPrecise = allocateCloverField(X, inv_param->clover_cuda_prec);
loadCloverField(cudaCloverPrecise, h_clover, inv_param->clover_cuda_prec, inv_param->clover_order);
loadCloverField(cudaCloverPrecise, h_clover, inv_param->clover_cpu_prec, inv_param->clover_order);
inv_param->cloverGiB += 2.0*cudaCloverPrecise.even.bytes / (1<<30);
if (inv_param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ||
inv_param->matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
if (inv_param->clover_cuda_prec != inv_param->clover_cuda_prec_sloppy) {
cudaCloverSloppy = allocateCloverField(X, inv_param->clover_cuda_prec_sloppy);
loadCloverField(cudaCloverSloppy, h_clover, inv_param->clover_cuda_prec_sloppy, inv_param->clover_order);
loadCloverField(cudaCloverSloppy, h_clover, inv_param->clover_cpu_prec, inv_param->clover_order);
inv_param->cloverGiB += 2.0*cudaCloverInvSloppy.even.bytes / (1<<30);
} else {
cudaCloverSloppy = cudaCloverPrecise;
......@@ -181,13 +177,13 @@ void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
printf("QUDA error: clover term inverse not implemented yet\n");
exit(-1);
} else {
loadCloverField(cudaCloverInvPrecise, h_clovinv, inv_param->clover_cuda_prec, inv_param->clover_order);
loadCloverField(cudaCloverInvPrecise, h_clovinv, inv_param->clover_cpu_prec, inv_param->clover_order);
}
inv_param->cloverGiB += 2.0*cudaCloverInvPrecise.even.bytes / (1<<30);
if (inv_param->clover_cuda_prec != inv_param->clover_cuda_prec_sloppy) {
cudaCloverInvSloppy = allocateCloverField(X, inv_param->clover_cuda_prec_sloppy);
loadCloverField(cudaCloverInvSloppy, h_clovinv, inv_param->clover_cuda_prec_sloppy, inv_param->clover_order);
loadCloverField(cudaCloverInvSloppy, h_clovinv, inv_param->clover_cpu_prec, inv_param->clover_order);
inv_param->cloverGiB += 2.0*cudaCloverInvSloppy.even.bytes / (1<<30);
} else {
cudaCloverInvSloppy = cudaCloverInvPrecise;
......@@ -444,7 +440,12 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
case QUDA_CG_INVERTER:
if (param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) {
copyCuda(out, in);
MatPCCuda(in, cudaGaugePrecise, out, kappa, tmp, param->matpc_type, QUDA_DAG_YES);
if (param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(in, cudaGaugePrecise, out, kappa, tmp, param->matpc_type, QUDA_DAG_YES);
} else {
cloverMatPCCuda(in, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, out, kappa, tmp,
param->matpc_type, QUDA_DAG_YES);
}
}
invertCgCuda(out, in, tmp, param);
break;
......@@ -477,22 +478,17 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
dslashXpayCuda(x.even, cudaGaugePrecise, out, 0, 0, b.even, kappa);
}
} else {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN ||
param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
// x_o = A_oo^-1 (b_o + k D_oe x_e)
ParitySpinor aux = b.even;
dslashXpayCuda(aux, cudaGaugePrecise, out, 1, 0, b.odd, kappa);
cloverCuda(x.odd, cudaGaugePrecise, cudaCloverInvPrecise, aux, 1);
} else if (param->matpc_type == QUDA_MATPC_ODD_ODD) {
} else {
// x_e = A_ee^-1 (b_e + k D_eo x_o)
ParitySpinor aux = b.odd;
dslashXpayCuda(aux, cudaGaugePrecise, out, 0, 0, b.even, kappa);
cloverCuda(x.even, cudaGaugePrecise, cudaCloverInvPrecise, aux, 0);
} else if (param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
// x_o = b_o + k D_oe x_e
dslashXpayCuda(x.odd, cudaGaugePrecise, out, 1, 0, b.odd, kappa);
} else {
// x_e = b_e + k D_eo x_o
dslashXpayCuda(x.even, cudaGaugePrecise, out, 0, 0, b.even, kappa);
}
}
......
......@@ -26,7 +26,7 @@ int main(int argc, char **argv)
Gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
Gauge_param.cpu_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.cuda_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.cuda_prec = QUDA_SINGLE_PRECISION;
Gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
Gauge_param.cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
Gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
......@@ -37,14 +37,14 @@ int main(int argc, char **argv)
gauge_param = &Gauge_param;
int clover_yes = 0; // 0 for plain Wilson, 1 for clover
int clover_yes = 1; // 0 for plain Wilson, 1 for clover
if (clover_yes) {
inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
} else {
inv_param.dslash_type = QUDA_WILSON_DSLASH;
}
inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
inv_param.inv_type = QUDA_CG_INVERTER;
double mass = -0.95;
inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param->anisotropy + mass));
......@@ -79,7 +79,7 @@ int main(int argc, char **argv)
construct_gauge_field(gauge, 1, Gauge_param.cpu_prec);
if (clover_yes) {
double norm = 0.1; // random components range between -norm and norm
double norm = 0.2; // clover components are random numbers in the range (-norm, norm)
double diag = 1.0; // constant added to the diagonal
size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
......
......@@ -447,7 +447,7 @@ void constructCloverField(Float *res, double norm, double diag) {
for(int i = 0; i < V; i++) {
for (int j = 0; j < 72; j++) {
res[i*72 + j] = c * rand() - 1.0;
res[i*72 + j] = c*rand() - norm;
}
for (int j = 0; j< 6; j++) {
res[i*72 + j] += diag;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment