Advanced Computing Platform for Theoretical Physics

Commit 5bf45c2a authored by rbabich's avatar rbabich
Browse files

finished quda clover (untested)


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@463 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 458d290a
......@@ -13,8 +13,6 @@ these kernels. Mixed-precision implementations of both CG and
BiCGstab are provided, with support for double, single, and half
(16-bit fixed-point) precision.
NOTE: In this pre-release, only the BiCGstab inverter supports clover.
Software compatibility:
......@@ -53,7 +51,8 @@ Using the library:
Include the header file "invert_quda.h" in your application, link
against libquda.a, and study invert_test.c for an example of the
interface.
interface. The various inverter options are enumerated in
enum_quda.h.
Known issues:
......
......@@ -63,20 +63,24 @@ void freeCloverField(FullClover *clover)
template <typename Float>
static inline void packCloverMatrix(float4* a, Float *b, int Vh)
{
const Float half = 0.5; // pre-include factor of 1/2 introduced by basis change
for (int i=0; i<18; i++) {
a[i*Vh].x = b[4*i+0];
a[i*Vh].y = b[4*i+1];
a[i*Vh].z = b[4*i+2];
a[i*Vh].w = b[4*i+3];
a[i*Vh].x = half * b[4*i+0];
a[i*Vh].y = half * b[4*i+1];
a[i*Vh].z = half * b[4*i+2];
a[i*Vh].w = half * b[4*i+3];
}
}
template <typename Float>
static inline void packCloverMatrix(double2* a, Float *b, int Vh)
{
const Float half = 0.5; // pre-include factor of 1/2 introduced by basis change
for (int i=0; i<36; i++) {
a[i*Vh].x = b[2*i+0];
a[i*Vh].y = b[2*i+1];
a[i*Vh].x = half * b[2*i+0];
a[i*Vh].y = half * b[2*i+1];
}
}
......@@ -113,6 +117,7 @@ static void packFullClover(FloatN *even, FloatN *odd, Float *clover, int *X)
template<typename Float>
static inline void packCloverMatrixHalf(short4 *res, float *norm, Float *clover, int Vh)
{
const Float half = 0.5; // pre-include factor of 1/2 introduced by basis change
Float max, a, c;
// treat the two chiral blocks separately
......@@ -128,7 +133,7 @@ static inline void packCloverMatrixHalf(short4 *res, float *norm, Float *clover,
res[i*Vh].z = (short) (c * clover[4*i+2]);
res[i*Vh].w = (short) (c * clover[4*i+3]);
}
norm[chi*Vh] = 1/c;
norm[chi*Vh] = half/c;
res += 9;
clover += 36;
}
......@@ -310,3 +315,17 @@ void loadCloverField(FullClover ret, void *clover, Precision cpu_prec, CloverFie
exit(-1);
}
}
/*
void createCloverField(FullClover *cudaClover, void *cpuClover, int *X, Precision precision)
{
if (invert_param->clover_cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
exit(-1);
}
// X should contain the dimensions of the even/odd sublattice
*cudaClover = allocateCloverField(X, precision);
loadCloverField(*cudaClover, cpuClover, precision, invert_param->clover_order);
}
*/
......@@ -724,9 +724,12 @@ void MatPCCuda(ParitySpinor out, FullGauge gauge, ParitySpinor in, double kappa,
if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
dslashCuda(tmp, gauge, in, 1, dagger);
dslashXpayCuda(out, gauge, tmp, 0, dagger, in, kappa2);
} else {
} else if (matpc_type == QUDA_MATPC_ODD_ODD) {
dslashCuda(tmp, gauge, in, 0, dagger);
dslashXpayCuda(out, gauge, tmp, 1, dagger, in, kappa2);
} else {
printf("QUDA error: matpc_type not valid for plain Wilson\n");
exit(-1);
}
}
......@@ -1798,18 +1801,32 @@ void cloverDslashXpayHCuda(ParitySpinor res, FullGauge gauge, FullClover cloverI
}
// Apply the even-odd preconditioned clover-improved Dirac operator
void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover cloverInv, ParitySpinor in,
void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cloverInv, ParitySpinor in,
double kappa, ParitySpinor tmp, MatPCType matpc_type, int dagger)
{
double kappa2 = -kappa*kappa;
if (((matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) || (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC))
&& (clover.even.clover == NULL)) {
printf("QUDA error: For asymmetric matpc_type, the uninverted clover term must be loaded\n");
}
if (!dagger) {
if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
cloverDslashCuda(tmp, gauge, cloverInv, in, 1, dagger);
cloverDslashXpayCuda(out, gauge, cloverInv, tmp, 0, dagger, in, kappa2);
} else {
} else if (matpc_type == QUDA_MATPC_ODD_ODD) {
cloverDslashCuda(tmp, gauge, cloverInv, in, 0, dagger);
cloverDslashXpayCuda(out, gauge, cloverInv, tmp, 1, dagger, in, kappa2);
} else if (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
printf("QUDA error: matpc_type QUDA_MATPC_EVEN_EVEN_ASYMMETRIC not implemented yet\n");
exit(1);
} else if (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
printf("QUDA error: matpc_type QUDA_MATPC_ODD_ODD_ASYMMETRIC not implemented yet\n");
exit(-1);
} else {
printf("QUDA error: invalid matpc_type\n");
exit(-1);
}
} else { // very inefficient (FIXME)
if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
......@@ -1817,20 +1834,29 @@ void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover cloverInv, Pa
cloverDslashCuda(out, gauge, cloverInv, tmp, 1, dagger);
copyCuda(tmp, out);
dslashXpayCuda(out, gauge, tmp, 0, dagger, in, kappa2);
} else {
} else if (matpc_type == QUDA_MATPC_ODD_ODD) {
cloverCuda(tmp, gauge, cloverInv, in, 1);
cloverDslashCuda(out, gauge, cloverInv, tmp, 0, dagger);
copyCuda(tmp, out);
dslashXpayCuda(out, gauge, tmp, 1, dagger, in, kappa2);
} else if (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
printf("QUDA error: matpc_type QUDA_MATPC_EVEN_EVEN_ASYMMETRIC not implemented yet\n");
exit(1);
} else if (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
printf("QUDA error: matpc_type QUDA_MATPC_ODD_ODD_ASYMMETRIC not implemented yet\n");
exit(-1);
} else {
printf("QUDA error: invalid matpc_type\n");
exit(-1);
}
}
}
void cloverMatPCDagMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover cloverInv, ParitySpinor in,
void cloverMatPCDagMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullClover cloverInv, ParitySpinor in,
double kappa, ParitySpinor tmp, MatPCType matpc_type)
{
cloverMatPCCuda(out, gauge, cloverInv, in, kappa, tmp, matpc_type, 0);
cloverMatPCCuda(out, gauge, cloverInv, out, kappa, tmp, matpc_type, 1);
cloverMatPCCuda(out, gauge, clover, cloverInv, in, kappa, tmp, matpc_type, 0);
cloverMatPCCuda(out, gauge, clover, cloverInv, out, kappa, tmp, matpc_type, 1);
}
// Apply the full operator (FIXME: create kernel to eliminate tmp)
......
......@@ -16,9 +16,12 @@ extern "C" {
extern FullGauge cudaGaugePrecise;
extern FullGauge cudaGaugeSloppy;
extern FullClover cudaClover;
extern FullClover cudaCloverPrecise;
extern FullClover cudaCloverSloppy;
extern FullClover cudaCloverInvPrecise;
extern FullClover cudaCloverInvSloppy;
extern QudaGaugeParam *gauge_param;
extern QudaInvertParam *invert_param;
......@@ -90,12 +93,12 @@ extern "C" {
int oddBit, int daggerBit, ParitySpinor x,
double a);
void cloverMatPCCuda(ParitySpinor out, FullGauge gauge,
void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover,
FullClover cloverInv, ParitySpinor in, double kappa,
ParitySpinor tmp, MatPCType matpc_type, int dagger);
void cloverMatPCDagMatPCCuda(ParitySpinor out, FullGauge gauge,
FullClover cloverInv, ParitySpinor in,
double kappa, ParitySpinor tmp,
FullClover clover, FullClover cloverInv,
ParitySpinor in, double kappa, ParitySpinor tmp,
MatPCType matpc_type);
void cloverMatCuda(FullSpinor out, FullGauge gauge, FullClover clover,
FullSpinor in, double kappa, ParitySpinor tmp,
......@@ -112,13 +115,11 @@ extern "C" {
ParitySpinor spinor, int oddBit);
// -- inv_cg_cuda.cpp
void invertCgCuda(ParitySpinor x, ParitySpinor b, FullGauge gauge,
FullGauge gaugeSloppy, ParitySpinor tmp,
void invertCgCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
QudaInvertParam *param);
// -- inv_bicgstab_cuda.cpp
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, FullGauge gauge,
FullGauge gaugeSloppy, ParitySpinor tmp,
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
QudaInvertParam *param, DagType dag_type);
#ifdef __cplusplus
......
......@@ -10,17 +10,18 @@
// What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
int test_type = 1;
// clover-improved? (0 = plain Wilson, 1 = clover)
int dslash_type = 0;
int clover_yes = 0;
QudaGaugeParam gaugeParam;
QudaInvertParam inv_param;
FullGauge gauge;
FullClover clover, cloverInv;
FullSpinor cudaSpinor;
FullSpinor cudaSpinorOut;
ParitySpinor tmp;
void *hostGauge[4];
void *hostGauge[4], *hostClover, *hostCloverInv;
void *spinor, *spinorEven, *spinorOdd;
void *spinorRef, *spinorRefEven, *spinorRefOdd;
void *spinorGPU, *spinorGPUEven, *spinorGPUOdd;
......@@ -36,35 +37,42 @@ void init() {
gaugeParam.X[1] = 24;
gaugeParam.X[2] = 24;
gaugeParam.X[3] = 32;
setDims(gaugeParam.X);
gaugeParam.blockDim = 64;
gaugeParam.anisotropy = 2.3;
gaugeParam.gauge_order = QUDA_QDP_GAUGE_ORDER;
gaugeParam.t_boundary = QUDA_ANTI_PERIODIC_T;
gaugeParam.cpu_prec = QUDA_DOUBLE_PRECISION;
gaugeParam.cuda_prec = QUDA_SINGLE_PRECISION;
gaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
gaugeParam.reconstruct_sloppy = gaugeParam.reconstruct;
gaugeParam.cuda_prec_sloppy = gaugeParam.cuda_prec;
gaugeParam.anisotropy = 2.3;
gaugeParam.gauge_order = QUDA_QDP_GAUGE_ORDER;
gaugeParam.t_boundary = QUDA_ANTI_PERIODIC_T;
gaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
gaugeParam.blockDim = 64;
if (clover_yes) {
inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
} else {
inv_param.dslash_type = QUDA_WILSON_DSLASH;
}
inv_param.kappa = kappa;
inv_param.cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_SINGLE_PRECISION;
if (test_type == 2) inv_param.dirac_order = QUDA_DIRAC_ORDER;
else inv_param.dirac_order = QUDA_DIRAC_ORDER;
inv_param.kappa = kappa;
if (dslash_type) {
inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
inv_param.clover_cpu_prec = QUDA_SINGLE_PRECISION;
if (clover_yes) {
inv_param.clover_cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec = QUDA_SINGLE_PRECISION;
} else {
inv_param.dslash_type = QUDA_WILSON_DSLASH;
inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
}
inv_param.verbosity = QUDA_VERBOSE;
gauge_param = &gaugeParam;
invert_param = &inv_param;
......@@ -75,6 +83,17 @@ void init() {
// construct input fields
for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gSize);
if (clover_yes) {
size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
if (test_type == 2) {
hostClover = malloc(V*cloverSiteSize*cSize);
hostCloverInv = hostClover; // fake it
} else {
hostClover = NULL;
hostCloverInv = malloc(V*cloverSiteSize*cSize);
}
}
spinor = malloc(V*spinorSiteSize*sSize);
spinorRef = malloc(V*spinorSiteSize*sSize);
spinorGPU = malloc(V*spinorSiteSize*sSize);
......@@ -91,20 +110,36 @@ void init() {
spinorGPUOdd = (void*)((float*)spinorGPU + Vh*spinorSiteSize);
}
printf("Randomizing fields...");
printf("Randomizing fields... ");
construct_gauge_field(hostGauge, 1, gaugeParam.cpu_prec);
construct_spinor_field(spinor, 1, 0, 0, 0, inv_param.cpu_prec);
if (clover_yes) {
double norm = 1.0; // random components range between -norm and norm
double diag = 1.0; // constant added to the diagonal
if (test_type == 2) {
construct_clover_field(hostClover, norm, diag, inv_param.clover_cpu_prec);
} else {
construct_clover_field(hostCloverInv, norm, diag, inv_param.clover_cpu_prec);
}
}
printf("done.\n"); fflush(stdout);
int dev = 0;
initQuda(dev);
loadGaugeQuda(hostGauge, &gaugeParam);
loadGaugeQuda(hostGauge, &gaugeParam);
gauge = cudaGaugePrecise;
printf("Sending fields to GPU..."); fflush(stdout);
if (clover_yes) {
loadCloverQuda(hostClover, hostCloverInv, &inv_param);
clover = cudaCloverPrecise;
cloverInv = cudaCloverInvPrecise;
}
printf("Sending fields to GPU... "); fflush(stdout);
if (!TRANSFER) {
......@@ -129,6 +164,10 @@ void init() {
void end() {
// release memory
for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
if (clover_yes) {
if (test_type == 2) free(hostClover);
else free(hostCloverInv);
}
free(spinorGPU);
free(spinor);
free(spinorRef);
......@@ -150,16 +189,32 @@ double dslashCUDA() {
for (int i = 0; i < LOOPS; i++) {
switch (test_type) {
case 0:
if (TRANSFER) dslashQuda(spinorOdd, spinorEven, &inv_param, ODD_BIT, DAGGER_BIT);
else dslashCuda(cudaSpinor.odd, gauge, cudaSpinor.even, ODD_BIT, DAGGER_BIT);
if (TRANSFER) {
dslashQuda(spinorOdd, spinorEven, &inv_param, ODD_BIT, DAGGER_BIT);
} else if (!clover_yes) {
dslashCuda(cudaSpinor.odd, gauge, cudaSpinor.even, ODD_BIT, DAGGER_BIT);
} else {
cloverDslashCuda(cudaSpinor.odd, gauge, cloverInv, cudaSpinor.even, ODD_BIT, DAGGER_BIT);
}
break;
case 1:
if (TRANSFER) MatPCQuda(spinorOdd, spinorEven, &inv_param, DAGGER_BIT);
else MatPCCuda(cudaSpinor.odd, gauge, cudaSpinor.even, kappa, tmp, QUDA_MATPC_EVEN_EVEN, DAGGER_BIT);
if (TRANSFER) {
MatPCQuda(spinorOdd, spinorEven, &inv_param, DAGGER_BIT);
} else if (!clover_yes) {
MatPCCuda(cudaSpinor.odd, gauge, cudaSpinor.even, kappa, tmp, QUDA_MATPC_EVEN_EVEN, DAGGER_BIT);
} else {
cloverMatPCCuda(cudaSpinor.odd, gauge, clover, cloverInv, cudaSpinor.even, kappa, tmp,
QUDA_MATPC_EVEN_EVEN, DAGGER_BIT);
}
break;
case 2:
if (TRANSFER) MatQuda(spinorGPU, spinor, &inv_param, DAGGER_BIT);
else MatCuda(cudaSpinorOut, gauge, cudaSpinor, kappa, DAGGER_BIT);
if (TRANSFER) {
MatQuda(spinorGPU, spinor, &inv_param, DAGGER_BIT);
} else if (!clover_yes) {
MatCuda(cudaSpinorOut, gauge, cudaSpinor, kappa, DAGGER_BIT);
} else {
cloverMatCuda(cudaSpinorOut, gauge, clover, cudaSpinor, kappa, tmp, DAGGER_BIT);
}
}
}
......@@ -230,7 +285,7 @@ void dslashTest() {
int flops = test_type ? 1320*2 + 48 : 1320;
int floats = test_type ? 2*(7*24+8*gaugeParam.packed_size+24)+24 : 7*24+8*gaugeParam.packed_size+24;
if (dslash_type) {
if (clover_yes) {
flops += test_type ? 504*2 : 504;
floats += test_type ? 72*2 : 72;
}
......
......@@ -39,16 +39,26 @@ extern "C" {
} QudaPrecision;
// Whether the preconditioned matrix is (1-k^2 Deo Doe) or (1-k^2 Doe Deo)
//
// For the clover-improved Wilson Dirac operator, QUDA_MATPC_EVEN_EVEN
// defaults to the "symmetric" form, (1 - k^2 A_ee^-1 D_eo A_oo^-1 D_oe),
// and likewise for QUDA_MATPC_ODD_ODD.
//
// For the "asymmetric" form, (A_ee - k^2 D_eo A_oo^-1 D_oe), select
// QUDA_MATPC_EVEN_EVEN_ASYMMETRIC.
//
typedef enum QudaMatPCType_s {
QUDA_MATPC_EVEN_EVEN,
QUDA_MATPC_ODD_ODD
QUDA_MATPC_ODD_ODD,
QUDA_MATPC_EVEN_EVEN_ASYMMETRIC,
QUDA_MATPC_ODD_ODD_ASYMMETRIC
} QudaMatPCType;
// The different solutions supported
typedef enum QudaSolutionType_s {
QUDA_MAT_SOLUTION,
QUDA_MATPC_SOLUTION,
QUDA_MATPCDAG_SOLUTION,
QUDA_MATPCDAG_SOLUTION, // not implemented
QUDA_MATPCDAG_MATPC_SOLUTION,
} QudaSolutionType;
......
......@@ -8,8 +8,7 @@
#include <spinor_quda.h>
#include <gauge_quda.h>
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise,
FullGauge gaugeSloppy, ParitySpinor tmp,
void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, ParitySpinor tmp,
QudaInvertParam *invert_param, DagType dag_type)
{
ParitySpinor r = allocateParitySpinor(x.X, x.precision);
......@@ -38,7 +37,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise
copyCuda(b, src);
copyCuda(r_sloppy, src);
/*MatPCDagCuda(y, gaugePrecise, src, invert_param->kappa, tmp, invert_param->matpc_type);
/*MatPCDagCuda(y, cudaGaugePrecise, src, invert_param->kappa, tmp, invert_param->matpc_type);
copyCuda(src_sloppy, y);*/ // uncomment for BiCRstab
zeroCuda(y);
......@@ -90,7 +89,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise
cxpaypbzCuda(r_sloppy, beta_omega, v, beta, p); // 8
}
MatPCCuda(v, gaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
MatPCCuda(v, cudaGaugeSloppy, p, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
// rv = (r0,v)
rv = cDotProductCuda(src_sloppy, v);
......@@ -102,7 +101,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise
caxpyCuda(alpha, v, r_sloppy); // 4
alpha.x *= -1.0; alpha.y *= -1.0;
MatPCCuda(t, gaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
MatPCCuda(t, cudaGaugeSloppy, r_sloppy, invert_param->kappa, tmp_sloppy, invert_param->matpc_type, dag_type);
// omega = (t, r) / (t, t)
omega_t2 = cDotProductNormACuda(t, r_sloppy); // 6
......@@ -122,7 +121,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise
if (updateR) {
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
MatPCCuda(r, gaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
r2 = xmyNormCuda(b, r);
if (x.precision != r_sloppy.precision) copyCuda(r_sloppy, r);
......@@ -169,7 +168,7 @@ void invertBiCGstabCuda(ParitySpinor x, ParitySpinor src, FullGauge gaugePrecise
#if 0
// Calculate the true residual
MatPCCuda(r, gaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
MatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa, tmp, invert_param->matpc_type, dag_type);
double true_res = xmyNormCuda(src, r);
printf("Converged after %d iterations, r2 = %e, true_r2 = %e\n", k, sqrt(r2/b2), sqrt(true_res / b2));
......
......@@ -7,8 +7,7 @@
#include <spinor_quda.h>
#include <gauge_quda.h>
void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
FullGauge gaugeSloppy, ParitySpinor tmp, QudaInvertParam *perf)
void invertCgCuda(ParitySpinor x, ParitySpinor source, ParitySpinor tmp, QudaInvertParam *perf)
{
ParitySpinor p = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
ParitySpinor Ap = allocateParitySpinor(x.X, invert_param->cuda_prec_sloppy);
......@@ -63,7 +62,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
printf("%d iterations, r2 = %e\n", k, r2);
stopwatchStart();
while (r2 > stop && k<perf->maxiter) {
MatPCDagMatPCCuda(Ap, gaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
MatPCDagMatPCCuda(Ap, cudaGaugeSloppy, p, perf->kappa, tmp_sloppy, perf->matpc_type);
pAp = reDotProductCuda(p, Ap);
......@@ -86,7 +85,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
if (x.precision != x_sloppy.precision) copyCuda(x, x_sloppy);
MatPCDagMatPCCuda(r, gaugePrecise, x, invert_param->kappa,
MatPCDagMatPCCuda(r, cudaGaugePrecise, x, invert_param->kappa,
tmp, invert_param->matpc_type);
r2 = xmyNormCuda(b, r);
......@@ -133,7 +132,7 @@ void invertCgCuda(ParitySpinor x, ParitySpinor source, FullGauge gaugePrecise,
#if 0
// Calculate the true residual
MatPCDagMatPCCuda(Ap, gauge, x, perf->kappa, tmp, perf->matpc_type);
MatPCDagMatPCCuda(Ap, cudaGaugePrecise, x, perf->kappa, tmp, perf->matpc_type);
copyCuda(r, b);
mxpyCuda(Ap, r);
double true_res = normCuda(r);
......
......@@ -14,9 +14,12 @@
FullGauge cudaGaugePrecise; // precise gauge field
FullGauge cudaGaugeSloppy; // sloppy gauge field
FullClover cudaCloverPrecise;
FullClover cudaCloverPrecise; // clover term
FullClover cudaCloverSloppy;
FullClover cudaCloverInvPrecise; // inverted clover term
FullClover cudaCloverInvSloppy;
void printGaugeParam(QudaGaugeParam *param) {
printf("Gauge Params:\n");
......@@ -100,6 +103,17 @@ void initQuda(int dev)
cudaGaugeSloppy.even = NULL;
cudaGaugeSloppy.odd = NULL;
cudaCloverPrecise.even.clover = NULL;
cudaCloverPrecise.odd.clover = NULL;
cudaCloverSloppy.even.clover = NULL;
cudaCloverSloppy.odd.clover = NULL;
cudaCloverInvPrecise.even.clover = NULL;
cudaCloverInvPrecise.odd.clover = NULL;
cudaCloverInvSloppy.even.clover = NULL;
cudaCloverInvSloppy.odd.clover = NULL;
}
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
......@@ -120,22 +134,86 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
} else {
cudaGaugeSloppy = cudaGaugePrecise;
}
}
// for now, only single-precision clover is supported
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *param)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
{
if (!cudaGaugePrecise.even) {
printf("QUDA error: loadGaugeQuda() must precede call to loadCloverQuda()\n");
exit(-1);
}
if (!h_clover && !h_clovinv) {
printf("QUDA error: loadCloverQuda() called with neither clover term nor inverse\n");
exit(-1);
}
if (inv_param->clover_cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on CPU\n");
exit(-1);
}
int X[4];
for (int i=0; i<4; i++) {
X[i] = gauge_param->X[i];
}
X[0] /= 2; // dimensions of the even-odd sublattice
inv_param->cloverGiB = 0;
if (h_clover) {
cudaCloverPrecise = allocateCloverField(X, inv_param->clover_cuda_prec);
loadCloverField(cudaCloverPrecise, h_clover, inv_param->clover_cuda_prec, inv_param->clover_order);
inv_param->cloverGiB += 2.0*cudaCloverPrecise.even.bytes / (1<<30);
if (inv_param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ||
inv_param->matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
if (inv_param->clover_cuda_prec != inv_param->clover_cuda_prec_sloppy) {
cudaCloverSloppy = allocateCloverField(X, inv_param->clover_cuda_prec_sloppy);
loadCloverField(cudaCloverSloppy, h_clover, inv_param->clover_cuda_prec_sloppy, inv_param->clover_order);
inv_param->cloverGiB += 2.0*cudaCloverInvSloppy.even.bytes / (1<<30);
} else {
cudaCloverSloppy = cudaCloverPrecise;
}
} // sloppy precision clover term not needed otherwise
}
cudaCloverInvPrecise = allocateCloverField(X, inv_param->clover_cuda_prec);
if (!h_clovinv) {
printf("QUDA error: clover term inverse not implemented yet\n");
exit(-1);
} else {
loadCloverField(cudaCloverInvPrecise, h_clovinv, inv_param->clover_cuda_prec, inv_param->clover_order);
}
inv_param->cloverGiB += 2.0*cudaCloverInvPrecise.even.bytes / (1<<30);
if (inv_param->clover_cuda_prec != inv_param->clover_cuda_prec_sloppy) {
cudaCloverInvSloppy = allocateCloverField(X, inv_param->clover_cuda_prec_sloppy);
loadCloverField(cudaCloverInvSloppy, h_clovinv, inv_param->clover_cuda_prec_sloppy, inv_param->clover_order);
inv_param->cloverGiB += 2.0*cudaCloverInvSloppy.even.bytes / (1<<30);
} else {
cudaCloverInvSloppy = cudaCloverInvPrecise;
}
}
void endQuda()
// discard clover term but keep the inverse
void discardCloverQuda(QudaInvertParam *inv_param)
{
inv_param->cloverGiB -= 2.0*cudaCloverPrecise.even.bytes / (1<<30);
freeCloverField(&cudaCloverPrecise);
if (cudaCloverSloppy.even.clover) {
inv_param->cloverGiB -= 2.0*cudaCloverSloppy.even.bytes / (1<<30);
freeCloverField(&cudaCloverSloppy);
}
}
void endQuda(void)
{
freeSpinorBuffer();
freeGaugeField(&cudaGaugePrecise);
freeGaugeField(&cudaGaugeSloppy);
if (cudaCloverPrecise.even.clover) freeCloverField(&cudaCloverPrecise);
if (cudaCloverSloppy.even.clover) freeCloverField(&cudaCloverSloppy);
if (cudaCloverInvPrecise.even.clover) freeCloverField(&cudaCloverInvPrecise);
if (cudaCloverInvSloppy.even.clover) freeCloverField(&cudaCloverInvSloppy);
}
void checkPrecision(QudaInvertParam *param) {
......@@ -153,7 +231,14 @@ void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int parity,
ParitySpinor out = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
loadParitySpinor(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
dslashCuda(out, cudaGaugePrecise, in, parity, dagger);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
cloverDslashCuda(out, cudaGaugePrecise, cudaCloverInvPrecise, in, parity, dagger);
} else {
printf("QUDA error: unsupported dslash_type\n");
exit(-1);
}
retrieveParitySpinor(h_out, out, inv_param->cpu_prec, inv_param->dirac_order);
freeParitySpinor(out);
......@@ -169,7 +254,15 @@ void MatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger)
ParitySpinor tmp = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
loadParitySpinor(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCCuda(out, cudaGaugePrecise, in, inv_param->kappa, tmp, inv_param->matpc_type, dagger);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
cloverMatPCCuda(out, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, in, inv_param->kappa,
tmp, inv_param->matpc_type, dagger);
} else {
printf("QUDA error: unsupported dslash_type\n");
exit(-1);
}
retrieveParitySpinor(h_out, out, inv_param->cpu_prec, inv_param->dirac_order);
freeParitySpinor(tmp);
......@@ -186,7 +279,15 @@ void MatPCDagMatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
ParitySpinor tmp = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
loadParitySpinor(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
MatPCDagMatPCCuda(out, cudaGaugePrecise, in, inv_param->kappa, tmp, inv_param->matpc_type);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
cloverMatPCDagMatPCCuda(out, cudaGaugePrecise, cudaCloverPrecise, cudaCloverInvPrecise, in, inv_param->kappa,
tmp, inv_param->matpc_type);
} else {
printf("QUDA error: unsupported dslash_type\n");
exit(-1);
}
retrieveParitySpinor(h_out, out, inv_param->cpu_prec, inv_param->dirac_order);
freeParitySpinor(tmp);
......@@ -202,9 +303,16 @@ void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger) {
loadSpinorField(in, h_in, inv_param->cpu_prec, inv_param->dirac_order);
dslashXpayCuda(out.odd, cudaGaugePrecise, in.even, 1, dagger, in.odd, -inv_param->kappa);
dslashXpayCuda(out.even, cudaGaugePrecise, in.odd, 0, dagger, in.even, -inv_param->kappa);
if (inv_param->dslash_type == QUDA_WILSON_DSLASH) {
MatCuda(out, cudaGaugePrecise, in, -inv_param->kappa, dagger);
} else if (inv_param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
ParitySpinor tmp = allocateParitySpinor(cudaGaugePrecise.X, inv_param->cuda_prec);
cloverMatCuda(out, cudaGaugePrecise, cudaCloverPrecise, in, inv_param->kappa, tmp, dagger);
freeParitySpinor(tmp);
} else {
printf("QUDA error: unsupported dslash_type\n");
exit(-1);
}
retrieveSpinorField(h_out, out, inv_param->cpu_prec, inv_param->dirac_order);
freeSpinorField(out);
......@@ -244,8 +352,14 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
b.odd = tmp;
}
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) { x.odd = tmp; x.even = out; }
else { x.even = tmp; x.odd = out; }
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN ||
param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
x.odd = tmp;
x.even = out;
} else {
x.even = tmp;
x.odd = out;
}
loadSpinorField(b, h_b, param->cpu_prec, param->dirac_order);
......@@ -258,13 +372,52 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
// cps uses a different anisotropy normalization
if (param->dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
axCuda(1.0/gauge_param->anisotropy, b.even);
axCuda(1.0/gauge_param->anisotropy, b.even);
axCuda(1.0/gauge_param->anisotropy, b.odd);
}
if (param->dslash_type == QUDA_WILSON_DSLASH) {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
// in = b_e + k D_eo b_o
dslashXpayCuda(in, cudaGaugePrecise, b.odd, 0, 0, b.even, kappa);
} else {
} else if (param->matpc_type == QUDA_MATPC_ODD_ODD) {
// in = b_o + k D_oe b_e
dslashXpayCuda(in, cudaGaugePrecise, b.even, 1, 0, b.odd, kappa);
} else {
printf("QUDA error: matpc_type not valid for plain Wilson\n");
exit(-1);
}
} else if (param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
// in = A_ee^-1 (b_e + k D_eo A_oo^-1 b_o)
ParitySpinor aux = tmp; // aliases b.odd when PRESERVE_SOURCE_NO is set
cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.odd, 1);
dslashXpayCuda(aux, cudaGaugePrecise, in, 0, 0, b.even, kappa);
cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, aux, 0);
} else if (param->matpc_type == QUDA_MATPC_ODD_ODD) {
// in = A_oo^-1 (b_o + k D_oe A_ee^-1 b_e)
ParitySpinor aux = tmp; // aliases b.odd when PRESERVE_SOURCE_NO is set
cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.even, 0);
dslashXpayCuda(aux, cudaGaugePrecise, in, 1, 0, b.odd, kappa);
cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, aux, 1);
} else if (param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
// in = b_e + k D_eo A_oo^-1 b_o
ParitySpinor aux = out; // aliases b.even when PRESERVE_SOURCE_NO is set
cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.odd, 1);
dslashXpayCuda(aux, cudaGaugePrecise, in, 0, 0, b.even, kappa);
copyCuda(in, aux);
} else if (param->matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
// in = b_o + k D_oe A_ee^-1 b_e
ParitySpinor aux = out; // aliases b.even when PRESERVE_SOURCE_NO is set
cloverCuda(in, cudaGaugePrecise, cudaCloverInvPrecise, b.even, 0);
dslashXpayCuda(aux, cudaGaugePrecise, in, 1, 0, b.odd, kappa);
copyCuda(in, aux);
} else {
printf("QUDA error: invalid matpc_type\n");
exit(-1);
}
} else {
printf("QUDA error: unsupported dslash_type\n");
exit(-1);
}
} else if (param->solution_type == QUDA_MATPC_SOLUTION ||
......@@ -293,14 +446,14 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
copyCuda(out, in);
MatPCCuda(in, cudaGaugePrecise, out, kappa, tmp, param->matpc_type, QUDA_DAG_YES);
}
invertCgCuda(out, in, cudaGaugePrecise, cudaGaugeSloppy, tmp, param);
invertCgCuda(out, in, tmp, param);
break;
case QUDA_BICGSTAB_INVERTER:
if (param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
invertBiCGstabCuda(out, in, cudaGaugePrecise, cudaGaugeSloppy, tmp, param, QUDA_DAG_YES);
invertBiCGstabCuda(out, in, tmp, param, QUDA_DAG_YES);
copyCuda(in, out);
}
invertBiCGstabCuda(out, in, cudaGaugePrecise, cudaGaugeSloppy, tmp, param, QUDA_DAG_NO);
invertBiCGstabCuda(out, in, tmp, param, QUDA_DAG_NO);
break;
default:
printf("Inverter type %d not implemented\n", param->inv_type);
......@@ -315,11 +468,33 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
loadSpinorField(b, h_b, param->cpu_prec, param->dirac_order);
}
if (param->dslash_type == QUDA_WILSON_DSLASH) {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
// x_o = b_o + k D_oe x_e
dslashXpayCuda(x.odd, cudaGaugePrecise, out, 1, 0, b.odd, kappa);
} else {
// x_e = b_e + k D_eo x_o
dslashXpayCuda(x.even, cudaGaugePrecise, out, 0, 0, b.even, kappa);
}
} else {
if (param->matpc_type == QUDA_MATPC_EVEN_EVEN) {
// x_o = A_oo^-1 (b_o + k D_oe x_e)
ParitySpinor aux = b.even;
dslashXpayCuda(aux, cudaGaugePrecise, out, 1, 0, b.odd, kappa);
cloverCuda(x.odd, cudaGaugePrecise, cudaCloverInvPrecise, aux, 1);
} else if (param->matpc_type == QUDA_MATPC_ODD_ODD) {
// x_e = A_ee^-1 (b_e + k D_eo x_o)
ParitySpinor aux = b.odd;
dslashXpayCuda(aux, cudaGaugePrecise, out, 0, 0, b.even, kappa);
cloverCuda(x.even, cudaGaugePrecise, cudaCloverInvPrecise, aux, 0);
} else if (param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
// x_o = b_o + k D_oe x_e
dslashXpayCuda(x.odd, cudaGaugePrecise, out, 1, 0, b.odd, kappa);
} else {
// x_e = b_e + k D_eo x_o
dslashXpayCuda(x.even, cudaGaugePrecise, out, 0, 0, b.even, kappa);
}
}
retrieveSpinorField(h_x, x, param->cpu_prec, param->dirac_order);
......@@ -335,4 +510,3 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
return;
}
......@@ -15,6 +15,8 @@ extern "C" {
QudaGaugeFieldOrder gauge_order;
QudaTboundary t_boundary;
QudaPrecision cpu_prec;
QudaPrecision cuda_prec;
......@@ -25,29 +27,27 @@ extern "C" {
QudaGaugeFixed gauge_fix;
QudaTboundary t_boundary;
int blockDim; // number of threads in a block
int blockDim_sloppy;
int packed_size;
double gaugeGiB;
int blockDim; // number of threads in a block
int blockDim_sloppy;
} QudaGaugeParam;
typedef struct QudaInvertParam_s {
double kappa;
QudaMassNormalization mass_normalization;
QudaDslashType dslash_type;
QudaInverterType inv_type;
double kappa;
double tol;
int iter;
int maxiter;
double reliable_delta; // reliable update tolerance
QudaMatPCType matpc_type;
QudaSolutionType solution_type;
QudaMassNormalization mass_normalization;
QudaPreserveSource preserve_source;
......@@ -63,24 +63,26 @@ extern "C" {
QudaCloverFieldOrder clover_order;
QudaVerbosity verbosity;
int iter;
double spinorGiB;
double cloverGiB;
double gflops;
double secs;
QudaVerbosity verbosity;
} QudaInvertParam;
// Interface functions
void initQuda(int dev);
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param);
void discardCloverQuda(QudaInvertParam *inv_param);
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int parity, int dagger);
void MatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger);
void MatPCDagMatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param);
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger);
void endQuda(void);
......
......@@ -10,7 +10,7 @@ int main(int argc, char **argv)
{
int device = 0;
void *gauge[4];
void *gauge[4], *clover_inv;
QudaGaugeParam Gauge_param;
QudaInvertParam inv_param;
......@@ -21,40 +21,53 @@ int main(int argc, char **argv)
Gauge_param.X[3] = 32;
setDims(Gauge_param.X);
Gauge_param.blockDim = 64;
Gauge_param.blockDim_sloppy = 64;
Gauge_param.anisotropy = 1.0;
Gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
Gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
Gauge_param.cpu_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.cuda_prec = QUDA_DOUBLE_PRECISION;
Gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
Gauge_param.cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
Gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
Gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
Gauge_param.anisotropy = 1.0;
inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
Gauge_param.blockDim = 64;
Gauge_param.blockDim_sloppy = 64;
Gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
Gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
gauge_param = &Gauge_param;
int clover_yes = 0; // 0 for plain Wilson, 1 for clover
if (clover_yes) {
inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
} else {
inv_param.dslash_type = QUDA_WILSON_DSLASH;
}
inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
double mass = -0.95;
inv_param.kappa = 1.0 / (2.0*(4 + mass));
inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param->anisotropy + mass));
inv_param.tol = 1e-12;
inv_param.maxiter = 10000;
inv_param.reliable_delta = 1e-3;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.solution_type = QUDA_MAT_SOLUTION;
inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
inv_param.cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_DOUBLE_PRECISION;
inv_param.cuda_prec = QUDA_SINGLE_PRECISION;
inv_param.cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
inv_param.solution_type = QUDA_MAT_SOLUTION;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.preserve_source = QUDA_PRESERVE_SOURCE_YES;
inv_param.dirac_order = QUDA_DIRAC_ORDER;
if (clover_yes) {
inv_param.clover_cpu_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec = QUDA_DOUBLE_PRECISION;
inv_param.clover_cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
}
inv_param.verbosity = QUDA_VERBOSE;
size_t gSize = (Gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
......@@ -65,6 +78,15 @@ int main(int argc, char **argv)
}
construct_gauge_field(gauge, 1, Gauge_param.cpu_prec);
if (clover_yes) {
double norm = 1.0; // random components range between -norm and norm
double diag = 1.0; // constant added to the diagonal
size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
clover_inv = malloc(V*cloverSiteSize*cSize);
construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec);
}
void *spinorIn = malloc(V*spinorSiteSize*sSize);
void *spinorOut = malloc(V*spinorSiteSize*sSize);
void *spinorCheck = malloc(V*spinorSiteSize*sSize);
......@@ -78,6 +100,7 @@ int main(int argc, char **argv)
initQuda(device);
loadGaugeQuda((void*)gauge, &Gauge_param);
if (clover_yes) loadCloverQuda(NULL, clover_inv, &inv_param);
invertQuda(spinorOut, spinorIn, &inv_param);
......
......@@ -42,9 +42,12 @@ extern "C" {
CloverFieldOrder clover_order);
void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
CloverFieldOrder clover_order);
void loadCloverField(FullSpinor ret, void *clover, Precision cpu_prec,
void loadCloverField(FullClover ret, void *clover, Precision cpu_prec,
CloverFieldOrder clover_order);
/* void createCloverField(FullClover *cudaClover, void *cpuClover, int *X,
Precision precision); */
#ifdef __cplusplus
}
#endif
......
......@@ -440,6 +440,28 @@ void construct_gauge_field(void **gauge, int type, Precision precision) {
}
}
template <typename Float>
void constructCloverField(Float *res, double norm, double diag) {
Float c = 2.0 * norm / RAND_MAX;
for(int i = 0; i < V; i++) {
for (int j = 0; j < 72; j++) {
res[i*72 + j] = c * rand() - 1.0;
}
for (int j = 0; j< 6; j++) {
res[i*72 + j] += diag;
res[i*72 + j+36] += diag;
}
}
}
void construct_clover_field(void *clover, double norm, double diag, Precision precision) {
if (precision == QUDA_DOUBLE_PRECISION) constructCloverField((double *)clover, norm, diag);
else constructCloverField((float *)clover, norm, diag);
}
template <typename Float>
void constructPointSpinorField(Float *res, int i0, int s0, int c0) {
Float *resEven = res;
......
......@@ -17,6 +17,7 @@ extern "C" {
int getOddBit(int X);
void construct_gauge_field(void **gauge, int type, Precision precision);
void construct_clover_field(void *clover, double norm, double diag, Precision precision);
void construct_spinor_field(void *spinor, int type, int i0, int s0, int c0, Precision precision);
void su3_construct(void *mat, ReconstructType reconstruct, Precision precision);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment