Advanced Computing Platform for Theoretical Physics

Commit 0dd9dc6c authored by rbabich's avatar rbabich
Browse files

quda: more disentangling of tests and lib


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@571 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 16131f00
Release 0.1 - 17 November 2009
- Initial public release.
#ifndef _QUDA_DSLASH_H
#define _QUDA_DSLASH_H
#ifndef _DSLASH_QUDA_H
#define _DSLASH_QUDA_H
#include <cuComplex.h>
#include <quda_internal.h>
#define gaugeSiteSize 18 // real numbers per link
#define spinorSiteSize 24 // real numbers per spinor
#define cloverSiteSize 72 // real numbers per block-diagonal clover matrix
#ifdef __cplusplus
extern "C" {
#endif
......@@ -129,4 +125,4 @@ extern "C" {
}
#endif
#endif // _QUDA_DLASH_H
#endif // _DLASH_QUDA_H
......@@ -37,6 +37,9 @@ gen:
clean:
-rm -f *.o $(QUDA)
dslash_quda.o: dslash_quda.cu $(HDRS) $(CORE)
$(NVCC) $(NVCCFLAGS) -maxrregcount=80 $< -c -o $@
%.o: %.cpp $(HDRS)
$(CXX) $(CXXFLAGS) $< -c -o $@
......
......@@ -1521,10 +1521,10 @@ double2 cDotProductCuda(ParitySpinor x, ParitySpinor y) {
int length = x.length/2;
blas_quda_bytes += 2*x.real_length*x.precision;
if (x.precision == QUDA_DOUBLE_PRECISION) {
char c = NULL;
char c = 0;
return cDotProductFCuda((double2*)x.spinor, (double2*)y.spinor, c, length);
} else if (x.precision == QUDA_SINGLE_PRECISION) {
char c = NULL;
char c = 0;
return cDotProductFCuda((float2*)x.spinor, (float2*)y.spinor, c, length);
} else {
int spinor_bytes = x.length*sizeof(short);
......
......@@ -7,6 +7,8 @@
#include <spinor_quda.h>
#include <gauge_quda.h>
#define spinorSiteSize 24 // real numbers per spinor
FullGauge cudaGaugePrecise; // precise gauge field
FullGauge cudaGaugeSloppy; // sloppy gauge field
......
#include <quda_internal.h>
#include <blas_reference.h>
// performs the operation x[i] *= a
template <typename Float>
void aX(Float a, Float *x, int len) {
inline void aX(Float a, Float *x, int len) {
for (int i=0; i<len; i++) x[i] *= a;
}
void ax(double a, void *x, int len, Precision precision) {
void ax(double a, void *x, int len, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION) aX(a, (double*)x, len);
else aX((float)a, (float*)x, len);
}
// performs the operation y[i] -= x[i] (minus x plus y)
template <typename Float>
void mXpY(Float *x, Float *y, int len) {
inline void mXpY(Float *x, Float *y, int len) {
for (int i=0; i<len; i++) y[i] -= x[i];
}
void mxpy(void* x, void* y, int len, Precision precision) {
void mxpy(void* x, void* y, int len, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION) mXpY((double*)x, (double*)y, len);
else mXpY((float*)x, (float*)y, len);
}
......@@ -26,13 +25,13 @@ void mxpy(void* x, void* y, int len, Precision precision) {
// returns the square of the L2 norm of the vector
template <typename Float>
double norm2(Float *v, int len) {
inline double norm2(Float *v, int len) {
double sum=0.0;
for (int i=0; i<len; i++) sum += v[i]*v[i];
return sum;
}
double norm_2(void *v, int len, Precision precision) {
double norm_2(void *v, int len, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION) return norm2((double*)v, len);
else return norm2((float*)v, len);
}
......
#ifndef _QUDA_BLAS_REF_H
#define _QUDA_BLAS_REF_H
#ifndef _BLAS_REFERENCE_H
#define _BLAS_REFERENCE_H
#include <quda_internal.h>
#include <enum_quda.h>
#ifdef __cplusplus
extern "C" {
......@@ -9,9 +9,9 @@ extern "C" {
// ---------- blas_reference.cpp ----------
double norm_2(void *vector, int len, Precision precision);
void mxpy(void *x, void *y, int len, Precision precision);
void ax(double a, void *x, int len, Precision precision);
double norm_2(void *vector, int len, QudaPrecision precision);
void mxpy(void *x, void *y, int len, QudaPrecision precision);
void ax(double a, void *x, int len, QudaPrecision precision);
/* void zero(float* a, int cnt);
void copy(float* a, float *b, int len);*/
......@@ -30,4 +30,4 @@ extern "C" {
}
#endif
#endif // _QUDA_BLAS_REF_H
#endif // _BLAS_REFERENCE_H
......@@ -7,9 +7,6 @@
#include <test_util.h>
// What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
int test_type = 1;
QudaInvertParam inv_param;
ParitySpinor x, y, z, w, v;
......
......@@ -2,9 +2,10 @@
#include <stdlib.h>
#include <math.h>
#include <quda_internal.h>
#include <util_quda.h>
#include <test_util.h>
#include <blas_reference.h>
#include <dslash_reference.h>
int Z[4];
......@@ -250,7 +251,7 @@ void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int o
}
void dslash(void *res, void **gaugeFull, void *spinorField, int oddBit, int daggerBit,
Precision sPrecision, Precision gPrecision) {
QudaPrecision sPrecision, QudaPrecision gPrecision) {
if (sPrecision == QUDA_DOUBLE_PRECISION)
if (gPrecision == QUDA_DOUBLE_PRECISION)
......@@ -281,7 +282,7 @@ void Mat(sFloat *out, gFloat **gauge, sFloat *in, sFloat kappa, int daggerBit) {
}
void mat(void *out, void **gauge, void *in, double kappa, int dagger_bit,
Precision sPrecision, Precision gPrecision) {
QudaPrecision sPrecision, QudaPrecision gPrecision) {
if (sPrecision == QUDA_DOUBLE_PRECISION)
if (gPrecision == QUDA_DOUBLE_PRECISION)
......@@ -298,7 +299,7 @@ void mat(void *out, void **gauge, void *in, double kappa, int dagger_bit,
// Apply the even-odd preconditioned Dirac operator
template <typename sFloat, typename gFloat>
void MatPC(sFloat *outEven, gFloat **gauge, sFloat *inEven, sFloat kappa,
int daggerBit, MatPCType matpc_type) {
int daggerBit, QudaMatPCType matpc_type) {
sFloat *tmp = (sFloat*)malloc(Vh*spinorSiteSize*sizeof(sFloat));
......@@ -318,7 +319,7 @@ void MatPC(sFloat *outEven, gFloat **gauge, sFloat *inEven, sFloat kappa,
}
void matpc(void *outEven, void **gauge, void *inEven, double kappa,
MatPCType matpc_type, int dagger_bit, Precision sPrecision, Precision gPrecision) {
QudaMatPCType matpc_type, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision) {
if (sPrecision == QUDA_DOUBLE_PRECISION)
if (gPrecision == QUDA_DOUBLE_PRECISION)
......
#include <blas_reference.h>
#ifndef _DSLASH_REFERENCE_H
#define _DSLASH_REFERENCE_H
#ifndef _QUDA_DSLASH_REF_H
#define _QUDA_DSLASH_REF_H
#include <enum_quda.h>
#ifdef __cplusplus
extern "C" {
......@@ -13,18 +13,19 @@ extern "C" {
void setDims(int *);
void dslash(void *res, void **gauge, void *spinorField,
int oddBit, int daggerBit, Precision sPrecision, Precision gPrecision);
void dslash(void *res, void **gauge, void *spinorField, int oddBit,
int daggerBit, QudaPrecision sPrecision,
QudaPrecision gPrecision);
void mat(void *out, void **gauge, void *in, double kappa, int daggerBit,
Precision sPrecision, Precision gPrecision);
QudaPrecision sPrecision, QudaPrecision gPrecision);
void matpc(void *out, void **gauge, void *in, double kappa, MatPCType matpc_type,
int daggerBit, Precision sPrecision, Precision gPrecision);
void matpc(void *out, void **gauge, void *in, double kappa,
QudaMatPCType matpc_type, int daggerBit,
QudaPrecision sPrecision, QudaPrecision gPrecision);
#ifdef __cplusplus
}
#endif
#endif // _QUDA_DLASH_REF_H
#endif // _DSLASH_REFERENCE_H
......@@ -119,7 +119,7 @@ void init() {
printf("Randomizing fields... ");
construct_gauge_field(hostGauge, 1, gaugeParam.cpu_prec);
construct_gauge_field(hostGauge, 1, gaugeParam.cpu_prec, &gaugeParam);
construct_spinor_field(spinor, 1, 0, 0, 0, inv_param.cpu_prec);
if (clover_yes) {
......
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <quda_internal.h>
#include <quda.h>
#include <test_util.h>
#include <blas_reference.h>
#include <dslash_reference.h>
int main(int argc, char **argv)
......@@ -33,8 +35,6 @@ int main(int argc, char **argv)
Gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
Gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
gauge_param = &Gauge_param;
int clover_yes = 0; // 0 for plain Wilson, 1 for clover
if (clover_yes) {
......@@ -45,7 +45,7 @@ int main(int argc, char **argv)
inv_param.inv_type = QUDA_CG_INVERTER;
double mass = -0.94;
inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param->anisotropy + mass));
inv_param.kappa = 1.0 / (2.0*(1 + 3/Gauge_param.anisotropy + mass));
inv_param.tol = 5e-7;
inv_param.maxiter = 10000;
inv_param.reliable_delta = 1e-3;
......@@ -78,7 +78,7 @@ int main(int argc, char **argv)
for (int dir = 0; dir < 4; dir++) {
gauge[dir] = malloc(V*gaugeSiteSize*gSize);
}
construct_gauge_field(gauge, 1, Gauge_param.cpu_prec);
construct_gauge_field(gauge, 1, Gauge_param.cpu_prec, &Gauge_param);
if (clover_yes) {
double norm = 0.2; // clover components are random numbers in the range (-norm, norm)
......
......@@ -9,8 +9,6 @@
#include <test_util.h>
#include <dslash_reference.h>
#define FULL_WILSON 1
QudaGaugeParam param;
FullSpinor cudaFullSpinor;
ParitySpinor cudaParitySpinor;
......
......@@ -39,7 +39,6 @@ void init() {
param.anisotropy = 2.3;
param.t_boundary = QUDA_ANTI_PERIODIC_T;
param.gauge_fix = QUDA_GAUGE_FIXED_NO;
gauge_param = &param;
// construct gauge fields
for (int dir = 0; dir < 4; dir++) {
......@@ -48,7 +47,7 @@ void init() {
}
int dev = 0;
cudaSetDevice(dev);
initQuda(dev);
}
void end() {
......@@ -64,7 +63,7 @@ void SU3Test() {
init();
printf("Randomizing fields...");
construct_gauge_field((void**)gauge, 1, param.cpu_prec);
construct_gauge_field((void**)gauge, 1, param.cpu_prec, &param);
printf("done.\n");
loadGaugeQuda(gauge, &param);
......
#include <stdlib.h>
#include <stdio.h>
#include <quda_internal.h>
#include <dslash_reference.h>
#include <test_util.h>
......@@ -27,12 +26,12 @@ inline Float shortToFloat(short a) {
}
template <typename Float>
void printVector(Float *v) {
static void printVector(Float *v) {
printf("{(%f %f) (%f %f) (%f %f)}\n", v[0], v[1], v[2], v[3], v[4], v[5]);
}
// X indexes the lattice site
void printSpinorElement(void *spinor, int X, Precision precision) {
void printSpinorElement(void *spinor, int X, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION)
for (int s=0; s<4; s++) printVector((double*)spinor+X*24+s*6);
else
......@@ -40,7 +39,7 @@ void printSpinorElement(void *spinor, int X, Precision precision) {
}
// X indexes the full lattice
void printGaugeElement(void *gauge, int X, Precision precision) {
void printGaugeElement(void *gauge, int X, QudaPrecision precision) {
if (getOddBit(X) == 0) {
if (precision == QUDA_DOUBLE_PRECISION)
for (int m=0; m<3; m++) printVector((double*)gauge +(X/2)*gaugeSiteSize + m*3*2);
......@@ -66,54 +65,54 @@ int getOddBit(int Y) {
// a+=b
template <typename Float>
void complexAddTo(Float *a, Float *b) {
inline void complexAddTo(Float *a, Float *b) {
a[0] += b[0];
a[1] += b[1];
}
// a = b*c
template <typename Float>
void complexProduct(Float *a, Float *b, Float *c) {
inline void complexProduct(Float *a, Float *b, Float *c) {
a[0] = b[0]*c[0] - b[1]*c[1];
a[1] = b[0]*c[1] + b[1]*c[0];
}
// a = conj(b)*conj(c)
template <typename Float>
void complexConjugateProduct(Float *a, Float *b, Float *c) {
inline void complexConjugateProduct(Float *a, Float *b, Float *c) {
a[0] = b[0]*c[0] - b[1]*c[1];
a[1] = -b[0]*c[1] - b[1]*c[0];
}
// a = conj(b)*c
template <typename Float>
void complexDotProduct(Float *a, Float *b, Float *c) {
inline void complexDotProduct(Float *a, Float *b, Float *c) {
a[0] = b[0]*c[0] + b[1]*c[1];
a[1] = b[0]*c[1] - b[1]*c[0];
}
// a += b*c
template <typename Float>
void accumulateComplexProduct(Float *a, Float *b, Float *c, Float sign) {
inline void accumulateComplexProduct(Float *a, Float *b, Float *c, Float sign) {
a[0] += sign*(b[0]*c[0] - b[1]*c[1]);
a[1] += sign*(b[0]*c[1] + b[1]*c[0]);
}
// a += conj(b)*c)
template <typename Float>
void accumulateComplexDotProduct(Float *a, Float *b, Float *c) {
inline void accumulateComplexDotProduct(Float *a, Float *b, Float *c) {
a[0] += b[0]*c[0] + b[1]*c[1];
a[1] += b[0]*c[1] - b[1]*c[0];
}
template <typename Float>
void accumulateConjugateProduct(Float *a, Float *b, Float *c, int sign) {
inline void accumulateConjugateProduct(Float *a, Float *b, Float *c, int sign) {
a[0] += sign * (b[0]*c[0] - b[1]*c[1]);
a[1] -= sign * (b[0]*c[1] + b[1]*c[0]);
}
template <typename Float>
void su3Construct12(Float *mat) {
inline void su3Construct12(Float *mat) {
Float *w = mat+12;
w[0] = 0.0;
w[1] = 0.0;
......@@ -125,13 +124,13 @@ void su3Construct12(Float *mat) {
// Stabilized Bunk and Sommer
template <typename Float>
void su3Construct8(Float *mat) {
inline void su3Construct8(Float *mat) {
mat[0] = atan2(mat[1], mat[0]);
mat[1] = atan2(mat[13], mat[12]);
for (int i=8; i<18; i++) mat[i] = 0.0;
}
void su3_construct(void *mat, ReconstructType reconstruct, Precision precision) {
void su3_construct(void *mat, QudaReconstructType reconstruct, QudaPrecision precision) {
if (reconstruct == QUDA_RECONSTRUCT_12) {
if (precision == QUDA_DOUBLE_PRECISION) su3Construct12((double*)mat);
else su3Construct12((float*)mat);
......@@ -146,7 +145,7 @@ void su3_construct(void *mat, ReconstructType reconstruct, Precision precision)
//
// 48 flops
template <typename Float>
void su3Reconstruct12(Float *mat, int dir, int ga_idx) {
static void su3Reconstruct12(Float *mat, int dir, int ga_idx, QudaGaugeParam *param) {
Float *u = &mat[0*(3*2)];
Float *v = &mat[1*(3*2)];
Float *w = &mat[2*(3*2)];
......@@ -157,21 +156,21 @@ void su3Reconstruct12(Float *mat, int dir, int ga_idx) {
accumulateConjugateProduct(w+1*(2), u+0*(2), v+2*(2), -1);
accumulateConjugateProduct(w+2*(2), u+0*(2), v+1*(2), +1);
accumulateConjugateProduct(w+2*(2), u+1*(2), v+0*(2), -1);
Float u0 = (dir < 3 ? gauge_param->anisotropy :
(ga_idx >= (Z[3]-1)*Z[0]*Z[1]*Z[2]/2 ? gauge_param->t_boundary : 1));
Float u0 = (dir < 3 ? param->anisotropy :
(ga_idx >= (Z[3]-1)*Z[0]*Z[1]*Z[2]/2 ? param->t_boundary : 1));
w[0]*=u0; w[1]*=u0; w[2]*=u0; w[3]*=u0; w[4]*=u0; w[5]*=u0;
}
template <typename Float>
void su3Reconstruct8(Float *mat, int dir, int ga_idx) {
static void su3Reconstruct8(Float *mat, int dir, int ga_idx, QudaGaugeParam *param) {
// First reconstruct first row
Float row_sum = 0.0;
row_sum += mat[2]*mat[2];
row_sum += mat[3]*mat[3];
row_sum += mat[4]*mat[4];
row_sum += mat[5]*mat[5];
Float u0 = (dir < 3 ? gauge_param->anisotropy :
(ga_idx >= (Z[3]-1)*Z[0]*Z[1]*Z[2]/2 ? gauge_param->t_boundary : 1));
Float u0 = (dir < 3 ? param->anisotropy :
(ga_idx >= (Z[3]-1)*Z[0]*Z[1]*Z[2]/2 ? param->t_boundary : 1));
Float U00_mag = sqrt(1.f/(u0*u0) - row_sum);
mat[14] = mat[0];
......@@ -221,13 +220,13 @@ void su3Reconstruct8(Float *mat, int dir, int ga_idx) {
mat[17] *= -r_inv2;
}
void su3_reconstruct(void *mat, int dir, int ga_idx, ReconstructType reconstruct, Precision precision) {
void su3_reconstruct(void *mat, int dir, int ga_idx, QudaReconstructType reconstruct, QudaPrecision precision, QudaGaugeParam *param) {
if (reconstruct == QUDA_RECONSTRUCT_12) {
if (precision == QUDA_DOUBLE_PRECISION) su3Reconstruct12((double*)mat, dir, ga_idx);
else su3Reconstruct12((float*)mat, dir, ga_idx);
if (precision == QUDA_DOUBLE_PRECISION) su3Reconstruct12((double*)mat, dir, ga_idx, param);
else su3Reconstruct12((float*)mat, dir, ga_idx, param);
} else {
if (precision == QUDA_DOUBLE_PRECISION) su3Reconstruct8((double*)mat, dir, ga_idx);
else su3Reconstruct8((float*)mat, dir, ga_idx);
if (precision == QUDA_DOUBLE_PRECISION) su3Reconstruct8((double*)mat, dir, ga_idx, param);
else su3Reconstruct8((float*)mat, dir, ga_idx, param);
}
}
......@@ -242,7 +241,7 @@ void su3_construct_8_half(float *mat, short *mat_half) {
}
}
void su3_reconstruct_8_half(float *mat, short *mat_half, int dir, int ga_idx) {
void su3_reconstruct_8_half(float *mat, short *mat_half, int dir, int ga_idx, QudaGaugeParam *param) {
for (int i=0; i<18; i++) {
mat[i] = shortToFloat(mat_half[i]);
......@@ -250,11 +249,11 @@ void su3_reconstruct_8_half(float *mat, short *mat_half, int dir, int ga_idx) {
mat[0] *= M_PI;
mat[1] *= M_PI;
su3Reconstruct8(mat, dir, ga_idx);
su3Reconstruct8(mat, dir, ga_idx, param);
}*/
template <typename Float>
int compareFloats(Float *a, Float *b, int len, double epsilon) {
static int compareFloats(Float *a, Float *b, int len, double epsilon) {
for (int i = 0; i < len; i++) {
double diff = fabs(a[i] - b[i]);
if (diff > epsilon) return 0;
......@@ -262,7 +261,7 @@ int compareFloats(Float *a, Float *b, int len, double epsilon) {
return 1;
}
int compare_floats(void *a, void *b, int len, double epsilon, Precision precision) {
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION) return compareFloats((double*)a, (double*)b, len, epsilon);
else return compareFloats((float*)a, (float*)b, len, epsilon);
}
......@@ -277,16 +276,16 @@ int fullLatticeIndex(int i, int oddBit) {
}
template <typename Float>
void applyGaugeFieldScaling(Float **gauge, int Vh) {
static void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param) {
// Apply spatial scaling factor (u0) to spatial links
for (int d = 0; d < 3; d++) {
for (int i = 0; i < gaugeSiteSize*Vh*2; i++) {
gauge[d][i] /= gauge_param->anisotropy;
gauge[d][i] /= param->anisotropy;
}
}
// Apply boundary conditions to temporal links
if (gauge_param->t_boundary == QUDA_ANTI_PERIODIC_T) {
if (param->t_boundary == QUDA_ANTI_PERIODIC_T) {
for (int j = (Z[0]/2)*Z[1]*Z[2]*(Z[3]-1); j < Vh; j++) {
for (int i = 0; i < gaugeSiteSize; i++) {
gauge[3][j*gaugeSiteSize+i] *= -1.0;
......@@ -295,7 +294,7 @@ void applyGaugeFieldScaling(Float **gauge, int Vh) {
}
}
if (gauge_param->gauge_fix) {
if (param->gauge_fix) {
// set all gauge links (except for the first Z[0]*Z[1]*Z[2]/2) to the identity,
// to simulate fixing to the temporal gauge.
int dir = 3; // time direction only
......@@ -315,7 +314,7 @@ void applyGaugeFieldScaling(Float **gauge, int Vh) {
}
template <typename Float>
void constructUnitGaugeField(Float **res) {
static void constructUnitGaugeField(Float **res, QudaGaugeParam *param) {
Float *resOdd[4], *resEven[4];
for (int dir = 0; dir < 4; dir++) {
resEven[dir] = res[dir];
......@@ -335,12 +334,12 @@ void constructUnitGaugeField(Float **res) {
}
}
applyGaugeFieldScaling(res, Vh);
applyGaugeFieldScaling(res, Vh, param);
}
// normalize the vector a
template <typename Float>
void normalize(complex<Float> *a, int len) {
static void normalize(complex<Float> *a, int len) {
double sum = 0.0;
for (int i=0; i<len; i++) sum += norm(a[i]);
for (int i=0; i<len; i++) a[i] /= sqrt(sum);
......@@ -348,14 +347,14 @@ void normalize(complex<Float> *a, int len) {
// orthogonalize vector b to vector a
template <typename Float>
void orthogonalize(complex<Float> *a, complex<Float> *b, int len) {
static void orthogonalize(complex<Float> *a, complex<Float> *b, int len) {
complex<double> dot = 0.0;
for (int i=0; i<len; i++) dot += conj(a[i])*b[i];
for (int i=0; i<len; i++) b[i] -= (complex<Float>)dot*a[i];
}
template <typename Float>
void constructGaugeField(Float **res) {
static void constructGaugeField(Float **res, QudaGaugeParam *param) {
Float *resOdd[4], *resEven[4];
for (int dir = 0; dir < 4; dir++) {
resEven[dir] = res[dir];
......@@ -411,21 +410,21 @@ void constructGaugeField(Float **res) {
}
}
applyGaugeFieldScaling(res, Vh);
applyGaugeFieldScaling(res, Vh, param);
}
void construct_gauge_field(void **gauge, int type, Precision precision) {
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param) {
if (type == 0) {
if (precision == QUDA_DOUBLE_PRECISION) constructUnitGaugeField((double**)gauge);
else constructUnitGaugeField((float**)gauge);
if (precision == QUDA_DOUBLE_PRECISION) constructUnitGaugeField((double**)gauge, param);
else constructUnitGaugeField((float**)gauge, param);
} else {
if (precision == QUDA_DOUBLE_PRECISION) constructGaugeField((double**)gauge);
else constructGaugeField((float**)gauge);
if (precision == QUDA_DOUBLE_PRECISION) constructGaugeField((double**)gauge, param);
else constructGaugeField((float**)gauge, param);
}
}
template <typename Float>
void constructCloverField(Float *res, double norm, double diag) {
static void constructCloverField(Float *res, double norm, double diag) {
Float c = 2.0 * norm / RAND_MAX;
......@@ -440,14 +439,14 @@ void constructCloverField(Float *res, double norm, double diag) {
}
}
void construct_clover_field(void *clover, double norm, double diag, Precision precision) {
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION) constructCloverField((double *)clover, norm, diag);
else constructCloverField((float *)clover, norm, diag);
}
template <typename Float>
void constructPointSpinorField(Float *res, int i0, int s0, int c0) {
static void constructPointSpinorField(Float *res, int i0, int s0, int c0) {
Float *resEven = res;
Float *resOdd = res + Vh*spinorSiteSize;
......@@ -470,7 +469,7 @@ void constructPointSpinorField(Float *res, int i0, int s0, int c0) {
}
template <typename Float>
void constructSpinorField(Float *res) {
static void constructSpinorField(Float *res) {
for(int i = 0; i < V; i++) {
for (int s = 0; s < 4; s++) {
for (int m = 0; m < 3; m++) {
......@@ -481,7 +480,7 @@ void constructSpinorField(Float *res) {
}
}
void construct_spinor_field(void *spinor, int type, int i0, int s0, int c0, Precision precision) {
void construct_spinor_field(void *spinor, int type, int i0, int s0, int c0, QudaPrecision precision) {
if (type == 0) {
if (precision == QUDA_DOUBLE_PRECISION) constructPointSpinorField((double*)spinor, i0, s0, c0);
else constructPointSpinorField((float*)spinor, i0, s0, c0);
......@@ -492,7 +491,7 @@ void construct_spinor_field(void *spinor, int type, int i0, int s0, int c0, Prec
}
template <typename Float>
void compareSpinor(Float *spinorRef, Float *spinorGPU, int len) {
static void compareSpinor(Float *spinorRef, Float *spinorGPU, int len) {
int res = 1;
int fail_check = 16*res;
int fail[fail_check];
......@@ -520,12 +519,12 @@ void compareSpinor(Float *spinorRef, Float *spinorGPU, int len) {
}
void compare_spinor(void *spinor_ref, void *spinor_gpu, int len, Precision precision) {
void compare_spinor(void *spinor_ref, void *spinor_gpu, int len, QudaPrecision precision) {
if (precision == QUDA_DOUBLE_PRECISION) compareSpinor((double*)spinor_ref, (double*)spinor_gpu, len);
else compareSpinor((float*)spinor_ref, (float*)spinor_gpu, len);
}
void strong_check(void *spinorRef, void *spinorGPU, int len, Precision prec) {
void strong_check(void *spinorRef, void *spinorGPU, int len, QudaPrecision prec) {
printf("Reference:\n");
printSpinorElement(spinorRef, 0, prec); printf("...\n");
printSpinorElement(spinorRef, len-1, prec); printf("\n");
......@@ -538,7 +537,7 @@ void strong_check(void *spinorRef, void *spinorGPU, int len, Precision prec) {
}
template <typename Float>
void checkGauge(Float **oldG, Float **newG, double epsilon) {
static void checkGauge(Float **oldG, Float **newG, double epsilon) {
int fail_check = 17;
int fail[fail_check];
......
#ifndef _TEST_UTIL_H
#define _TEST_UTIL_H
#include <quda_internal.h>
#include <quda.h>
#define gaugeSiteSize 18 // real numbers per link
#define spinorSiteSize 24 // real numbers per spinor
#define cloverSiteSize 72 // real numbers per block-diagonal clover matrix
#ifdef __cplusplus
extern "C" {
#endif
void printSpinorElement(void *spinor, int X, Precision precision);
void printGaugeElement(void *gauge, int X, Precision precision);
void printSpinorElement(void *spinor, int X, QudaPrecision precision);
void printGaugeElement(void *gauge, int X, QudaPrecision precision);
int fullLatticeIndex(int i, int oddBit);
int getOddBit(int X);
void construct_gauge_field(void **gauge, int type, Precision precision);
void construct_clover_field(void *clover, double norm, double diag, Precision precision);
void construct_spinor_field(void *spinor, int type, int i0, int s0, int c0, Precision precision);
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param);
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision);
void construct_spinor_field(void *spinor, int type, int i0, int s0, int c0, QudaPrecision precision);
void su3_construct(void *mat, ReconstructType reconstruct, Precision precision);
void su3_reconstruct(void *mat, int dir, int ga_idx, ReconstructType reconstruct, Precision precision);
void su3_construct(void *mat, QudaReconstructType reconstruct, QudaPrecision precision);
void su3_reconstruct(void *mat, int dir, int ga_idx, QudaReconstructType reconstruct, QudaPrecision precision, QudaGaugeParam *param);
//void su3_construct_8_half(float *mat, short *mat_half);
//void su3_reconstruct_8_half(float *mat, short *mat_half, int dir, int ga_idx);
//void su3_reconstruct_8_half(float *mat, short *mat_half, int dir, int ga_idx, QudaGaugeParam *param);
void compare_spinor(void *spinor_cpu, void *spinor_gpu, int len, Precision precision);
void strong_check(void *spinor, void *spinorGPU, int len, Precision precision);
int compare_floats(void *a, void *b, int len, double epsilon, Precision precision);
void compare_spinor(void *spinor_cpu, void *spinor_gpu, int len, QudaPrecision precision);
void strong_check(void *spinor, void *spinorGPU, int len, QudaPrecision precision);
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision);
void check_gauge(void **, void **, double epsilon, Precision precision);
void check_gauge(void **, void **, double epsilon, QudaPrecision precision);
// ---------- gauge_read.cpp ----------
void readGaugeField(char *filename, float *gauge[], int argc, char *argv[]);
//void readGaugeField(char *filename, float *gauge[], int argc, char *argv[]);
#ifdef __cplusplus
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment