Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit 12b32dea authored by rbabich
Browse files

quda: standardized error reporting


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@599 be54200a-260c-0410-bdd7-ce6af2a381ab
parent d8c36033
......@@ -4,8 +4,6 @@ Version 0.x
- Introduced new interface functions newQudaGaugeParam() and
newQudaInvertParam() to allow for enhanced error checking. See
invert_test for an example of their use.
- Modified loadCloverQuda() to take gauge_param as an additional
parameter.
- Added auto-tuning blas to improve performance (see README for details).
- Improved stability of the half precision 8-parameter SU(3)
reconstruction (with thanks to Guochun Shi).
......
......@@ -83,7 +83,7 @@ extern "C" {
void initQuda(int dev);
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);
void saveGaugeQuda(void *h_gauge, QudaGaugeParam *param);
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param);
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param);
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
......
......@@ -3,6 +3,10 @@
#include <cuda_runtime.h>
#ifdef USE_QMP
#include <qmp.h>
#endif
//#define L1 4 // "x" dimension
//#define L2 4 // "y" dimension
//#define L3 4 // "z" dimension
......@@ -31,6 +35,7 @@
#define Tboundary QudaTboundary
#include <enum_quda.h>
#include <util_quda.h>
#ifdef __cplusplus
extern "C" {
......
#ifndef _UTIL_QUDA_H
#define _UTIL_QUDA_H
#ifdef USE_QMP
#include <qmp.h>
// printf-style output for QMP builds: every node other than node 0 skips the
// message; node 0 prints it and flushes stdout immediately afterwards.
#define printfQuda(...) do {                  \
    if (QMP_get_node_number() != 0) break;    \
    printf(__VA_ARGS__);                      \
    fflush(stdout);                           \
  } while (0)
// Report a fatal error and terminate through QMP_abort(1).
//
// The first argument must be a string literal: it is pasted onto the
// "QUDA error: " prefix to form the printf format, and any remaining
// arguments are that format's parameters.  The message is followed by the
// reporting node number and the source location of the call.
//
// __FILE__ is passed through "%s" rather than pasted into the format string,
// so a source path containing '%' cannot be misread as a conversion.
// stdout is flushed explicitly before aborting so that a buffered message is
// not lost if the abort path does not flush stdio.
#define errorQuda(...) do {                               \
    printf("QUDA error: " __VA_ARGS__);                   \
    printf(" (node %d, %s:%d)\n",                         \
           QMP_get_node_number(), __FILE__, __LINE__);    \
    fflush(stdout);                                       \
    QMP_abort(1);                                         \
  } while (0)
#else
// printf-style output for builds without USE_QMP: print the message, then
// flush stdout so it shows up immediately.
#define printfQuda(...) do {    \
    printf(__VA_ARGS__);        \
    fflush(stdout);             \
  } while (0)
// Report a fatal error and terminate the process with exit status 1.
//
// The first argument must be a string literal: it is pasted onto the
// "QUDA error: " prefix to form the printf format, and any remaining
// arguments are that format's parameters.  The message is followed by the
// source location of the call.
//
// __FILE__ is passed through "%s" rather than pasted into the format string,
// so a source path containing '%' cannot be misread as a conversion.
#define errorQuda(...) do {                         \
    printf("QUDA error: " __VA_ARGS__);             \
    printf(" (%s:%d)\n", __FILE__, __LINE__);       \
    exit(1);                                        \
  } while (0)
#endif // USE_QMP
// Print a non-fatal warning through printfQuda.  The "QUDA warning: " prefix
// is pasted onto the first argument, which therefore has to be a string
// literal; remaining arguments are its printf-style parameters.  A newline
// is printed afterwards and execution continues.
#define warningQuda(...) do { \
printfQuda("QUDA warning: " __VA_ARGS__); \
printfQuda("\n"); \
} while (0)
// Fetch the CUDA runtime's last error with cudaGetLastError(); if it is
// anything other than cudaSuccess, report the runtime's description of it
// through errorQuda.
#define checkCudaError() do {                                       \
    const cudaError_t quda_last_err_ = cudaGetLastError();          \
    if (quda_last_err_ != cudaSuccess) {                            \
      errorQuda("CUDA: %s", cudaGetErrorString(quda_last_err_));    \
    }                                                               \
  } while (0)
#ifdef __cplusplus
extern "C" {
#endif
......
......@@ -45,43 +45,37 @@ void initBlas(void) {
if (!d_reduceFloat) {
if (cudaMalloc((void**) &d_reduceFloat, REDUCE_MAX_BLOCKS*sizeof(QudaSumFloat)) == cudaErrorMemoryAllocation) {
printf("Error allocating device reduction array\n");
exit(0);
errorQuda("Error allocating device reduction array");
}
}
if (!d_reduceComplex) {
if (cudaMalloc((void**) &d_reduceComplex, REDUCE_MAX_BLOCKS*sizeof(QudaSumComplex)) == cudaErrorMemoryAllocation) {
printf("Error allocating device reduction array\n");
exit(0);
errorQuda("Error allocating device reduction array");
}
}
if (!d_reduceFloat3) {
if (cudaMalloc((void**) &d_reduceFloat3, REDUCE_MAX_BLOCKS*sizeof(QudaSumFloat3)) == cudaErrorMemoryAllocation) {
printf("Error allocating device reduction array\n");
exit(0);
errorQuda("Error allocating device reduction array");
}
}
if (!h_reduceFloat) {
if (cudaMallocHost((void**) &h_reduceFloat, REDUCE_MAX_BLOCKS*sizeof(QudaSumFloat)) == cudaErrorMemoryAllocation) {
printf("Error allocating host reduction array\n");
exit(0);
errorQuda("Error allocating host reduction array");
}
}
if (!h_reduceComplex) {
if (cudaMallocHost((void**) &h_reduceComplex, REDUCE_MAX_BLOCKS*sizeof(QudaSumComplex)) == cudaErrorMemoryAllocation) {
printf("Error allocating host reduction array\n");
exit(0);
errorQuda("Error allocating host reduction array");
}
}
if (!h_reduceFloat3) {
if (cudaMallocHost((void**) &h_reduceFloat3, REDUCE_MAX_BLOCKS*sizeof(QudaSumFloat3)) == cudaErrorMemoryAllocation) {
printf("Error allocating host reduction array\n");
exit(0);
errorQuda("Error allocating host reduction array");
}
}
......@@ -309,21 +303,18 @@ texture<float, 1, cudaReadModeElementType> texNorm5;
// Verify that two parity spinors can be combined by the same kernel: their
// precisions and their lengths must both match.  Any mismatch is fatal and is
// reported through errorQuda (the earlier raw printf + exit(-1) path, which
// made the errorQuda call unreachable, has been dropped).
inline void checkSpinor(ParitySpinor &a, ParitySpinor &b) {
  if (a.precision != b.precision) {
    errorQuda("checkSpinor: precisions do not match: %d %d", a.precision, b.precision);
  }
  if (a.length != b.length) {
    errorQuda("checkSpinor: lengths do not match: %d %d", a.length, b.length);
  }
}
// For kernels with precision conversion built in: only the lengths of the
// two parity spinors have to match, the precisions are not compared.  A
// mismatch is fatal and is reported through errorQuda; the message names this
// function so it can be told apart from a checkSpinor failure.
inline void checkSpinorLength(ParitySpinor &a, ParitySpinor &b) {
  if (a.length != b.length) {
    errorQuda("checkSpinorLength: lengths do not match: %d %d", a.length, b.length);
  }
}
......
......@@ -2,8 +2,13 @@
// This file defines functions to either initialize, check, or print
// the QUDA gauge and inverter parameters. It gets included in
// interface_quda.cpp, after either INIT_PARAM, CHECK_PARAM, or
// PRINT_PARAM is defined.
//
// If you're reading this file because it was mentioned in a "QUDA
// error" message, it probably means that you forgot to set one of the
// gauge or inverter parameters in your application before calling
// loadGaugeQuda() or invertQuda().
#include <float.h>
#define INVALID_INT QUDA_INVALID_ENUM
......@@ -12,18 +17,12 @@
// define macro to carry out the appropriate action for a given parameter
// P(x, val) is applied once per parameter field; exactly one definition is
// selected by the mode macro set before this file is included.  Every variant
// is wrapped in do { ... } while (0) so that "P(...);" is a single statement
// and cannot capture a following "else".
#if defined INIT_PARAM
// Initialization mode: assign the sentinel value val to ret.x.
#define P(x, val) do { ret.x = val; } while (0)
#elif defined CHECK_PARAM
// Check mode: a field that still equals its sentinel value is reported as
// undefined through errorQuda.
#define P(x, val) do {                                              \
    if (param->x == val) errorQuda("Parameter " #x " undefined");   \
  } while (0)
#elif defined PRINT_PARAM
// Print mode: emit "name = value" through printfQuda, using %g when the
// field's sentinel is INVALID_DOUBLE and %d otherwise.
#define P(x, val) do {                                                            \
    printfQuda((val == INVALID_DOUBLE) ? #x " = %g\n" : #x " = %d\n", param->x);  \
  } while (0)
#else
#error INIT_PARAM, CHECK_PARAM, and PRINT_PARAM all undefined in check_params.h
#endif
......@@ -38,7 +37,7 @@ QudaGaugeParam newQudaGaugeParam(void) {
static void checkGaugeParam(QudaGaugeParam *param) {
#else
void printQudaGaugeParam(QudaGaugeParam *param) {
printf("QUDA Gauge Parameters:\n");
printfQuda("QUDA Gauge Parameters:\n");
#endif
P(X[0], INVALID_INT);
......@@ -76,7 +75,7 @@ QudaInvertParam newQudaInvertParam(void) {
static void checkInvertParam(QudaInvertParam *param) {
#else
void printQudaInvertParam(QudaInvertParam *param) {
printf("QUDA Inverter Parameters:\n");
printfQuda("QUDA Inverter Parameters:\n");
#endif
P(dslash_type, QUDA_INVALID_DSLASH);
......
......@@ -27,16 +27,14 @@ void allocateParityClover(ParityClover *ret, int *X, int pad, Precision precisio
if (!ret->clover) {
if (cudaMalloc((void**)&(ret->clover), ret->bytes) == cudaErrorMemoryAllocation) {
printf("Error allocating clover term\n");
exit(0);
errorQuda("Error allocating clover term");
}
}
if (!ret->cloverNorm) {
if (precision == QUDA_HALF_PRECISION) {
if (cudaMalloc((void**)&ret->cloverNorm, ret->bytes/18) == cudaErrorMemoryAllocation) {
printf("Error allocating cloverNorm\n");
exit(0);
errorQuda("Error allocating cloverNorm");
}
}
}
......@@ -178,23 +176,19 @@ void loadParityClover(ParityClover ret, void *clover, Precision cpu_prec,
void *packedClover, *packedCloverNorm;
if (ret.precision == QUDA_DOUBLE_PRECISION && cpu_prec != QUDA_DOUBLE_PRECISION) {
printf("QUDA error: cannot have CUDA double precision without double CPU precision\n");
exit(-1);
errorQuda("Cannot have CUDA double precision without CPU double precision");
}
if (clover_order != QUDA_PACKED_CLOVER_ORDER) {
printf("QUDA error: invalid clover order\n");
exit(-1);
errorQuda("Invalid clover_order");
}
#ifndef __DEVICE_EMULATION__
if (cudaMallocHost(&packedClover, ret.bytes) == cudaErrorMemoryAllocation) {
printf("Error allocating clover pinned memory\n");
exit(0);
errorQuda("Error allocating clover pinned memory");
}
if (ret.precision == QUDA_HALF_PRECISION)
if (cudaMallocHost(&packedCloverNorm, ret.bytes/18) == cudaErrorMemoryAllocation) {
printf("Error allocating clover pinned memory\n");
exit(0);
errorQuda("Error allocating clover pinned memory");
}
#else
packedClover = malloc(ret.bytes);
......@@ -241,12 +235,10 @@ void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
void *packedEven, *packedEvenNorm, *packedOdd, *packedOddNorm;
if (ret.even.precision == QUDA_DOUBLE_PRECISION && cpu_prec != QUDA_DOUBLE_PRECISION) {
printf("QUDA error: cannot have CUDA double precision without double CPU precision\n");
exit(-1);
errorQuda("Cannot have CUDA double precision without CPU double precision");
}
if (clover_order != QUDA_LEX_PACKED_CLOVER_ORDER) {
printf("QUDA error: invalid clover order\n");
exit(-1);
errorQuda("Invalid clover order");
}
#ifndef __DEVICE_EMULATION__
......@@ -321,8 +313,7 @@ void loadCloverField(FullClover ret, void *clover, Precision cpu_prec, CloverFie
loadParityClover(ret.even, clover, cpu_prec, clover_order);
loadParityClover(ret.odd, clover_odd, cpu_prec, clover_order);
} else {
printf("QUDA error: CloverFieldOrder %d not supported\n", clover_order);
exit(-1);
errorQuda("Invalid clover_order");
}
}
......@@ -330,8 +321,7 @@ void loadCloverField(FullClover ret, void *clover, Precision cpu_prec, CloverFie
void createCloverField(FullClover *cudaClover, void *cpuClover, int *X, Precision precision, QudaInvertParam invert_param)
{
if (invert_param->clover_cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
exit(-1);
errorQuda("Half precision not supported on CPU");
}
// X should contain the dimensions of the even/odd sublattice
......
// Validate an output/input spinor pair before a dslash-type operation: the
// two spinors must share the same precision and the same stride.  Unless
// __CUDA_ARCH__ is 130, a double-precision input is rejected as well.  All
// failures are fatal and go through errorQuda (the earlier raw printf +
// exit(-1) calls, which made errorQuda unreachable, have been dropped).
static void checkSpinor(ParitySpinor out, ParitySpinor in) {
  if (in.precision != out.precision) {
    errorQuda("Input and output spinor precisions don't match in dslash_quda");
  }

  if (in.stride != out.stride) {
    errorQuda("Input and output spinor strides don't match in dslash_quda");
  }

  // NOTE(review): this is host code, so the test relies on the build defining
  // __CUDA_ARCH__ for the host pass -- confirm against the build flags.
#if (__CUDA_ARCH__ != 130)
  if (in.precision == QUDA_DOUBLE_PRECISION) {
    errorQuda("Double precision not supported on this GPU");
  }
#endif
}
// Validate a spinor against the gauge field it will be used with: the spinor
// volume must equal the gauge volume.  Unless __CUDA_ARCH__ is 130, a
// double-precision gauge field is rejected as well.  All failures are fatal
// and go through errorQuda (the earlier raw printf + exit(-1) calls, which
// made errorQuda unreachable, have been dropped).
static void checkGaugeSpinor(ParitySpinor spinor, FullGauge gauge) {
  if (spinor.volume != gauge.volume) {
    errorQuda("Spinor volume %d doesn't match gauge volume %d", spinor.volume, gauge.volume);
  }

  // NOTE(review): this is host code, so the test relies on the build defining
  // __CUDA_ARCH__ for the host pass -- confirm against the build flags.
#if (__CUDA_ARCH__ != 130)
  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
    errorQuda("Double precision not supported on this GPU");
  }
#endif
}
// Validate a spinor against a clover field: the spinor volume must equal the
// volume of both the even and the odd clover halves.  Unless __CUDA_ARCH__ is
// 130, a double-precision clover half is rejected as well.  All failures are
// fatal and go through errorQuda (the earlier raw printf + exit(-1) calls,
// which made errorQuda unreachable, have been dropped).
static void checkCloverSpinor(ParitySpinor spinor, FullClover clover) {
  if (spinor.volume != clover.even.volume) {
    errorQuda("Spinor volume %d doesn't match even clover volume %d",
              spinor.volume, clover.even.volume);
  }

  if (spinor.volume != clover.odd.volume) {
    errorQuda("Spinor volume %d doesn't match odd clover volume %d",
              spinor.volume, clover.odd.volume);
  }

  // NOTE(review): this is host code, so the test relies on the build defining
  // __CUDA_ARCH__ for the host pass -- confirm against the build flags.
#if (__CUDA_ARCH__ != 130)
  if ((clover.even.precision == QUDA_DOUBLE_PRECISION) ||
      (clover.odd.precision == QUDA_DOUBLE_PRECISION)) {
    errorQuda("Double precision not supported on this GPU");
  }
#endif
}
......@@ -47,8 +47,7 @@ void initDslashConstants(FullGauge gauge, int sp_stride, int cl_stride) {
cudaMemcpyToSymbol("cl_stride", &cl_stride, sizeof(int));
if (Vh%BLOCK_DIM != 0) {
printf("Error, volume not a multiple of the thread block size\n");
exit(-1);
errorQuda("Error, Volume not a multiple of the thread block size");
}
int X1 = 2*gauge.X[0];
......@@ -122,12 +121,7 @@ void initDslashConstants(FullGauge gauge, int sp_stride, int cl_stride) {
float h_pi_f = M_PI;
cudaMemcpyToSymbol("pi_f", &(h_pi_f), sizeof(float));
cudaError_t error = cudaGetLastError();
cudaGetLastError();
if(error != cudaSuccess) {
printf("initDslashConstants error: %s\n", cudaGetErrorString(error));
exit(0);
}
checkCudaError();
initDslash = 1;
}
......@@ -175,13 +169,7 @@ void dslashCuda(ParitySpinor out, FullGauge gauge, ParitySpinor in, int parity,
} else if (in.precision == QUDA_HALF_PRECISION) {
dslashHCuda(out, gauge, in, parity, dagger);
}
cudaError_t error = cudaGetLastError();
cudaGetLastError();
if(error != cudaSuccess) {
printf("dslashCuda error: %s\n", cudaGetErrorString(error));
exit(0);
}
checkCudaError();
dslash_quda_flops += 1320*in.volume;
}
......@@ -386,13 +374,7 @@ void dslashXpayCuda(ParitySpinor out, FullGauge gauge, ParitySpinor in, int pari
} else if (in.precision == QUDA_HALF_PRECISION) {
dslashXpayHCuda(out, gauge, in, parity, dagger, x, a);
}
cudaError_t error = cudaGetLastError();
cudaGetLastError();
if(error != cudaSuccess) {
printf("dslashXpayCuda error: %s\n", cudaGetErrorString(error));
exit(0);
}
checkCudaError();
dslash_quda_flops += (1320+48)*in.volume;
}
......@@ -616,8 +598,7 @@ void MatPCCuda(ParitySpinor out, FullGauge gauge, ParitySpinor in, double kappa,
dslashCuda(tmp, gauge, in, 0, dagger);
dslashXpayCuda(out, gauge, tmp, 1, dagger, in, kappa2);
} else {
printf("QUDA error: matpc_type not valid for plain Wilson\n");
exit(-1);
errorQuda("matpc_type not valid for plain Wilson");
}
}
......@@ -666,13 +647,7 @@ void cloverDslashCuda(ParitySpinor out, FullGauge gauge, FullClover cloverInv,
} else if (in.precision == QUDA_HALF_PRECISION) {
cloverDslashHCuda(out, gauge, cloverInv, in, parity, dagger);
}
cudaError_t error = cudaGetLastError();
cudaGetLastError();
if(error != cudaSuccess) {
printf("cloverDslashCuda error: %s\n", cudaGetErrorString(error));
exit(0);
}
checkCudaError();
dslash_quda_flops += (1320+504)*in.volume;
}
......@@ -1179,13 +1154,7 @@ void cloverDslashXpayCuda(ParitySpinor out, FullGauge gauge, FullClover cloverIn
} else if (in.precision == QUDA_HALF_PRECISION) {
cloverDslashXpayHCuda(out, gauge, cloverInv, in, parity, dagger, x, a);
}
cudaError_t error = cudaGetLastError();
cudaGetLastError();
if(error != cudaSuccess) {
printf("cloverDslashXpayCuda error: %s\n", cudaGetErrorString(error));
exit(0);
}
checkCudaError();
dslash_quda_flops += (1320+504+48)*in.volume;
}
......@@ -1725,7 +1694,7 @@ void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullC
if (((matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) || (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC))
&& (clover.even.clover == NULL)) {
printf("QUDA error: For asymmetric matpc_type, the uninverted clover term must be loaded\n");
errorQuda("For asymmetric matpc_type, the uninverted clover term must be loaded");
}
// FIXME: For asymmetric, a "dslashCxpay" kernel would improve performance.
......@@ -1746,8 +1715,7 @@ void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullC
cloverDslashCuda(tmp, gauge, cloverInv, in, 0, dagger);
cloverDslashXpayCuda(out, gauge, cloverInv, tmp, 1, dagger, in, kappa2);
} else {
printf("QUDA error: invalid matpc_type\n");
exit(-1);
errorQuda("Invalid matpc_type");
}
} else { // symmetric preconditioning, dagger
if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
......@@ -1759,8 +1727,7 @@ void cloverMatPCCuda(ParitySpinor out, FullGauge gauge, FullClover clover, FullC
cloverDslashCuda(tmp, gauge, cloverInv, out, 0, dagger);
dslashXpayCuda(out, gauge, tmp, 1, dagger, in, kappa2);
} else {
printf("QUDA error: invalid matpc_type\n");
exit(-1);
errorQuda("Invalid matpc_type");
}
}
}
......@@ -1803,13 +1770,7 @@ void cloverCuda(ParitySpinor out, FullGauge gauge, FullClover clover,
} else if (in.precision == QUDA_HALF_PRECISION) {
cloverHCuda(out, gauge, clover, in, parity);
}
cudaError_t error = cudaGetLastError();
cudaGetLastError();
if(error != cudaSuccess) {
printf("cloverCuda error: %s\n", cudaGetErrorString(error));
exit(0);
}
checkCudaError();
dslash_quda_flops += 504*in.volume;
}
......
......@@ -488,15 +488,13 @@ static void allocateGaugeField(FullGauge *cudaGauge, ReconstructType reconstruct
if (!cudaGauge->even) {
if (cudaMalloc((void **)&cudaGauge->even, cudaGauge->bytes) == cudaErrorMemoryAllocation) {
printf("Error allocating even gauge field\n");
exit(0);
errorQuda("Error allocating even gauge field");
}
}
if (!cudaGauge->odd) {
if (cudaMalloc((void **)&cudaGauge->odd, cudaGauge->bytes) == cudaErrorMemoryAllocation) {
printf("Error allocating even odd gauge field\n");
exit(0);
errorQuda("Error allocating even odd gauge field");
}
}
......@@ -531,22 +529,14 @@ static void loadGaugeField(FloatN *even, FloatN *odd, Float *cpuGauge, GaugeFiel
packCPSGaugeField(packedEven, (Float*)cpuGauge, 0, reconstruct, Vh, pad);
packCPSGaugeField(packedOdd, (Float*)cpuGauge, 1, reconstruct, Vh, pad);
} else {
printf("Sorry, %d GaugeFieldOrder not supported\n", gauge_order);
exit(-1);
}
cudaError_t error = cudaMemcpy(even, packedEven, bytes, cudaMemcpyHostToDevice);
if (error != cudaSuccess) {
printf("Error: %s\n", cudaGetErrorString(error));
exit(-1);
errorQuda("Invalid gauge_order");
}
error = cudaMemcpy(odd, packedOdd, bytes, cudaMemcpyHostToDevice);
if (error != cudaSuccess) {
printf("Error: %s\n", cudaGetErrorString(error));
exit(-1);
}
cudaMemcpy(even, packedEven, bytes, cudaMemcpyHostToDevice);
checkCudaError();
cudaMemcpy(odd, packedOdd, bytes, cudaMemcpyHostToDevice);
checkCudaError();
#ifndef __DEVICE_EMULATION__
cudaFreeHost(packedEven);
......@@ -583,8 +573,7 @@ static void retrieveGaugeField(Float *cpuGauge, FloatN *even, FloatN *odd, Gauge
unpackCPSGaugeField((Float*)cpuGauge, packedEven, 0, reconstruct, Vh, pad);
unpackCPSGaugeField((Float*)cpuGauge, packedOdd, 1, reconstruct, Vh, pad);
} else {
printf("Sorry, %d GaugeFieldOrder not supported\n", gauge_order);
exit(-1);
errorQuda("Invalid gauge_order");
}
#ifndef __DEVICE_EMULATION__
......@@ -602,8 +591,7 @@ void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, Precision cuda_prec,
Tboundary t_boundary, int *XX, double anisotropy, int pad)
{
if (cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
exit(-1);
errorQuda("Half precision not supported on CPU");
}
Anisotropy = anisotropy;
......@@ -658,8 +646,7 @@ void createGaugeField(FullGauge *cudaGauge, void *cpuGauge, Precision cuda_prec,
void restoreGaugeField(void *cpuGauge, FullGauge *cudaGauge, Precision cpu_prec, GaugeFieldOrder gauge_order)
{
if (cpu_prec == QUDA_HALF_PRECISION) {
printf("QUDA error: half precision not supported on cpu\n");
exit(-1);
errorQuda("Half precision not supported on CPU");
}
if (cudaGauge->precision == QUDA_DOUBLE_PRECISION) {
......
......@@ -40,8 +40,7 @@ FullClover cudaCloverInvSloppy;
// Reject QUDA_HALF_PRECISION as a CPU-side precision.  The failure is fatal
// and is reported through errorQuda (the earlier raw printf + exit(-1) pair,
// which made the errorQuda call unreachable, has been dropped).
static void checkPrecision(QudaPrecision precision)
{
  if (precision == QUDA_HALF_PRECISION) {
    errorQuda("Half precision not supported on CPU");
  }
}
......@@ -50,29 +49,26 @@ void initQuda(int dev)
int deviceCount;
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "No devices supporting CUDA.\n");
exit(EXIT_FAILURE);
errorQuda("No devices supporting CUDA");
}
for(int i=0; i<deviceCount; i++) {
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, i);
fprintf(stderr, "found device %d: %s\n", i, deviceProp.name);
fprintf(stderr, "QUDA: Found device %d: %s\n", i, deviceProp.name);
}
if(dev<0) {
if (dev < 0) {
dev = deviceCount - 1;
//dev = 0;
}
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
if (deviceProp.major < 1) {
fprintf(stderr, "Device %d does not support CUDA.\n", dev);
exit(EXIT_FAILURE);
errorQuda("Device %d does not support CUDA", dev);
}
fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name);
fprintf(stderr, "QUDA: Using device %d: %s\n", dev, deviceProp.name);
cudaSetDevice(dev);
cudaGaugePrecise.even = NULL;
......@@ -126,30 +122,25 @@ void saveGaugeQuda(void *h_gauge, QudaGaugeParam *param)
restoreGaugeField(h_gauge, &cudaGaugePrecise, param->cpu_prec, param->gauge_order);
}
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param)