Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit a90f5547 authored by rbabich
Browse files

quda: added checking for unset gauge and inverter parameters


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@583 be54200a-260c-0410-bdd7-ce6af2a381ab
parent ba4a786d
Version 0.x
- Added Guochun's patch to improve half precision 8 SU(3)
reconstruction: 8 half precision now actually works
- Introduced new interface functions newQudaGaugeParam() and
newQudaInvertParam() to allow for enhanced error checking. See
invert_test for an example of their use.
- Improved stability of the half precision 8-parameter SU(3)
reconstruction (with thanks to Guochun Shi).
- Cleaned up the invert_test example to remove unnecessary dependencies.
- Fixed bug affecting saveGaugeQuda() that caused su3_test to fail.
- Added compiler directive to Makefile to improve performance of the
......
......@@ -75,8 +75,9 @@ For help or to report a bug, please contact Mike Clark
If you find this code useful in your work, please cite:
M. A. Clark et al., "Solving Lattice QCD systems of equations using mixed
precision solvers on GPUs" (2009), arXiv:0911.3191 [hep-lat].
M. A. Clark, R. Babich, K. Barros, R. Brower, and C. Rebbi, "Solving
Lattice QCD systems of equations using mixed precision solvers on
GPUs" (2009), arXiv:0911.3191 [hep-lat].
Please also drop us a note so that we can inform you of updates and
bug-fixes. The most recent public release will always be available
......
......@@ -8,6 +8,12 @@
extern "C" {
#endif
// Plain aggregate of three doubles; it is the by-value return type of
// cDotProductNormACuda() and cDotProductNormBCuda() declared in this header.
typedef struct double3_s {
  double x, y, z;
} double3;
// ---------- blas_quda.cu ----------
void zeroCuda(ParitySpinor a);
......@@ -36,8 +42,8 @@ extern "C" {
cuDoubleComplex cDotProductCuda(ParitySpinor, ParitySpinor);
cuDoubleComplex xpaycDotzyCuda(ParitySpinor x, double a, ParitySpinor y, ParitySpinor z);
void blasTest();
void axpbyTest();
// void blasTest();
// void axpbyTest();
double3 cDotProductNormACuda(ParitySpinor a, ParitySpinor b);
double3 cDotProductNormBCuda(ParitySpinor a, ParitySpinor b);
......
#ifndef _ENUM_QUDA_H
#define _ENUM_QUDA_H
#include <limits.h>
#define QUDA_INVALID_ENUM INT_MIN
#ifdef __cplusplus
extern "C" {
#endif
......@@ -8,34 +11,40 @@ extern "C" {
typedef enum QudaGaugeFieldOrder_s {
QUDA_QDP_GAUGE_ORDER, // expect *gauge[4], even-odd, row-column colour
QUDA_CPS_WILSON_GAUGE_ORDER, // expect *gauge, even-odd, mu inside, column-row colour
QUDA_INVALID_GAUGE_ORDER = QUDA_INVALID_ENUM
} QudaGaugeFieldOrder;
typedef enum QudaDiracFieldOrder_s {
QUDA_DIRAC_ORDER, // even-odd, colour inside spin
QUDA_QDP_DIRAC_ORDER, // even-odd, spin inside colour
QUDA_CPS_WILSON_DIRAC_ORDER, // odd-even, colour inside spin
QUDA_LEX_DIRAC_ORDER // lexicographical order, colour inside spin
QUDA_LEX_DIRAC_ORDER, // lexicographical order, colour inside spin
QUDA_INVALID_DIRAC_ORDER = QUDA_INVALID_ENUM
} QudaDiracFieldOrder;
typedef enum QudaCloverFieldOrder_s {
QUDA_PACKED_CLOVER_ORDER, // even-odd, packed
QUDA_LEX_PACKED_CLOVER_ORDER // lexicographical order, packed
QUDA_LEX_PACKED_CLOVER_ORDER, // lexicographical order, packed
QUDA_INVALID_CLOVER_ORDER = QUDA_INVALID_ENUM
} QudaCloverFieldOrder;
typedef enum QudaDslashType_s {
QUDA_WILSON_DSLASH,
QUDA_CLOVER_WILSON_DSLASH
QUDA_CLOVER_WILSON_DSLASH,
QUDA_INVALID_DSLASH = QUDA_INVALID_ENUM
} QudaDslashType;
typedef enum QudaInverterType_s {
QUDA_CG_INVERTER,
QUDA_BICGSTAB_INVERTER
QUDA_BICGSTAB_INVERTER,
QUDA_INVALID_INVERTER = QUDA_INVALID_ENUM
} QudaInverterType;
typedef enum QudaPrecision_s {
QUDA_HALF_PRECISION = 2,
QUDA_SINGLE_PRECISION = 4,
QUDA_DOUBLE_PRECISION = 8
QUDA_DOUBLE_PRECISION = 8,
QUDA_INVALID_PRECISION = QUDA_INVALID_ENUM
} QudaPrecision;
// Whether the preconditioned matrix is (1-k^2 Deo Doe) or (1-k^2 Doe Deo)
......@@ -51,7 +60,8 @@ extern "C" {
QUDA_MATPC_EVEN_EVEN,
QUDA_MATPC_ODD_ODD,
QUDA_MATPC_EVEN_EVEN_ASYMMETRIC,
QUDA_MATPC_ODD_ODD_ASYMMETRIC
QUDA_MATPC_ODD_ODD_ASYMMETRIC,
QUDA_MATPC_INVALID = QUDA_INVALID_ENUM
} QudaMatPCType;
// The different solutions supported
......@@ -60,52 +70,54 @@ extern "C" {
QUDA_MATPC_SOLUTION,
QUDA_MATPCDAG_SOLUTION, // not implemented
QUDA_MATPCDAG_MATPC_SOLUTION,
QUDA_INVALID_SOLUTION = QUDA_INVALID_ENUM
} QudaSolutionType;
typedef enum QudaMassNormalization_s {
QUDA_KAPPA_NORMALIZATION,
QUDA_MASS_NORMALIZATION,
QUDA_ASYMMETRIC_MASS_NORMALIZATION
QUDA_ASYMMETRIC_MASS_NORMALIZATION,
QUDA_INVALID_NORMALIZATION = QUDA_INVALID_ENUM
} QudaMassNormalization;
typedef enum QudaPreserveSource_s {
QUDA_PRESERVE_SOURCE_NO, // use the source for the residual
QUDA_PRESERVE_SOURCE_YES // keep the source intact
QUDA_PRESERVE_SOURCE_YES, // keep the source intact
QUDA_PRESERVE_SOURCE_INVALID = QUDA_INVALID_ENUM
} QudaPreserveSource;
typedef enum QudaReconstructType_s {
QUDA_RECONSTRUCT_NO, // store all 18 real numbers explicitly
QUDA_RECONSTRUCT_8, // reconstruct from 8 real numbers
QUDA_RECONSTRUCT_12 // reconstruct from 12 real numbers
QUDA_RECONSTRUCT_12, // reconstruct from 12 real numbers
QUDA_RECONSTRUCT_INVALID = QUDA_INVALID_ENUM
} QudaReconstructType;
typedef enum QudaGaugeFixed_s {
QUDA_GAUGE_FIXED_NO, // No gauge fixing
QUDA_GAUGE_FIXED_YES // Gauge field stored in temporal gauge
QUDA_GAUGE_FIXED_YES, // Gauge field stored in temporal gauge
QUDA_GAUGE_FIXED_INVALID = QUDA_INVALID_ENUM
} QudaGaugeFixed;
typedef enum QudaDagType_s {
QUDA_DAG_NO,
QUDA_DAG_YES
QUDA_DAG_YES,
QUDA_DAG_INVALID = QUDA_INVALID_ENUM
} QudaDagType;
typedef enum QudaTboundary_s {
QUDA_ANTI_PERIODIC_T = -1,
QUDA_PERIODIC_T = 1
QUDA_PERIODIC_T = 1,
QUDA_INVALID_T_BOUNDARY = QUDA_INVALID_ENUM
} QudaTboundary;
typedef enum QudaVerbosity_s {
QUDA_SILENT,
QUDA_SUMMARIZE,
QUDA_VERBOSE
QUDA_VERBOSE,
QUDA_INVALID_VERBOSITY = QUDA_INVALID_ENUM
} QudaVerbosity;
typedef struct double3_s {
double x;
double y;
double z;
} double3;
#ifdef __cplusplus
}
#endif
......
......@@ -84,7 +84,7 @@ extern "C" {
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);
void saveGaugeQuda(void *h_gauge);
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param);
void discardCloverQuda(QudaInvertParam *inv_param);
// void discardCloverQuda(QudaInvertParam *inv_param);
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int parity, int dagger);
......@@ -94,8 +94,11 @@ extern "C" {
void endQuda(void);
void printGaugeParam(QudaGaugeParam *);
void printInvertParam(QudaInvertParam *);
QudaGaugeParam newQudaGaugeParam(void);
QudaInvertParam newQudaInvertParam(void);
void printQudaGaugeParam(QudaGaugeParam *param);
void printQudaInvertParam(QudaInvertParam *param);
#ifdef __cplusplus
}
......
......@@ -18,6 +18,7 @@
#define GaugeFieldOrder QudaGaugeFieldOrder
#define DiracFieldOrder QudaDiracFieldOrder
#define CloverFieldOrder QudaCloverFieldOrder
#define DslashType QudaDslashType
#define InverterType QudaInverterType
#define Precision QudaPrecision
#define MatPCType QudaMatPCType
......
......@@ -11,9 +11,9 @@ QUDA_HDRS = blas_quda.h dslash_quda.h enum_quda.h gauge_quda.h quda.h \
# files containing complex macros and other code fragments to be inlined,
# found in lib/
QUDA_INLN = clover_def.h dslash_common.h dslash_def.h dslash_textures.h \
io_spinor.h read_clover.h read_gauge.h reduce_complex_core.h \
reduce_core.h reduce_triple_core.h
QUDA_INLN = check_params.h clover_def.h dslash_common.h dslash_def.h \
dslash_textures.h io_spinor.h read_clover.h read_gauge.h \
reduce_complex_core.h reduce_core.h reduce_triple_core.h
# files generated by the scripts in lib/generate/, found in lib/dslash_core/
# (The current clover_core.h was edited by hand.)
......
// check_params.h
// This file defines functions to either initialize, check, or print
// the QUDA gauge and inverter parameters. It gets included in
// invert_quda.cpp, after either INIT_PARAM, CHECK_PARAM, or
// PRINT_PARAM is defined.
#include <float.h>
#define INVALID_INT QUDA_INVALID_ENUM
#define INVALID_DOUBLE DBL_MIN
// Define P(x, val): the action applied to every parameter listed in the
// function bodies further down.  `x` is the member expression relative to
// the parameter struct (e.g. X[0] or kappa) and `val` is the sentinel that
// marks that member as "not set by the caller".
//
// Exactly one mode macro must be defined by the including file:
//   INIT_PARAM  - store the sentinel into the local struct `ret`
//   CHECK_PARAM - print an error and exit(1) if the member still equals
//                 the sentinel
//   PRINT_PARAM - print "name = value", using %g when the sentinel is
//                 INVALID_DOUBLE and %d otherwise
//
// `val` is parenthesized at every use so that a sentinel written as a
// compound expression cannot bind to the neighbouring `=` / `==` operators.
// Each variant is wrapped in do { } while (0) so that P(...); behaves as a
// single statement, including after an unbraced `if`.
#if defined INIT_PARAM
#define P(x, val) do { ret.x = (val); } while (0)
#elif defined CHECK_PARAM
#define P(x, val) do { \
    if (param->x == (val)) { \
      printf("QUDA error: " #x " undefined.\n"); \
      exit(1); \
    } \
  } while (0)
#elif defined PRINT_PARAM
#define P(x, val) do { \
    printf(((val) == INVALID_DOUBLE) ? #x " = %g\n" : #x " = %d\n", param->x); \
  } while (0)
#else
#error INIT_PARAM, CHECK_PARAM, and PRINT_PARAM all undefined in check_params.h
#endif
// Define the appropriate function for QudaGaugeParam.  The single body
// below is compiled once per inclusion of this file and becomes:
//   INIT_PARAM  -> QudaGaugeParam newQudaGaugeParam(void)
//                  returns a struct whose listed members hold "unset" sentinels
//   CHECK_PARAM -> static void checkGaugeParam(QudaGaugeParam *param)
//                  exits if any listed member still holds its sentinel
//   otherwise   -> void printQudaGaugeParam(QudaGaugeParam *param)
//                  prints each listed member to stdout
// The order of the P() lines is also the print order.
#if defined INIT_PARAM
QudaGaugeParam newQudaGaugeParam(void) {
// No initializer: only the members named in the P() list below are assigned.
QudaGaugeParam ret;
#elif defined CHECK_PARAM
static void checkGaugeParam(QudaGaugeParam *param) {
#else
void printQudaGaugeParam(QudaGaugeParam *param) {
printf("QUDA Gauge Parameters:\n");
#endif
// Lattice extents are listed element by element because P() pastes its
// first argument directly after `ret.` / `param->`.
P(X[0], INVALID_INT);
P(X[1], INVALID_INT);
P(X[2], INVALID_INT);
P(X[3], INVALID_INT);
P(anisotropy, INVALID_DOUBLE);
P(gauge_order, QUDA_INVALID_GAUGE_ORDER);
P(t_boundary, QUDA_INVALID_T_BOUNDARY);
P(cpu_prec, QUDA_INVALID_PRECISION);
P(cuda_prec, QUDA_INVALID_PRECISION);
P(reconstruct, QUDA_RECONSTRUCT_INVALID);
P(cuda_prec_sloppy, QUDA_INVALID_PRECISION);
P(reconstruct_sloppy, QUDA_RECONSTRUCT_INVALID);
P(gauge_fix, QUDA_GAUGE_FIXED_INVALID);
P(ga_pad, INVALID_INT);
// Printed only: these two members are neither initialized by
// newQudaGaugeParam() nor validated by checkGaugeParam().
// NOTE(review): presumably both are outputs filled in by the library
// (packed_size is assigned in loadGaugeQuda()) -- confirm for gaugeGiB.
#ifdef PRINT_PARAM
P(packed_size, INVALID_INT);
P(gaugeGiB, INVALID_DOUBLE);
#endif
#ifdef INIT_PARAM
return ret;
#endif
}
// Define the appropriate function for QudaInvertParam.  The single body
// below is compiled once per inclusion of this file and becomes:
//   INIT_PARAM  -> QudaInvertParam newQudaInvertParam(void)
//                  returns a struct whose listed members hold "unset" sentinels
//   CHECK_PARAM -> static void checkInvertParam(QudaInvertParam *param)
//                  exits if any listed member still holds its sentinel
//   otherwise   -> void printQudaInvertParam(QudaInvertParam *param)
//                  prints each listed member to stdout
// The order of the P() lines is also the print order.
#if defined INIT_PARAM
QudaInvertParam newQudaInvertParam(void) {
// No initializer: only the members named in the P() list below are assigned.
QudaInvertParam ret;
#elif defined CHECK_PARAM
static void checkInvertParam(QudaInvertParam *param) {
#else
void printQudaInvertParam(QudaInvertParam *param) {
printf("QUDA Inverter Parameters:\n");
#endif
P(dslash_type, QUDA_INVALID_DSLASH);
P(inv_type, QUDA_INVALID_INVERTER);
P(kappa, INVALID_DOUBLE);
P(tol, INVALID_DOUBLE);
P(maxiter, INVALID_INT);
P(reliable_delta, INVALID_DOUBLE);
P(matpc_type, QUDA_MATPC_INVALID);
P(solution_type, QUDA_INVALID_SOLUTION);
P(mass_normalization, QUDA_INVALID_NORMALIZATION);
P(preserve_source, QUDA_PRESERVE_SOURCE_INVALID);
P(cpu_prec, QUDA_INVALID_PRECISION);
P(cuda_prec, QUDA_INVALID_PRECISION);
P(cuda_prec_sloppy, QUDA_INVALID_PRECISION);
P(dirac_order, QUDA_INVALID_DIRAC_ORDER);
P(sp_pad, INVALID_INT);
// Clover members: when checking or printing they are visited only if
// dslash_type selects the clover-Wilson operator; when initializing they
// are always set to their sentinels (the `if` is not emitted in that mode).
#if defined CHECK_PARAM || defined PRINT_PARAM
if (param->dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
#endif
P(clover_cpu_prec, QUDA_INVALID_PRECISION);
P(clover_cuda_prec, QUDA_INVALID_PRECISION);
P(clover_cuda_prec_sloppy, QUDA_INVALID_PRECISION);
P(clover_order, QUDA_INVALID_CLOVER_ORDER);
P(cl_pad, INVALID_INT);
#if defined CHECK_PARAM || defined PRINT_PARAM
}
#endif
P(verbosity, QUDA_INVALID_VERBOSITY);
// Printed only: the members below are neither initialized by
// newQudaInvertParam() nor validated by checkInvertParam().
// NOTE(review): presumably these are results reported back by the
// library -- confirm against the QudaInvertParam declaration.
#ifdef PRINT_PARAM
P(iter, INVALID_INT);
P(spinorGiB, INVALID_DOUBLE);
// The unbraced `if` is safe because P() expands to one do { } while (0)
// statement.
if (param->dslash_type == QUDA_CLOVER_WILSON_DSLASH)
P(cloverGiB, INVALID_DOUBLE);
P(gflops, INVALID_DOUBLE);
P(secs, INVALID_DOUBLE);
#endif
#ifdef INIT_PARAM
return ret;
#endif
}
// clean up
#undef INVALID_INT
#undef INVALID_DOUBLE
#undef P
......@@ -18,54 +18,23 @@ FullClover cudaCloverSloppy;
FullClover cudaCloverInvPrecise; // inverted clover term
FullClover cudaCloverInvSloppy;
// Write the members of a QudaGaugeParam to stdout, one "name = value"
// line each, preceded by a "Gauge Params:" heading.
void printGaugeParam(QudaGaugeParam *param)
{
  const QudaGaugeParam *gp = param;
  int dir = 0;

  printf("Gauge Params:\n");

  // the four X[] entries, one line per index
  while (dir < 4) {
    printf("X[%d] = %d\n", dir, gp->X[dir]);
    ++dir;
  }

  printf("anisotropy = %e\n", gp->anisotropy);
  printf("gauge_order = %d\n", gp->gauge_order);

  // precision / reconstruction settings
  printf("cpu_prec = %d\n", gp->cpu_prec);
  printf("cuda_prec = %d\n", gp->cuda_prec);
  printf("reconstruct = %d\n", gp->reconstruct);
  printf("cuda_prec_sloppy = %d\n", gp->cuda_prec_sloppy);
  printf("reconstruct_sloppy = %d\n", gp->reconstruct_sloppy);

  printf("gauge_fix = %d\n", gp->gauge_fix);
  printf("t_boundary = %d\n", gp->t_boundary);
  printf("packed_size = %d\n", gp->packed_size);
  printf("gaugeGiB = %e\n", gp->gaugeGiB);
}
// Write the members of a QudaInvertParam to stdout, one "name = value"
// line each.  The clover-related members are printed only when
// dslash_type is QUDA_CLOVER_WILSON_DSLASH.
void printInvertParam(QudaInvertParam *param)
{
  const QudaInvertParam *ip = param;

  printf("kappa = %e\n", ip->kappa);
  printf("mass_normalization = %d\n", ip->mass_normalization);
  printf("dslash_type = %d\n", ip->dslash_type);
  printf("inv_type = %d\n", ip->inv_type);
  printf("tol = %e\n", ip->tol);
  printf("iter = %d\n", ip->iter);
  printf("maxiter = %d\n", ip->maxiter);
  printf("matpc_type = %d\n", ip->matpc_type);
  printf("solution_type = %d\n", ip->solution_type);
  printf("preserve_source = %d\n", ip->preserve_source);
  printf("cpu_prec = %d\n", ip->cpu_prec);
  printf("cuda_prec = %d\n", ip->cuda_prec);
  printf("cuda_prec_sloppy = %d\n", ip->cuda_prec_sloppy);
  printf("dirac_order = %d\n", ip->dirac_order);
  printf("spinorGiB = %e\n", ip->spinorGiB);

  // clover section, skipped for any other dslash_type
  const int uses_clover = (ip->dslash_type == QUDA_CLOVER_WILSON_DSLASH);
  if (uses_clover) {
    printf("clover_cpu_prec = %d\n", ip->clover_cpu_prec);
    printf("clover_cuda_prec = %d\n", ip->clover_cuda_prec);
    printf("clover_cuda_prec_sloppy = %d\n", ip->clover_cuda_prec_sloppy);
    printf("clover_order = %d\n", ip->clover_order);
    printf("cloverGiB = %e\n", ip->cloverGiB);
  }

  printf("gflops = %e\n", ip->gflops);
  printf("secs = %f\n", ip->secs);
  printf("verbosity = %d\n", ip->verbosity);
}
static void checkPrecision(QudaPrecision precision) {
// define newQudaGaugeParam() and newQudaInvertParam()
#define INIT_PARAM
#include "check_params.h"
#undef INIT_PARAM
// define (static) checkGaugeParam() and checkInvertParam()
#define CHECK_PARAM
#include "check_params.h"
#undef CHECK_PARAM
// define printQudaGaugeParam() and printQudaInvertParam()
#define PRINT_PARAM
#include "check_params.h"
#undef PRINT_PARAM
static void checkPrecision(QudaPrecision precision)
{
if (precision == QUDA_HALF_PRECISION) {
printf("Half precision not supported on cpu\n");
exit(-1);
......@@ -125,6 +94,8 @@ void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
{
gauge_param = param;
checkGaugeParam(gauge_param);
gauge_param->packed_size = (gauge_param->reconstruct == QUDA_RECONSTRUCT_8) ? 8 : 12;
createGaugeField(&cudaGaugePrecise, h_gauge, gauge_param->cuda_prec, gauge_param->reconstruct,
......@@ -162,6 +133,14 @@ void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
printf("QUDA error: half precision not supported on CPU\n");
exit(-1);
}
if (cudaGaugePrecise.even == NULL) {
printf("QUDA error: gauge field must be loaded before clover\n");
exit(-1);
}
if (inv_param->dslash_type != QUDA_CLOVER_WILSON_DSLASH) {
printf("QUDA error: wrong dslash_type in loadCloverQuda()\n");
exit(-1);
}
int X[4];
for (int i=0; i<4; i++) {
......@@ -206,6 +185,7 @@ void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
}
}
#if 0
// discard clover term but keep the inverse
void discardCloverQuda(QudaInvertParam *inv_param)
{
......@@ -216,6 +196,7 @@ void discardCloverQuda(QudaInvertParam *inv_param)
freeCloverField(&cudaCloverSloppy);
}
}
#endif
void endQuda(void)
{
......@@ -316,7 +297,8 @@ void MatPCDagMatPCQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
freeParitySpinor(in);
}
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger) {
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, int dagger)
{
checkPrecision(inv_param->cpu_prec);
FullSpinor in = allocateSpinorField(cudaGaugePrecise.X, inv_param->cuda_prec, inv_param->sp_pad);
......@@ -348,6 +330,7 @@ void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
{
invert_param = param;
checkInvertParam(param);
checkPrecision(param->cpu_prec);
int slenh = cudaGaugePrecise.volume*spinorSiteSize;
......
......@@ -35,6 +35,9 @@ int TRANSFER = 0; // include transfer time in the benchmark?
void init() {
gaugeParam = newQudaGaugeParam();
inv_param = newQudaInvertParam();
gaugeParam.X[0] = 24;
gaugeParam.X[1] = 24;
gaugeParam.X[2] = 24;
......
......@@ -15,8 +15,8 @@ int main(int argc, char **argv)
void *gauge[4], *clover_inv;
QudaGaugeParam Gauge_param;
QudaInvertParam inv_param;
QudaGaugeParam Gauge_param = newQudaGaugeParam();
QudaInvertParam inv_param = newQudaInvertParam();
Gauge_param.X[0] = 24;
Gauge_param.X[1] = 24;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment