Advanced Computing Platform for Theoretical Physics

Commit 0b0cc5f8 authored by rbabich
Browse files

quda: updated NEWS and README files, and changed some parameters in the test programs. This will be release 0.3.1.


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@1137 be54200a-260c-0410-bdd7-ce6af2a381ab
parent dcab5ba3
Version 0.3.1
Version 0.3.1 - 22 December 2010
- Added support for domain wall fermions. The length of the fifth
dimension and the domain wall height are set via the 'Ls' and 'm5'
......@@ -13,7 +13,8 @@ Version 0.3.1
case is better optimized and generally also exhibits faster
convergence.
- Significantly improved performance of several BLAS routines on Fermi.
- Improved performance in several of the BLAS routines, particularly
on Fermi.
- Improved performance in the CG solver for Wilson-like (and domain
wall) fermions by avoiding unnecessary allocation and deallocation
......@@ -28,7 +29,7 @@ Version 0.3.1
MatPCDagMatPCQuda(). The latter two functions have been absorbed
into MatQuda() and MatDagMatQuda(), respectively, since
preconditioning may be selected via the solution_type member of
InvertParam.
QudaInvertParam.
- Fixed a bug in the Wilson and Wilson-clover Dirac operators that
prevented the use of MatPC solution types.
......@@ -37,7 +38,7 @@ Version 0.3.1
would cause a crash when QUDA_MASS_NORMALIZATION is used.
- Fixed an allocation bug in the Wilson and Wilson-clover
Dirac operators that might have led to undefined behaviour for
Dirac operators that might have led to undefined behavior for
non-zero padding.
- Fixed a bug in blas_test that might have led to incorrect autotuning
......@@ -58,7 +59,7 @@ Version 0.3.0 - 1 October 2010
new parameters in QudaInvertParam and QudaGaugeParam. See below for
details.
- The internals of QUDA have been signficantly restructured to facilitate
- The internals of QUDA have been significantly restructured to facilitate
future extensions. This is an ongoing process and will continue
through the next several releases.
......
Release Notes for QUDA v0.3.1 x December 2010
Release Notes for QUDA v0.3.1 22 December 2010
-----------------------------
Overview:
......@@ -46,7 +46,7 @@ Installation:
In the source directory, copy 'make.inc.example' to 'make.inc', and
edit the first few lines to specify the CUDA install path, the
platform (x86 or x86_64), and the GPU architecture (see "Hardware
compatibility" above). Then type 'make' to build the library.
Compatibility" above). Then type 'make' to build the library.
As an optional step, 'make tune' will invoke tests/blas_test to
perform autotuning of the various BLAS-like functions needed by the
......
......@@ -11,7 +11,7 @@ PYTHON = python # python 2.5 or later required for 'make gen'
DEVICE = 0 # CUDA device to use for 'make tune'
BUILD_WILSON_DIRAC = yes # build Wilson and clover Dirac operators?
BUILD_DOMAIN_WALL_DIRAC = yes # build Domain Wall Dirac operators?
BUILD_DOMAIN_WALL_DIRAC = yes # build domain wall Dirac operators?
BUILD_STAGGERED_DIRAC = yes # build staggered Dirac operators?
BUILD_TWISTED_MASS_DIRAC = yes # build twisted mass Dirac operators?
BUILD_FATLINK = no # build code for computing asqtad fat links?
......
......@@ -12,10 +12,10 @@ const int LX = 24;
const int LY = 24;
const int LZ = 24;
const int LT = 24;
const int nSpin = 4;
const int Nspin = 4;
// corresponds to 10 iterations for V=24^4, nSpin = 4, at half precision
const int Niter = 10 * (24*24*24*24*4) / (LX * LY * LZ * LT * nSpin);
// corresponds to 10 iterations for V=24^4, Nspin = 4, at half precision
const int Niter = 10 * (24*24*24*24*4) / (LX * LY * LZ * LT * Nspin);
const int Nkernels = 24;
const int ThreadMin = 32;
......@@ -56,7 +56,7 @@ void initFields(int prec)
ColorSpinorParam param;
param.fieldLocation = QUDA_CUDA_FIELD_LOCATION;
param.nColor = 3;
param.nSpin = nSpin; // =1 for staggered, =2 for coarse Dslash, =4 for 4d spinor
param.nSpin = Nspin; // =1 for staggered, =2 for coarse Dslash, =4 for 4d spinor
param.nDim = 4; // number of spacetime dimensions
param.x[0] = LX;
param.x[1] = LY;
......
......@@ -26,7 +26,7 @@ const int Ls = 16;
double kappa5;
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
QudaGaugeParam gauge_param;
QudaInvertParam inv_param;
......@@ -45,10 +45,10 @@ void init() {
gauge_param = newQudaGaugeParam();
inv_param = newQudaInvertParam();
gauge_param.X[0] = 8;
gauge_param.X[1] = 8;
gauge_param.X[2] = 8;
gauge_param.X[3] = 8;
gauge_param.X[0] = 12;
gauge_param.X[1] = 12;
gauge_param.X[2] = 12;
gauge_param.X[3] = 12;
setDims(gauge_param.X, Ls);
......
......@@ -18,15 +18,15 @@ int main(int argc, char **argv)
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
QudaPrecision cuda_prec_sloppy = QUDA_HALF_PRECISION;
QudaGaugeParam gauge_param = newQudaGaugeParam();
QudaInvertParam inv_param = newQudaInvertParam();
gauge_param.X[0] = 16;
gauge_param.X[1] = 16;
gauge_param.X[2] = 16;
gauge_param.X[3] = 32;
gauge_param.X[0] = 12;
gauge_param.X[1] = 12;
gauge_param.X[2] = 12;
gauge_param.X[3] = 12;
inv_param.Ls = 16;
gauge_param.anisotropy = 1.0;
......@@ -50,10 +50,10 @@ int main(int argc, char **argv)
inv_param.tol = 5e-8;
inv_param.maxiter = 1000;
inv_param.reliable_delta = 1e-1;
inv_param.reliable_delta = 0.1;
inv_param.solution_type = QUDA_MAT_SOLUTION;
inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.dagger = QUDA_DAG_NO;
inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
......
......@@ -37,7 +37,7 @@ QudaParity parity;
QudaDagType dagger = QUDA_DAG_NO;
int transfer = 0; // include transfer time in the benchmark?
int tdim = 24;
int sdim = 8;
int sdim = 24;
QudaReconstructType link_recon = QUDA_RECONSTRUCT_12;
QudaPrecision prec = QUDA_SINGLE_PRECISION;
......
......@@ -23,7 +23,7 @@ QudaPrecision prec_sloppy = QUDA_INVALID_PRECISION;
static double tol = 1e-8;
static int testtype = 0;
static int sdim = 8;
static int sdim = 24;
static int tdim = 24;
extern int V;
......
......@@ -14,16 +14,16 @@
#include <twisted_mass_dslash_reference.h>
// What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
const int test_type = 2;
const int test_type = 1;
const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
const QudaDagType dagger = QUDA_DAG_YES; // apply Dslash or Dslash dagger?
const QudaDagType dagger = QUDA_DAG_NO; // apply Dslash or Dslash dagger?
const int transfer = 0; // include transfer time in the benchmark?
const int loops = 1;
const int loops = 100;
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
QudaGaugeParam gauge_param;
QudaInvertParam inv_param;
......@@ -42,10 +42,10 @@ void init() {
gauge_param = newQudaGaugeParam();
inv_param = newQudaInvertParam();
gauge_param.X[0] = 16;
gauge_param.X[1] = 16;
gauge_param.X[2] = 16;
gauge_param.X[3] = 16;
gauge_param.X[0] = 24;
gauge_param.X[1] = 24;
gauge_param.X[2] = 24;
gauge_param.X[3] = 24;
setDims(gauge_param.X);
gauge_param.anisotropy = 2.3;
......@@ -56,13 +56,13 @@ void init() {
gauge_param.cpu_prec = cpu_prec;
gauge_param.cuda_prec = cuda_prec;
gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
gauge_param.reconstruct_sloppy = gauge_param.reconstruct;
gauge_param.cuda_prec_sloppy = gauge_param.cuda_prec;
gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
gauge_param.type = QUDA_WILSON_LINKS;
inv_param.kappa = 1.0;
inv_param.kappa = 0.1;
inv_param.mu = 0.01;
inv_param.twist_flavor = QUDA_TWIST_MINUS;
......
......@@ -17,8 +17,8 @@ int main(int argc, char **argv)
int device = 0; // CUDA device number
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec_sloppy = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
QudaPrecision cuda_prec_sloppy = QUDA_HALF_PRECISION;
QudaGaugeParam gauge_param = newQudaGaugeParam();
QudaInvertParam inv_param = newQudaInvertParam();
......@@ -26,7 +26,7 @@ int main(int argc, char **argv)
gauge_param.X[0] = 24;
gauge_param.X[1] = 24;
gauge_param.X[2] = 24;
gauge_param.X[3] = 48;
gauge_param.X[3] = 24;
gauge_param.anisotropy = 1.0;
gauge_param.type = QUDA_WILSON_LINKS;
......
......@@ -14,15 +14,15 @@
#include <wilson_dslash_reference.h>
// What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
const int test_type = 0;
const int test_type = 1;
// clover-improved? (0 = plain Wilson, 1 = clover)
const int clover_yes = 0;
const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
const QudaDagType dagger = QUDA_DAG_YES; // apply Dslash or Dslash dagger?
const QudaDagType dagger = QUDA_DAG_NO; // apply Dslash or Dslash dagger?
const int transfer = 0; // include transfer time in the benchmark?
const int loops = 1000;
const int loops = 100;
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
......@@ -45,10 +45,10 @@ void init() {
gauge_param = newQudaGaugeParam();
inv_param = newQudaInvertParam();
gauge_param.X[0] = 4;
gauge_param.X[1] = 4;
gauge_param.X[2] = 4;
gauge_param.X[3] = 4;
gauge_param.X[0] = 24;
gauge_param.X[1] = 24;
gauge_param.X[2] = 24;
gauge_param.X[3] = 24;
setDims(gauge_param.X);
gauge_param.anisotropy = 2.3;
......@@ -63,9 +63,8 @@ void init() {
gauge_param.reconstruct_sloppy = gauge_param.reconstruct;
gauge_param.cuda_prec_sloppy = gauge_param.cuda_prec;
gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
gauge_param.type = QUDA_WILSON_LINKS;
inv_param.kappa = 1.0;
inv_param.kappa = 0.1;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.dagger = dagger;
......
......@@ -17,16 +17,16 @@ int main(int argc, char **argv)
int device = 0; // CUDA device number
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
QudaPrecision cuda_prec_sloppy = QUDA_HALF_PRECISION;
QudaGaugeParam gauge_param = newQudaGaugeParam();
QudaInvertParam inv_param = newQudaInvertParam();
gauge_param.X[0] = 20;
gauge_param.X[1] = 20;
gauge_param.X[2] = 20;
gauge_param.X[3] = 64;
gauge_param.X[0] = 24;
gauge_param.X[1] = 24;
gauge_param.X[2] = 24;
gauge_param.X[3] = 24;
gauge_param.anisotropy = 1.0;
gauge_param.type = QUDA_WILSON_LINKS;
......@@ -35,9 +35,9 @@ int main(int argc, char **argv)
gauge_param.cpu_prec = cpu_prec;
gauge_param.cuda_prec = cuda_prec;
gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
gauge_param.cuda_prec_sloppy = cuda_prec_sloppy;
gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
int clover_yes = 0; // 0 for plain Wilson, 1 for clover
......@@ -53,11 +53,10 @@ int main(int argc, char **argv)
inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param.anisotropy + mass));
inv_param.tol = 5e-8;
inv_param.maxiter = 1000;
inv_param.reliable_delta = 3e-1;
inv_param.reliable_delta = 0.1;
inv_param.solution_type = QUDA_MAT_SOLUTION;
inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE;
inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
inv_param.dagger = QUDA_DAG_NO;
inv_param.mass_normalization = QUDA_MASS_NORMALIZATION;
......@@ -69,7 +68,7 @@ int main(int argc, char **argv)
inv_param.dirac_order = QUDA_DIRAC_ORDER;
gauge_param.ga_pad = 0; // 24*24*24;
inv_param.sp_pad = 24*24*24;
inv_param.sp_pad = 0; // 24*24*24;
inv_param.cl_pad = 0; // 24*24*24;
if (clover_yes) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment