Advanced Computing Platform for Theoretical Physics

Commit 22e8df06 authored by mikeaclark
Browse files

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@293 be54200a-260c-0410-bdd7-ce6af2a381ab
parent dfa47927
......@@ -69,7 +69,7 @@
#define A_re G4.z
#define A_im G4.w
#define o00_re s[0]
/*#define o00_re s[0]
#define o00_im s[1]
#define o01_re s[2]
#define o01_im s[3]
......@@ -87,7 +87,26 @@
#define o21_im s[15]
#define o22_re s[16]
#define o22_im s[17]
#define o30_re s[18]
#define o30_re s[18]*/
volatile float o00_re;
volatile float o00_im;
volatile float o01_re;
volatile float o01_im;
volatile float o02_re;
volatile float o02_im;
volatile float o10_re;
volatile float o10_im;
volatile float o11_re;
volatile float o11_im;
volatile float o12_re;
volatile float o12_im;
volatile float o20_re;
volatile float o20_im;
volatile float o21_re;
volatile float o21_im;
volatile float o22_re;
volatile float o22_im;
volatile float o30_re;
volatile float o30_im;
volatile float o31_re;
volatile float o31_im;
......@@ -95,7 +114,6 @@ volatile float o32_re;
volatile float o32_im;
#include "read_gauge.h"
#include "io_spinor.h"
......@@ -107,8 +125,8 @@ int x3 = (X/(L2*L1)) % L3;
int x2 = (X/L1) % L2;
int x1 = X % L1;
extern __shared__ float s_data[];
volatile float *s = s_data+SHARED_FLOATS_PER_THREAD*threadIdx.x;
//extern __shared__ float s_data[];
//volatile float *s = s_data+SHARED_FLOATS_PER_THREAD*threadIdx.x;
o00_re = o00_im = 0;
o01_re = o01_im = 0;
......
......@@ -851,4 +851,22 @@ o32_re = o32_im = 0;
// write spinor field back to device memory
WRITE_SPINOR();
#undef o00_re
#undef o00_im
#undef o01_re
#undef o01_im
#undef o02_re
#undef o02_im
#undef o10_re
#undef o10_im
#undef o11_re
#undef o11_im
#undef o12_re
#undef o12_im
#undef o20_re
#undef o20_im
#undef o21_re
#undef o21_im
#undef o22_re
#undef o22_im
#undef o30_re
......@@ -5,7 +5,7 @@
#include <util_quda.h>
#include <field_quda.h>
#define FULL_WILSON 1
#define FULL_WILSON 0
QudaGaugeParam param;
FullSpinor cudaSpinor;
......
......@@ -8,6 +8,42 @@
#include <util_quda.h>
#include <field_quda.h>
// Writes the contents of a QudaGaugeParam to stdout: a "Gauge Params:" banner
// followed by one "name = value" line per field. anisotropy and gaugeGiB are
// printed with %e; every other field is printed with %d.
void printGaugeParam(QudaGaugeParam *param) {
  // A single printf with a concatenated format string; the argument list is
  // kept in the same order as the lines of the format.
  printf("Gauge Params:\n"
         "X = %d\n"
         "Y = %d\n"
         "Z = %d\n"
         "T = %d\n"
         "anisotropy = %e\n"
         "gauge_order = %d\n"
         "cpu_prec = %d\n"
         "cuda_prec = %d\n"
         "reconstruct = %d\n"
         "gauge_fix = %d\n"
         "t_boundary = %d\n"
         "packed_size = %d\n"
         "gaugeGiB = %e\n",
         param->X,
         param->Y,
         param->Z,
         param->T,
         param->anisotropy,
         param->gauge_order,
         param->cpu_prec,
         param->cuda_prec,
         param->reconstruct,
         param->gauge_fix,
         param->t_boundary,
         param->packed_size,
         param->gaugeGiB);
}
// Writes the contents of a QudaInvertParam to stdout, one "name = value" line
// per field and no banner line. kappa, tol, spinorGiB and gflops are printed
// with %e, secs with %f, and every other field with %d.
void printInvertParam(QudaInvertParam *param) {
  // A single printf with a concatenated format string; the argument list is
  // kept in the same order as the lines of the format.
  printf("kappa = %e\n"
         "mass_normalization = %d\n"
         "inv_type = %d\n"
         "tol = %e\n"
         "iter = %d\n"
         "maxiter = %d\n"
         "matpc_type = %d\n"
         "solution_type = %d\n"
         "preserve_source = %d\n"
         "cpu_prec = %d\n"
         "cuda_prec = %d\n"
         "dirac_order = %d\n"
         "spinorGiB = %e\n"
         "gflops = %e\n"
         "secs = %f\n",
         param->kappa,
         param->mass_normalization,
         param->inv_type,
         param->tol,
         param->iter,
         param->maxiter,
         param->matpc_type,
         param->solution_type,
         param->preserve_source,
         param->cpu_prec,
         param->cuda_prec,
         param->dirac_order,
         param->spinorGiB,
         param->gflops,
         param->secs);
}
void initQuda(int dev)
{
int deviceCount;
......
......@@ -62,6 +62,8 @@ extern "C" {
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
void endQuda(void);
void printGaugeParam(QudaGaugeParam *);
void printInvertParam(QudaInvertParam *);
#ifdef __cplusplus
}
......
......@@ -3,10 +3,10 @@
#include <cuda_runtime.h>
#define L1 16 // "x" dimension
#define L2 16 // "y" dimension
#define L3 16 // "z" dimension
#define L4 16 // "time" dimension
#define L1 4 // "x" dimension
#define L2 4 // "y" dimension
#define L3 4 // "z" dimension
#define L4 4 // "time" dimension
#define L1h (L1/2) // half of the full "x" dimension, useful for even/odd lattice indexing
#define N (L1*L2*L3*L4) // total number of lattice points
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment