Advanced Computing Platform for Theoretical Physics

Commit d8c36033 authored by rbabich
Browse files

quda: fixed pack_test bug (missing blas initialization)


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@598 be54200a-260c-0410-bdd7-ce6af2a381ab
parent e5f59d3a
...@@ -17,8 +17,8 @@ extern "C" { ...@@ -17,8 +17,8 @@ extern "C" {
// ---------- blas_quda.cu ---------- // ---------- blas_quda.cu ----------
// creates and destroys reduction buffers // creates and destroys reduction buffers
void initBlas(); void initBlas(void);
void endBlas(); void endBlas(void);
void zeroCuda(ParitySpinor a); void zeroCuda(ParitySpinor a);
void copyCuda(ParitySpinor dst, ParitySpinor src); void copyCuda(ParitySpinor dst, ParitySpinor src);
......
...@@ -22,13 +22,13 @@ ...@@ -22,13 +22,13 @@
#endif #endif
// These are used for reduction kernels // These are used for reduction kernels
QudaSumFloat *d_reduceFloat=0; static QudaSumFloat *d_reduceFloat=0;
QudaSumComplex *d_reduceComplex=0; static QudaSumComplex *d_reduceComplex=0;
QudaSumFloat3 *d_reduceFloat3=0; static QudaSumFloat3 *d_reduceFloat3=0;
QudaSumFloat *h_reduceFloat=0; static QudaSumFloat *h_reduceFloat=0;
QudaSumComplex *h_reduceComplex=0; static QudaSumComplex *h_reduceComplex=0;
QudaSumFloat3 *h_reduceFloat3=0; static QudaSumFloat3 *h_reduceFloat3=0;
unsigned long long blas_quda_flops; unsigned long long blas_quda_flops;
unsigned long long blas_quda_bytes; unsigned long long blas_quda_bytes;
...@@ -38,8 +38,8 @@ int blas_threads[3][22]; ...@@ -38,8 +38,8 @@ int blas_threads[3][22];
// Number of thread blocks for each blas kernel // Number of thread blocks for each blas kernel
int blas_blocks[3][22]; int blas_blocks[3][22];
dim3 blasBlock; static dim3 blasBlock;
dim3 blasGrid; static dim3 blasGrid;
void initBlas(void) { void initBlas(void) {
...@@ -85,12 +85,12 @@ void initBlas(void) { ...@@ -85,12 +85,12 @@ void initBlas(void) {
} }
} }
// Output from blas_test // Output from blas_test
#include<blas_param.h> #include <blas_param.h>
} }
void endBlas() { void endBlas(void) {
if (d_reduceFloat) cudaFree(d_reduceFloat); if (d_reduceFloat) cudaFree(d_reduceFloat);
if (d_reduceComplex) cudaFree(d_reduceComplex); if (d_reduceComplex) cudaFree(d_reduceComplex);
if (d_reduceFloat3) cudaFree(d_reduceFloat3); if (d_reduceFloat3) cudaFree(d_reduceFloat3);
......
...@@ -22,8 +22,6 @@ FullClover cudaCloverSloppy; ...@@ -22,8 +22,6 @@ FullClover cudaCloverSloppy;
FullClover cudaCloverInvPrecise; // inverted clover term FullClover cudaCloverInvPrecise; // inverted clover term
FullClover cudaCloverInvSloppy; FullClover cudaCloverInvSloppy;
void initBlas(void);
// define newQudaGaugeParam() and newQudaInvertParam() // define newQudaGaugeParam() and newQudaInvertParam()
#define INIT_PARAM #define INIT_PARAM
#include "check_params.h" #include "check_params.h"
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <quda_internal.h> #include <quda_internal.h>
#include <gauge_quda.h> #include <gauge_quda.h>
#include <spinor_quda.h> #include <spinor_quda.h>
#include <blas_quda.h>
#include <util_quda.h> #include <util_quda.h>
#include <test_util.h> #include <test_util.h>
...@@ -70,6 +71,7 @@ void init() { ...@@ -70,6 +71,7 @@ void init() {
int dev = 0; int dev = 0;
cudaSetDevice(dev); cudaSetDevice(dev);
initBlas();
param.X[0] /= 2; param.X[0] /= 2;
cudaFullSpinor = allocateSpinorField(param.X, param.cuda_prec, sp_pad); cudaFullSpinor = allocateSpinorField(param.X, param.cuda_prec, sp_pad);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment