Advanced Computing Platform for Theoretical Physics

commit大文件会使得服务器变得不稳定,请大家尽量只commit代码,不要commit大的文件。

Commit d8c36033 authored by rbabich
Browse files

quda: fixed pack_test bug (missing blas initialization)


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@598 be54200a-260c-0410-bdd7-ce6af2a381ab
parent e5f59d3a
......@@ -17,8 +17,8 @@ extern "C" {
// ---------- blas_quda.cu ----------
// creates and destroys reduction buffers
void initBlas();
void endBlas();
void initBlas(void);
void endBlas(void);
void zeroCuda(ParitySpinor a);
void copyCuda(ParitySpinor dst, ParitySpinor src);
......
......@@ -22,13 +22,13 @@
#endif
// These are used for reduction kernels
QudaSumFloat *d_reduceFloat=0;
QudaSumComplex *d_reduceComplex=0;
QudaSumFloat3 *d_reduceFloat3=0;
static QudaSumFloat *d_reduceFloat=0;
static QudaSumComplex *d_reduceComplex=0;
static QudaSumFloat3 *d_reduceFloat3=0;
QudaSumFloat *h_reduceFloat=0;
QudaSumComplex *h_reduceComplex=0;
QudaSumFloat3 *h_reduceFloat3=0;
static QudaSumFloat *h_reduceFloat=0;
static QudaSumComplex *h_reduceComplex=0;
static QudaSumFloat3 *h_reduceFloat3=0;
unsigned long long blas_quda_flops;
unsigned long long blas_quda_bytes;
......@@ -38,8 +38,8 @@ int blas_threads[3][22];
// Number of thread blocks for each blas kernel
int blas_blocks[3][22];
dim3 blasBlock;
dim3 blasGrid;
static dim3 blasBlock;
static dim3 blasGrid;
void initBlas(void) {
......@@ -85,12 +85,12 @@ void initBlas(void) {
}
}
// Output from blas_test
#include<blas_param.h>
// Output from blas_test
#include <blas_param.h>
}
void endBlas() {
void endBlas(void) {
if (d_reduceFloat) cudaFree(d_reduceFloat);
if (d_reduceComplex) cudaFree(d_reduceComplex);
if (d_reduceFloat3) cudaFree(d_reduceFloat3);
......
......@@ -22,8 +22,6 @@ FullClover cudaCloverSloppy;
FullClover cudaCloverInvPrecise; // inverted clover term
FullClover cudaCloverInvSloppy;
void initBlas(void);
// define newQudaGaugeParam() and newQudaInvertParam()
#define INIT_PARAM
#include "check_params.h"
......
......@@ -4,6 +4,7 @@
#include <quda_internal.h>
#include <gauge_quda.h>
#include <spinor_quda.h>
#include <blas_quda.h>
#include <util_quda.h>
#include <test_util.h>
......@@ -70,6 +71,7 @@ void init() {
int dev = 0;
cudaSetDevice(dev);
initBlas();
param.X[0] /= 2;
cudaFullSpinor = allocateSpinorField(param.X, param.cuda_prec, sp_pad);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment