quda: cleaned up header files (invert_quda.cpp is now interface_quda.cpp)

git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@593 be54200a-260c-0410-bdd7-ce6af2a381ab

quda: cleaned up header files (invert_quda.cpp is now interface_quda.cpp)
git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@593 be54200a-260c-0410-bdd7-ce6af2a381ab
e5f59d3a · rbabich · c3ded658 · e5f59d3a · e5f59d3a · e5f59d3a
Commit e5f59d3a authored Dec 09, 2009 by rbabich
--- a/include/blas_quda.h
+++ b/include/blas_quda.h
-#include <cuComplex.h>
-#include <enum_quda.h>
-
 #ifndef _QUDA_BLAS_H
 #define _QUDA_BLAS_H

+#include <cuComplex.h>
+#include <quda_internal.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif

--- a/include/clover_quda.h
+++ b/include/clover_quda.h
+#ifndef _CLOVER_QUDA_H
+#define _CLOVER_QUDA_H
+
+#include <quda_internal.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  void allocateParityClover(ParityClover *, int *X, int pad,
+			    Precision precision);
+  void allocateCloverField(FullClover *, int *X, int pad, Precision precision);
+
+  void freeParityClover(ParityClover *clover);
+  void freeCloverField(FullClover *clover);
+
+  void loadParityClover(ParityClover ret, void *clover, Precision cpu_prec,
+			CloverFieldOrder clover_order);
+  void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
+		      CloverFieldOrder clover_order);
+  void loadCloverField(FullClover ret, void *clover, Precision cpu_prec,
+		       CloverFieldOrder clover_order);
+
+  /* void createCloverField(FullClover *cudaClover, void *cpuClover, int *X,
+                         Precision precision); */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _CLOVER_QUDA_H
--- a/include/dslash_quda.h
+++ b/include/dslash_quda.h
 #ifndef _DSLASH_QUDA_H
 #define _DSLASH_QUDA_H

-#include <cuComplex.h>
-#include <quda.h>
 #include <quda_internal.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

-  extern FullGauge cudaGaugePrecise;
-  extern FullGauge cudaGaugeSloppy;
-
-  extern FullClover cudaCloverPrecise;
-  extern FullClover cudaCloverSloppy;
-
-  extern FullClover cudaCloverInvPrecise;
-  extern FullClover cudaCloverInvSloppy;
-
-// ---------- dslash_quda.cu ----------
+  extern unsigned long long dslash_quda_flops;
+  extern unsigned long long dslash_quda_bytes;

  int dslashCudaSharedBytes(Precision spinor_prec, int blockDim);

@@ -107,17 +97,6 @@ extern "C" {
  void cloverHCuda(ParitySpinor res, FullGauge gauge, FullClover clover,
 		   ParitySpinor spinor, int oddBit);

-  // -- inv_cg_cuda.cpp
-  void invertCgCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
-		    QudaInvertParam *param);
-  
-  // -- inv_bicgstab_cuda.cpp
-  void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp, 
-			  QudaInvertParam *param, DagType dag_type);
-  
-  extern unsigned long long dslash_quda_flops;
-  extern unsigned long long dslash_quda_bytes;
-
 #ifdef __cplusplus
 }
 #endif

--- a/include/gauge_quda.h
+++ b/include/gauge_quda.h
 #ifndef _GAUGE_QUDA_H
 #define _GAUGE_QUDA_H

-#include <enum_quda.h>
-#include <dslash_quda.h>
+#include <quda_internal.h>

 #ifdef __cplusplus
 extern "C" {

--- a/include/invert_quda.h
+++ b/include/invert_quda.h
+#ifndef _INVERT_QUDA_H
+#define _INVERT_QUDA_H
+
+#include <quda_internal.h>
+#include <quda.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  extern FullGauge cudaGaugePrecise;
+  extern FullGauge cudaGaugeSloppy;
+
+  extern FullClover cudaCloverPrecise;
+  extern FullClover cudaCloverSloppy;
+
+  extern FullClover cudaCloverInvPrecise;
+  extern FullClover cudaCloverInvSloppy;
+
+  // -- inv_cg_quda.cpp
+  void invertCgCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp,
+		    QudaInvertParam *param);
+  
+  // -- inv_bicgstab_quda.cpp
+  void invertBiCGstabCuda(ParitySpinor x, ParitySpinor b, ParitySpinor tmp, 
+			  QudaInvertParam *param, DagType dag_type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _INVERT_QUDA_H
--- a/include/quda.h
+++ b/include/quda.h
@@ -78,7 +78,7 @@ extern "C" {

  } QudaInvertParam;

-  // Interface functions, found in invert_quda.cpp
+  // Interface functions, found in interface_quda.cpp

  void initQuda(int dev);
  void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);

--- a/include/quda_internal.h
+++ b/include/quda_internal.h
@@ -100,7 +100,4 @@ extern "C" {
 }
 #endif

-#include <blas_quda.h>
-#include <dslash_quda.h>
-
 #endif // _QUDA_INTERNAL_H
--- a/include/spinor_quda.h
+++ b/include/spinor_quda.h
-#ifndef _QUDA_SPINOR_H
-#define _QUDA_SPINOR_H
+#ifndef _SPINOR_QUDA_H
+#define _SPINOR_QUDA_H

-#include <enum_quda.h>
-#include <dslash_quda.h>
+#include <quda_internal.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

-  // -- spinor_quda.cpp
-
  ParitySpinor allocateParitySpinor(int *X, Precision precision, int stride);
  FullSpinor allocateSpinorField(int *X, Precision precision, int stride);
  
@@ -30,26 +27,8 @@ extern "C" {
  void spinorHalfPack(float *c, short *s0, float *f0);
  void spinorHalfUnpack(float *f0, float *c, short *s0);

-  // -- clover_quda.cpp
-
-  void allocateParityClover(ParityClover *, int *X, int pad, Precision precision);
-  void allocateCloverField(FullClover *, int *X, int pad, Precision precision);
-
-  void freeParityClover(ParityClover *clover);
-  void freeCloverField(FullClover *clover);
-
-  void loadParityClover(ParityClover ret, void *clover, Precision cpu_prec,
-			CloverFieldOrder clover_order);
-  void loadFullClover(FullClover ret, void *clover, Precision cpu_prec,
-		      CloverFieldOrder clover_order);
-  void loadCloverField(FullClover ret, void *clover, Precision cpu_prec,
-		       CloverFieldOrder clover_order);
-
-  /* void createCloverField(FullClover *cudaClover, void *cpuClover, int *X,
-                         Precision precision); */
-
 #ifdef __cplusplus
 }
 #endif

-#endif // _QUDA_SPINOR_H
+#endif // _SPINOR_QUDA_H
--- a/include/util_quda.h
+++ b/include/util_quda.h
 #ifndef _UTIL_QUDA_H
 #define _UTIL_QUDA_H

-#include <quda_internal.h>
-
 #ifdef __cplusplus
 extern "C" {
 #endif

--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,12 +2,12 @@ include ../make.inc

 QUDA = libquda.a
 QUDA_OBJS = blas_quda.o clover_quda.o dslash_quda.o gauge_quda.o         \
-	inv_bicgstab_quda.o inv_cg_quda.o invert_quda.o spinor_quda.o \
+	inv_bicgstab_quda.o inv_cg_quda.o interface_quda.o spinor_quda.o \
 	util_quda.o

 # header files, found in include/
-QUDA_HDRS = blas_quda.h dslash_quda.h enum_quda.h gauge_quda.h quda.h \
-	quda_internal.h spinor_quda.h util_quda.h
+QUDA_HDRS = blas_quda.h clover_quda.h dslash_quda.h enum_quda.h gauge_quda.h \
+	invert_quda.h quda.h quda_internal.h spinor_quda.h util_quda.h

 # files containing complex macros and other code fragments to be inlined,
 # found in lib/

--- a/lib/blas_quda.cu
+++ b/lib/blas_quda.cu
@@ -2,6 +2,7 @@
 #include <stdio.h>

 #include <quda_internal.h>
+#include <blas_quda.h>

 #define REDUCE_MAX_BLOCKS 2048


--- a/lib/clover_quda.cpp
+++ b/lib/clover_quda.cpp
@@ -3,7 +3,7 @@
 #include <math.h>

 #include <quda_internal.h>
-#include <spinor_quda.h>
+#include <clover_quda.h>

 void allocateParityClover(ParityClover *ret, int *X, int pad, Precision precision)
 {

--- a/lib/dslash_constants.h
+++ b/lib/dslash_constants.h
@@ -36,4 +36,3 @@ __constant__ float pi_f;
 // double precision constants
 __constant__ double anisotropy;
 __constant__ double t_boundary;
-
--- a/lib/dslash_def.h
+++ b/lib/dslash_def.h
 // dslash_def.h - Dslash kernel definitions

-// There are currently 64 different variants of the Dslash kernel,
+// There are currently 288 different variants of the Dslash kernel,
 // each one characterized by a set of 6 options, where each option can
-// take one of two values (2^6 = 64).  This file is structured so that
-// the C preprocessor loops through all 64 variants (in a manner
-// resembling a binary counter), sets the appropriate macros, and
-// defines the corresponding functions.
+// take one of several values (3*3*4*2*2*2 = 288).  This file is
+// structured so that the C preprocessor loops through all 288
+// variants (in a manner resembling a counter), sets the appropriate
+// macros, and defines the corresponding functions.
 //
 // As an example of the function naming conventions, consider
 //
@@ -250,7 +250,7 @@ DD_FUNC(DD_GPREC_F, DD_SPREC_F, DD_CPREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)(DD_
 #define DD_SPREC 2
 #else

-#undef DD_SPREC // from here
+#undef DD_SPREC
 #define DD_SPREC 0

 #if (DD_CPREC==0)
@@ -263,7 +263,7 @@ DD_FUNC(DD_GPREC_F, DD_SPREC_F, DD_CPREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)(DD_
 #undef DD_CPREC
 #define DD_CPREC 3

-#else // to here
+#else

 #undef DD_LOOP
 #undef DD_DAG
@@ -271,9 +271,9 @@ DD_FUNC(DD_GPREC_F, DD_SPREC_F, DD_CPREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)(DD_
 #undef DD_RECON
 #undef DD_GPREC
 #undef DD_SPREC
-#undef DD_CPREC //
+#undef DD_CPREC

-#endif // DD_CPREC //
+#endif // DD_CPREC
 #endif // DD_SPREC
 #endif // DD_GPREC
 #endif // DD_RECON

--- a/lib/dslash_quda.cu
+++ b/lib/dslash_quda.cu
@@ -5,8 +5,8 @@
 #include <dslash_quda.h>
 #include <spinor_quda.h> // not needed once call to allocateParitySpinor() is removed

-#include<dslash_textures.h>
-#include<dslash_constants.h>
+#include <dslash_textures.h>
+#include <dslash_constants.h>

 unsigned long long dslash_quda_flops;
 unsigned long long dslash_quda_bytes;
@@ -33,7 +33,7 @@ int dslashCudaSharedBytes(Precision precision) {

 #include <dslash_common.h>

-int initDslash = 0;
+static int initDslash = 0;

 void initDslashConstants(FullGauge gauge, int sp_stride, int cl_stride) {
  int Vh = gauge.volume;
@@ -160,8 +160,6 @@ static void bindGaugeTex(FullGauge gauge, int oddBit) {
  }
 }

-// ----------------------------------------------------------------------
-
 // ----------------------------------------------------------------------
 // plain Wilson Dslash:


--- a/lib/gauge_quda.cpp
+++ b/lib/gauge_quda.cpp
 #include <stdlib.h>
 #include <stdio.h>
+#include <math.h>

 #include <quda_internal.h>
 #include <gauge_quda.h>

--- a/lib/invert_quda.cpp
+++ b/lib/invert_quda.cpp
@@ -4,8 +4,12 @@

 #include <quda.h>
 #include <quda_internal.h>
-#include <spinor_quda.h>
 #include <gauge_quda.h>
+#include <spinor_quda.h>
+#include <clover_quda.h>
+#include <blas_quda.h>
+#include <dslash_quda.h>
+#include <invert_quda.h>

 #define spinorSiteSize 24 // real numbers per spinor


--- a/lib/inv_bicgstab_quda.cpp
+++ b/lib/inv_bicgstab_quda.cpp
@@ -3,10 +3,11 @@
 #include <math.h>
 #include <cuComplex.h>

-#include <quda.h>
 #include <quda_internal.h>
 #include <spinor_quda.h>
-
+#include <blas_quda.h>
+#include <dslash_quda.h>
+#include <invert_quda.h>
 #include <util_quda.h>

 void MatVec(ParitySpinor out, FullGauge gauge,  FullClover clover, FullClover cloverInv, ParitySpinor in, 

--- a/lib/inv_cg_quda.cpp
+++ b/lib/inv_cg_quda.cpp
@@ -2,10 +2,12 @@
 #include <stdlib.h>
 #include <math.h>

-#include <quda.h>
 #include <quda_internal.h>
-#include <util_quda.h>
 #include <spinor_quda.h>
+#include <blas_quda.h>
+#include <dslash_quda.h>
+#include <invert_quda.h>
+#include <util_quda.h>

 void MatVec(ParitySpinor out, FullGauge gauge,  FullClover clover, FullClover cloverInv, ParitySpinor in, 
 	    QudaInvertParam *invert_param, ParitySpinor tmp) {

--- a/lib/io_spinor.h
+++ b/lib/io_spinor.h