Advanced Computing Platform for Theoretical Physics

Commit 0bf496d8 authored by rbabich
Browse files

changes to unpackFloat4()


git-svn-id: http://lattice.bu.edu/qcdalg/cuda/quda@312 be54200a-260c-0410-bdd7-ce6af2a381ab
parent 3e8e2c6d
......@@ -573,8 +573,8 @@ inline void packFloat4(float4* a, float *b) {
/*
 * Copies the four floats stored in *b into a[0..3] using SSE load/store
 * intrinsics.
 *
 * NOTE(review): this text comes from a rendered diff hunk whose +/- markers
 * were lost, so two load/store pairs appear back to back. Presumably the
 * first pair is the pre-commit version and the second pair is its
 * replacement -- confirm against the repository before treating this as
 * compilable source.
 */
inline void unpackFloat4(float *a, float4 *b) {
__m128 SSEtmp;
// First pair: unaligned load from b, then a store to a that requires a to be
// 16-byte aligned.
SSEtmp = _mm_loadu_ps((const float*)b);
_mm_store_ps((float*)a, SSEtmp);
// Second pair: load from b that requires b to be 16-byte aligned, then an
// unaligned store to a (no alignment requirement on the float* destination).
SSEtmp = _mm_load_ps((const float*)b);
_mm_storeu_ps((float*)a, SSEtmp);
// Commented-out scalar form of the same copy:
//a[0] = b->x; a[1] = b->y; a[2] = b->z; a[3] = b->w;
}
......@@ -774,7 +774,7 @@ void loadParitySpinor(ParitySpinor ret, void *spinor, Precision cpu_prec,
allocateSpinorHalf();
} else if (!hSpinor1.spinorHalf || !hSpinor1.spinorNorm ||
!hSpinor2.spinorHalf || !hSpinor2.spinorNorm) {
printf("allocateSpinorHalf error %u %u %u %u\n",
printf("allocateSpinorHalf error %lu %lu %lu %lu\n",
(unsigned long)hSpinor1.spinorHalf, (unsigned long)hSpinor1.spinorNorm,
(unsigned long)hSpinor2.spinorHalf, (unsigned long)hSpinor2.spinorNorm);
exit(-1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment