/* -*- mode:c++; c-basic-offset:4 -*- */ #ifndef INCLUDED_BFM_EVO_AUX_HT_H #define INCLUDED_BFM_EVO_AUX_HT_H namespace bfm_evo_aux { template static inline void trless_am(Float *p, Float coef) { p[0] = p[8] = p[16] = 0.; //real diagonal elements Float tmp = 0.5*(p[2] - p[6]) * coef; p[2]=tmp; p[6] = -tmp; tmp = 0.5*(p[3] + p[7]) * coef; p[3]=tmp; p[7] = tmp; tmp = 0.5*(p[4] - p[12]) * coef; p[4]=tmp; p[12] = -tmp; tmp = 0.5*(p[5] + p[13]) * coef; p[5]=tmp; p[13] = tmp; tmp = 0.5*(p[10] - p[14]) * coef; p[10]=tmp; p[14] = -tmp; tmp = 0.5*(p[11] + p[15]) * coef; p[11]=tmp; p[15] = tmp; Float c = 1./3. * (p[1] + p[9] + p[17]); p[1] = (p[1] - c) * coef; p[9] = (p[9] - c) * coef; p[17] = (p[17] - c) * coef; } template static inline void mDotMEqual(Float* c, const Float* a, const Float* b) { *c = *a * *b - *(a+1) * *(b+1) + *(a+2) * *(b+6) - *(a+3) * *(b+7) + *(a+4) * *(b+12) - *(a+5) * *(b+13); *(c+1) = *a * *(b+1) + *(a+1) * *b + *(a+2) * *(b+7) + *(a+3) * *(b+6) + *(a+4) * *(b+13) + *(a+5) * *(b+12); *(c+2) = *a * *(b+2) - *(a+1) * *(b+3) + *(a+2) * *(b+8) - *(a+3) * *(b+9) + *(a+4) * *(b+14) - *(a+5) * *(b+15); *(c+3) = *a * *(b+3) + *(a+1) * *(b+2) + *(a+2) * *(b+9) + *(a+3) * *(b+8) + *(a+4) * *(b+15) + *(a+5) * *(b+14); *(c+4) = *a * *(b+4) - *(a+1) * *(b+5) + *(a+2) * *(b+10) - *(a+3) * *(b+11) + *(a+4) * *(b+16) - *(a+5) * *(b+17); *(c+5) = *a * *(b+5) + *(a+1) * *(b+4) + *(a+2) * *(b+11) + *(a+3) * *(b+10) + *(a+4) * *(b+17) + *(a+5) * *(b+16); *(c+6) = *(a+6) * *b - *(a+7) * *(b+1) + *(a+8) * *(b+6) - *(a+9) * *(b+7) + *(a+10) * *(b+12) - *(a+11) * *(b+13); *(c+7) = *(a+6) * *(b+1) + *(a+7) * *b + *(a+8) * *(b+7) + *(a+9) * *(b+6) + *(a+10) * *(b+13) + *(a+11) * *(b+12); *(c+8) = *(a+6) * *(b+2) - *(a+7) * *(b+3) + *(a+8) * *(b+8) - *(a+9) * *(b+9) + *(a+10) * *(b+14) - *(a+11) * *(b+15); *(c+9) = *(a+6) * *(b+3) + *(a+7) * *(b+2) + *(a+8) * *(b+9) + *(a+9) * *(b+8) + *(a+10) * *(b+15) + *(a+11) * *(b+14); *(c+10) = *(a+6) * *(b+4) - *(a+7) * *(b+5) + *(a+8) * *(b+10) - *(a+9) * *(b+11) + *(a+10) * *(b+16) - *(a+11) * *(b+17); *(c+11) = *(a+6) * *(b+5) + *(a+7) * *(b+4) + *(a+8) * *(b+11) + *(a+9) * *(b+10) + *(a+10) * *(b+17) + *(a+11) * *(b+16); *(c+12) = *(a+12) * *b - *(a+13) * *(b+1) + *(a+14) * *(b+6) - *(a+15) * *(b+7) + *(a+16) * *(b+12) - *(a+17) * *(b+13); *(c+13) = *(a+12) * *(b+1) + *(a+13) * *b + *(a+14) * *(b+7) + *(a+15) * *(b+6) + *(a+16) * *(b+13) + *(a+17) * *(b+12); *(c+14) = *(a+12) * *(b+2) - *(a+13) * *(b+3) + *(a+14) * *(b+8) - *(a+15) * *(b+9) + *(a+16) * *(b+14) - *(a+17) * *(b+15); *(c+15) = *(a+12) * *(b+3) + *(a+13) * *(b+2) + *(a+14) * *(b+9) + *(a+15) * *(b+8) + *(a+16) * *(b+15) + *(a+17) * *(b+14); *(c+16) = *(a+12) * *(b+4) - *(a+13) * *(b+5) + *(a+14) * *(b+10) - *(a+15) * *(b+11) + *(a+16) * *(b+16) - *(a+17) * *(b+17); *(c+17) = *(a+12) * *(b+5) + *(a+13) * *(b+4) + *(a+14) * *(b+11) + *(a+15) * *(b+10) + *(a+16) * *(b+17) + *(a+17) * *(b+16); } template static inline void mStarDotMTransEqual(Float* c, const Float* a, const Float* b){ c[0] = a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] + a[4] * b[4] + a[5] * b[5]; c[1] = a[0] * b[1] - a[1] * b[0] + a[2] * b[3] - a[3] * b[2] + a[4] * b[5] - a[5] * b[4]; c[2] = a[0] * b[6] + a[1] * b[7] + a[2] * b[8] + a[3] * b[9] + a[4] * b[10] + a[5] * b[11]; c[3] = a[0] * b[7] - a[1] * b[6] + a[2] * b[9] - a[3] * b[8] + a[4] * b[11] - a[5] * b[10]; c[4] = a[0] * b[12] + a[1] * b[13] + a[2] * b[14] + a[3] * b[15] + a[4] * b[16] + a[5] * b[17]; c[5] = a[0] * b[13] - a[1] * b[12] + a[2] * b[15] - a[3] * b[14] + a[4] * b[17] - a[5] * b[16]; c[6] = a[6] * b[0] + a[7] * b[1] + a[8] * b[2] + a[9] * b[3] + a[10] * b[4] + a[11] * b[5]; c[7] = a[6] * b[1] - a[7] * b[0] + a[8] * b[3] - a[9] * b[2] + a[10] * b[5] - a[11] * b[4]; c[8] = a[6] * b[6] + a[7] * b[7] + a[8] * b[8] + a[9] * b[9] + a[10] * b[10] + a[11] * b[11]; c[9] = a[6] * b[7] - a[7] * b[6] + a[8] * b[9] - a[9] * b[8] + a[10] * b[11] - a[11] * b[10]; c[10] = a[6] * b[12] + a[7] * b[13] + a[8] * b[14] + a[9] * b[15] + a[10] * b[16] + a[11] * b[17]; c[11] = a[6] * b[13] - a[7] * b[12] + a[8] * b[15] - a[9] * b[14] + a[10] * b[17] - a[11] * b[16]; c[12] = a[12] * b[0] + a[13] * b[1] + a[14] * b[2] + a[15] * b[3] + a[16] * b[4] + a[17] * b[5]; c[13] = a[12] * b[1] - a[13] * b[0] + a[14] * b[3] - a[15] * b[2] + a[16] * b[5] - a[17] * b[4]; c[14] = a[12] * b[6] + a[13] * b[7] + a[14] * b[8] + a[15] * b[9] + a[16] * b[10] + a[17] * b[11]; c[15] = a[12] * b[7] - a[13] * b[6] + a[14] * b[9] - a[15] * b[8] + a[16] * b[11] - a[17] * b[10]; c[16] = a[12] * b[12] + a[13] * b[13] + a[14] * b[14] + a[15] * b[15] + a[16] * b[16] + a[17] * b[17]; c[17] = a[12] * b[13] - a[13] * b[12] + a[14] * b[15] - a[15] * b[14] + a[16] * b[17] - a[17] * b[16]; } // a += b template static inline void su3_add(Float *a, Float *b) { for(int i = 0; i < 18; ++i) { a[i] += b[i]; } } //------------------------------------------------------------------ // sproj with (1 + gamma_0) //------------------------------------------------------------------ template void sprojTrXp(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrXm(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrYp(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrYm(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrZp(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrZm(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrTp(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk void sprojTrTm(IFloat *f, IFloat *v, IFloat *w, int num_blk, int v_stride, int w_stride) { int row, col, blk ; IFloat *tf, *tv, *tw ; tf = f ; for (row=0; row<18; row++) *tf++ = 0.0 ; for (blk=0; blk