//Implementations of methods in a2a.h template inline double VW_Mbyte_size(const A2AArg &_args, const typename VWtype::FieldInputParamType &field_setup_params){ typedef typename VWtype::DilutionType DilutionType; typedef typename VWtype::FermionFieldType FermionFieldType; DilutionType dil(_args); const int sz = dil.getNmodes(); double field_size = double(FermionFieldType::byte_size(field_setup_params))/(1024.*1024.); return sz * field_size; } template< typename mf_Policies> double A2AvectorV::Mbyte_size(const A2AArg &_args, const FieldInputParamType &field_setup_params){ return VW_Mbyte_size >(_args,field_setup_params); } template< typename mf_Policies> double A2AvectorVfftw::Mbyte_size(const A2AArg &_args, const FieldInputParamType &field_setup_params){ return VW_Mbyte_size >(_args,field_setup_params); } template< typename mf_Policies> double A2AvectorW::Mbyte_size(const A2AArg &_args, const FieldInputParamType &field_setup_params){ FullyPackedIndexDilution dil(_args); double ffield_size = double(FermionFieldType::byte_size(field_setup_params))/(1024.*1024.); double cfield_size = double(ComplexFieldType::byte_size(field_setup_params))/(1024.*1024.); return dil.getNl() * ffield_size + dil.getNhits() * cfield_size; } template< typename mf_Policies> double A2AvectorWfftw::Mbyte_size(const A2AArg &_args, const FieldInputParamType &field_setup_params){ return VW_Mbyte_size >(_args,field_setup_params); } struct VFFTfieldPolicyBasic{ template static inline void actionOutputMode(T &v, const int i){} template static inline void actionInputMode(T &v, const int i){} }; struct VFFTfieldPolicyAllocFree{ template static inline void actionOutputMode(T &v, const int i){ v.allocMode(i); } template static inline void actionInputMode(T &v, const int i){ v.freeMode(i); } }; template struct _V_fft_impl{ typedef typename InputType::FermionFieldType FermionFieldType; static inline void fft(OutputType &to, InputType &from, fieldOperation* mode_preop){ if(!UniqueID()){ printf("Doing V FFT\n"); fflush(stdout); } typedef typename FermionFieldType::InputParamType FieldParamType; FieldParamType field_setup = from.getMode(0).getDimPolParams(); FermionFieldType tmp(field_setup); Float preop_time = 0; Float fft_time = 0; const bool fft_dirs[4] = {true,true,true,false}; for(int mode=0;mode void A2AvectorVfftw::fft(const A2AvectorV &from, fieldOperation* mode_preop){ _V_fft_impl, const A2AvectorV, VFFTfieldPolicyBasic>::fft(*this,from,mode_preop); } template< typename mf_Policies> template void A2AvectorVfftw::destructivefft(A2AvectorV

&from, fieldOperation* mode_preop, VFFTW_ENABLE_IF_MANUAL_ALLOC(P) ){ _V_fft_impl, A2AvectorV

, VFFTfieldPolicyAllocFree>::fft(*this,from,mode_preop); } template struct _V_invfft_impl{ typedef typename InputType::FermionFieldType FermionFieldType; static inline void inversefft(OutputType &to, InputType &from, fieldOperation* mode_postop){ if(!UniqueID()){ printf("Doing V inverse FFT\n"); fflush(stdout); } typedef typename FermionFieldType::InputParamType FieldParamType; FieldParamType field_setup = from.getMode(0).getDimPolParams(); FermionFieldType tmp(field_setup); Float postop_time = 0; Float fft_time = 0; const bool fft_dirs[4] = {true,true,true,false}; for(int mode=0;mode void A2AvectorVfftw::inversefft(A2AvectorV &to, fieldOperation* mode_postop) const{ _V_invfft_impl, const A2AvectorVfftw, VFFTfieldPolicyBasic>::inversefft(to,*this,mode_postop); } template< typename mf_Policies> template void A2AvectorVfftw::destructiveInversefft(A2AvectorV

&to, fieldOperation* mode_postop, VFFTW_ENABLE_IF_MANUAL_ALLOC(P) ){ _V_invfft_impl, A2AvectorVfftw, VFFTfieldPolicyAllocFree>::inversefft(to,*this,mode_postop); } struct WFFTfieldPolicyBasic{ template static inline void actionOutputLowMode(T &v, const int i){} template static inline void actionOutputHighMode(T &v, const int i){} template static inline void actionInputLowMode(T &v, const int i){} template static inline void actionInputHighMode(T &v, const int i){} }; struct WFFTfieldPolicyAllocFree{ template static inline void actionOutputLowMode(T &v, const int i){ v.allocLowMode(i); } template static inline void actionOutputHighMode(T &v, const int i){ v.allocHighMode(i); } template static inline void actionInputLowMode(T &v, const int i){ v.freeLowMode(i); } template static inline void actionInputHighMode(T &v, const int i){ v.freeHighMode(i); } }; template struct _W_fft_impl{ typedef typename InputType::FermionFieldType FermionFieldType; inline static void fft(OutputType &to, InputType &from, fieldOperation* mode_preop){ if(!UniqueID()){ printf("Doing W FFT\n"); fflush(stdout); } typedef typename FermionFieldType::InputParamType FieldParamType; FieldParamType field_setup = from.getWh(0).getDimPolParams(); FermionFieldType tmp(field_setup), tmp2(field_setup); Float preop_time = 0; Float fft_time = 0; const bool fft_dirs[4] = {true,true,true,false}; //Do wl for(int mode=0;mode void A2AvectorWfftw::fft(const A2AvectorW &from, fieldOperation* mode_preop){ _W_fft_impl, const A2AvectorW, WFFTfieldPolicyBasic>::fft(*this,from,mode_preop); } template< typename mf_Policies> template void A2AvectorWfftw::destructivefft(A2AvectorW &from, fieldOperation* mode_preop, WFFTW_ENABLE_IF_MANUAL_ALLOC(P) ){ _W_fft_impl, A2AvectorW, WFFTfieldPolicyAllocFree>::fft(*this,from,mode_preop); } template struct _W_invfft_impl{ typedef typename InputType::FermionFieldType FermionFieldType; static inline void inversefft(OutputType &to, InputType &from, fieldOperation* mode_postop){ if(!UniqueID()){ printf("Doing W inverse FFT\n"); fflush(stdout); } typedef typename FermionFieldType::InputParamType FieldParamType; FieldParamType field_setup = from.getWh(0,0).getDimPolParams(); FermionFieldType tmp(field_setup), tmp2(field_setup); Float postop_time = 0; Float fft_time = 0; const bool fft_dirs[4] = {true,true,true,false}; //Do wl for(int mode=0;modefsite_ptr(i) + sc); for(int ssc=0;ssc<12;ssc++) FFTfieldPolicy::actionInputHighMode(from, ssc + 12*hit); //free for all sc } if(!UniqueID()){ printf("Finishing W inverse FFT\n"); fflush(stdout); } print_time("A2AvectorWfftw::fftinverse","FFT",fft_time); print_time("A2AvectorWfftw::fftinverse","Postop",postop_time); } }; template< typename mf_Policies> void A2AvectorWfftw::inversefft(A2AvectorW &to, fieldOperation* mode_postop) const{ _W_invfft_impl, const A2AvectorWfftw, WFFTfieldPolicyBasic>::inversefft(to,*this,mode_postop); } template< typename mf_Policies> template void A2AvectorWfftw::destructiveInversefft(A2AvectorW &to, fieldOperation* mode_postop, WFFTW_ENABLE_IF_MANUAL_ALLOC(P)){ _W_invfft_impl, A2AvectorWfftw, WFFTfieldPolicyAllocFree>::inversefft(to,*this,mode_postop); } //Generate the wh field. We store in a compact notation that knows nothing about any dilution we apply when generating V from this //For reproducibility we want to generate the wh field in the same order that Daiqian did originally. Here nhit random numbers are generated for each site/flavor template struct _set_wh_random_impl{}; template struct _set_wh_random_impl{ static void doit(std::vector > &wh, const RandomType &type, const int nhits){ typedef typename ComplexFieldType::FieldSiteType FieldSiteType; LRG.SetInterval(1, 0); int sites = wh[0]->nsites(), flavors = wh[0]->nflavors(); for(int i = 0; i < sites*flavors; ++i) { int flav = i / sites; int st = i % sites; LRG.AssignGenerator(st,flav); for(int j = 0; j < nhits; ++j) { FieldSiteType* p = wh[j]->site_ptr(st,flav); RandomComplex::rand(p,type,FOUR_D); } } } }; template< typename mf_Policies> void A2AvectorW::setWhRandom(const RandomType &type){ _set_wh_random_impl::type>::doit(wh,type,nhits); } //Get the diluted source with index id. //We use the same set of random numbers for each spin and dilution as we do not need to rely on stochastic cancellation to separate them //For legacy reasons we use different random numbers for the two G-parity flavors, although this is not strictly necessary template< typename mf_Policies> template void A2AvectorW::getDilutedSource(TargetFermionFieldType &into, const int dil_id) const{ typedef FieldSiteType mf_Complex; typedef typename TargetFermionFieldType::FieldSiteType TargetComplex; const char* fname = "getDilutedSource(...)"; int hit, tblock, spin_color, flavor; StandardIndexDilution stdidx(getArgs()); stdidx.indexUnmap(dil_id,hit,tblock,spin_color,flavor); VRB.Result("A2AvectorW", fname, "Generating random wall source %d = (%d, %d, %d, %d).\n ", dil_id, hit, tblock, flavor, spin_color); int tblock_origt = tblock * args.src_width; into.zero(); if(tblock_origt / GJP.TnodeSites() != GJP.TnodeCoor()){ VRB.Result("A2AvectorW", fname, "Not on node\n "); return; } int tblock_origt_lcl = tblock_origt % GJP.TnodeSites(); int src_size = GJP.VolNodeSites()/GJP.TnodeSites() * args.src_width; //size of source in units of complex numbers #pragma omp parallel for for(int i=0;isite_ptr(x,flavor); //note same random numbers for each spin/color! *into_site = *from_site; } } //When gauge fixing prior to taking the FFT it is necessary to uncompact the wh field in the spin-color index, as these indices are acted upon by the gauge fixing //(I suppose technically only the color indices need uncompacting; this might be considered as a future improvement) template< typename mf_Policies> void A2AvectorW::getSpinColorDilutedSource(FermionFieldType &into, const int hit, const int sc_id) const{ const char* fname = "getSpinColorDilutedSource(...)"; into.zero(); #pragma omp parallel for for(int i=0;infsites();i++){ //same mapping, different site_size FieldSiteType &into_site = *(into.fsite_ptr(i) + sc_id); const FieldSiteType &from_site = *(wh[hit]->fsite_ptr(i)); into_site = from_site; } } template::type, complex_double_or_float_mark>::value, int>::type = 0> void randomizeVW(A2AvectorV &V, A2AvectorW &W){ typedef typename mf_Policies::FermionFieldType FermionFieldType; typedef typename mf_Policies::ComplexFieldType ComplexFieldType; int nl = V.getNl(); int nh = V.getNh(); //number of fully diluted high-mode indices int nhit = V.getNhits(); assert(nl == W.getNl()); assert(nh == W.getNh()); assert(nhit == W.getNhits()); std::vector wl(nl); for(int i=0;i vl(nl); for(int i=0;i wh(nhit); for(int i=0;i vh(nh); for(int i=0;i::type, grid_vector_complex_mark>::value, int>::type = 0> void randomizeVW(A2AvectorV &V, A2AvectorW &W){ typedef typename mf_Policies::FermionFieldType::FieldDimensionPolicy::EquivalentScalarPolicy ScalarDimensionPolicy; typedef CPSfermion4D ScalarFermionFieldType; typedef CPScomplex4D ScalarComplexFieldType; int nl = V.getNl(); int nh = V.getNh(); //number of fully diluted high-mode indices int nhit = V.getNhits(); assert(nl == W.getNl()); assert(nh == W.getNh()); assert(nhit == W.getNhits()); ScalarFermionFieldType tmp; ScalarComplexFieldType tmp_cmplx; for(int i=0;i FieldType const * getBaseAndShift(int shift[3], const int p[3], FieldType const *base_p, FieldType const *base_m){ //With G-parity base_p has momentum +1 in each G-parity direction, base_m has momentum -1 in each G-parity direction. //Non-Gparity directions are assumed to have momentum 0 //Units of momentum are 2pi/L for periodic BCs, pi/L for antiperiodic and pi/2L for Gparity FieldType const * out = GJP.Gparity() ? NULL : base_p; for(int d=0;d<3;d++){ if(GJP.Bc(d) == BND_CND_GPARITY){ //Type 1 : f_{p=4b+1}(n) = f_+1(n+b) // p \in {.. -7 , -3, 1, 5, 9 ..} //Type 2 : f_{p=4b-1}(n) = f_-1(n+b) // p \n {.. -5, -1, 3, 7 , 11 ..} if( (p[d]-1) % 4 == 0 ){ //Type 1 int b = (p[d]-1)/4; shift[d] = -b; //shift f_+1 backwards by b if(out == NULL) out = base_p; else if(out != base_p) ERR.General("","getBaseAndShift","Momentum (%d,%d,%d) appears to be invalid because momenta in different G-parity directions do not reside in the same set\n",p[0],p[1],p[2]); }else if( (p[d]+1) % 4 == 0 ){ //Type 2 int b = (p[d]+1)/4; shift[d] = -b; //shift f_-1 backwards by b if(out == NULL) out = base_m; else if(out != base_m) ERR.General("","getBaseAndShift","Momentum (%d,%d,%d) appears to be invalid because momenta in different G-parity directions do not reside in the same set\n",p[0],p[1],p[2]); }else ERR.General("","getBaseAndShift","Momentum (%d,%d,%d) appears to be invalid because one or more components in G-parity directions are not allowed\n",p[0],p[1],p[2]); }else{ //f_b(n) = f_0(n+b) //Let the other directions decide on which base to use if some of them are G-parity dirs ; otherwise the pointer defaults to base_p above shift[d] = -p[d]; } } if(!UniqueID()) printf("getBaseAndShift for p=(%d,%d,%d) determined shift=(%d,%d,%d) from ptr %c\n",p[0],p[1],p[2],shift[0],shift[1],shift[2],out == base_p ? 'p' : 'm'); assert(out != NULL); return out; } //Use the relations between FFTs to obtain the FFT for a chosen quark momentum //With G-parity BCs there are 2 disjoint sets of momenta hence there are 2 base FFTs template< typename mf_Policies> void A2AvectorWfftw::getTwistedFFT(const int p[3], A2AvectorWfftw const *base_p, A2AvectorWfftw const *base_m){ Float time = -dclock(); std::vector shift(3); A2AvectorWfftw const* base = getBaseAndShift(&shift[0], p, base_p, base_m); if(base == NULL) ERR.General("A2AvectorWfftw","getTwistedFFT","Base pointer for twist momentum (%d,%d,%d) is NULL\n",p[0],p[1],p[2]); wl = base->wl; wh = base->wh; int nshift = 0; for(int i=0;i<3;i++) if(shift[i]) nshift++; if(nshift > 0){ for(int i=0;igetNmodes();i++) shiftPeriodicField( this->getMode(i), base->getMode(i), shift); } time += dclock(); print_time("A2AvectorWfftw::getTwistedFFT","Twist",time); } template< typename mf_Policies> void A2AvectorWfftw::shiftFieldsInPlace(const std::vector &shift){ Float time = -dclock(); int nshift = 0; for(int i=0;i<3;i++) if(shift[i]) nshift++; if(nshift > 0){ for(int i=0;igetNmodes();i++) shiftPeriodicField( this->getMode(i), this->getMode(i), shift); } print_time("A2AvectorWfftw::shiftFieldsInPlace","Total",time + dclock()); } //A version of the above that directly shifts the base Wfftw rather than outputting into a separate storage //Returns the pointer to the Wfftw acted upon and the *shift required to restore the Wfftw to it's original form* template< typename mf_Policies> std::pair< A2AvectorWfftw*, std::vector > A2AvectorWfftw::inPlaceTwistedFFT(const int p[3], A2AvectorWfftw *base_p, A2AvectorWfftw *base_m){ Float time = -dclock(); std::vector shift(3); A2AvectorWfftw* base = getBaseAndShift(&shift[0], p, base_p, base_m); if(base == NULL) ERR.General("A2AvectorWfftw","getTwistedFFT","Base pointer for twist momentum (%d,%d,%d) is NULL\n",p[0],p[1],p[2]); base->shiftFieldsInPlace(shift); for(int i=0;i<3;i++) shift[i] = -shift[i]; time += dclock(); print_time("A2AvectorWfftw::inPlaceTwistedFFT","Twist",time); return std::pair< A2AvectorWfftw*, std::vector >(base,shift); } template< typename mf_Policies> void A2AvectorVfftw::getTwistedFFT(const int p[3], A2AvectorVfftw const *base_p, A2AvectorVfftw const *base_m){ Float time = -dclock(); std::vector shift(3); A2AvectorVfftw const* base = getBaseAndShift(&shift[0], p, base_p, base_m); if(base == NULL) ERR.General("A2AvectorVfftw","getTwistedFFT","Base pointer for twist momentum (%d,%d,%d) is NULL\n",p[0],p[1],p[2]); v = base->v; int nshift = 0; for(int i=0;i<3;i++) if(shift[i]) nshift++; if(nshift > 0){ for(int i=0;igetNmodes();i++) shiftPeriodicField( this->getMode(i), base->getMode(i), shift); } time += dclock(); print_time("A2AvectorVfftw::getTwistedFFT","Twist",time); } template< typename mf_Policies> void A2AvectorVfftw::shiftFieldsInPlace(const std::vector &shift){ Float time = -dclock(); int nshift = 0; for(int i=0;i<3;i++) if(shift[i]) nshift++; if(nshift > 0){ for(int i=0;igetNmodes();i++) shiftPeriodicField( this->getMode(i), this->getMode(i), shift); } print_time("A2AvectorVfftw::shiftFieldsInPlace","Total",time + dclock()); } //A version of the above that directly shifts the base Wfftw rather than outputting into a separate storage //Returns the pointer to the Wfftw acted upon and the *shift required to restore the Wfftw to it's original form* template< typename mf_Policies> std::pair< A2AvectorVfftw*, std::vector > A2AvectorVfftw::inPlaceTwistedFFT(const int p[3], A2AvectorVfftw *base_p, A2AvectorVfftw *base_m){ Float time = -dclock(); std::vector shift(3); A2AvectorVfftw* base = getBaseAndShift(&shift[0], p, base_p, base_m); if(base == NULL) ERR.General("A2AvectorWfftw","getTwistedFFT","Base pointer for twist momentum (%d,%d,%d) is NULL\n",p[0],p[1],p[2]); base->shiftFieldsInPlace(shift); for(int i=0;i<3;i++) shift[i] = -shift[i]; time += dclock(); print_time("A2AvectorVfftw::inPlaceTwistedFFT","Twist",time); return std::pair< A2AvectorVfftw*, std::vector >(base,shift); }