/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmA64FX.h

    Copyright (C) 2020

Author:  Nils Meyer  Regensburg University

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#pragma once

// Overview
// --------
// This header declares explicit specialisations of the WilsonKernels
// AsmDhopSite* member functions for the A64FX build. Each specialisation
// consists of a signature followed directly by an `#include`; the function
// body is therefore whatever the included file expands to.
//
// Before each group of specialisations the macros KERNEL_DAG,
// INTERIOR_AND_EXTERIOR, INTERIOR and EXTERIOR are (un)defined; presumably the
// included body file tests them to select the kernel variant -- confirm
// against that file.
//
// Every line below must stay on its own physical line: preprocessor
// directives are line-oriented, and a `//` comment swallows everything that
// follows it on the same line.
//
// NOTE(review): in this copy of the file every `#include` has no target and
// every `WilsonKernels::` specialisation has no template argument (it looks as
// if text between angle brackets was stripped). As written, the two active
// specialisations in each group are textually identical. Restore the include
// targets and the template arguments from the authoritative version of this
// file before building with A64FX defined.

//#if defined(A64FXASM)
#if defined(A64FX)

// safety include
#include

// undefine everything related to kernels
#include

///////////////////////////////////////////////////////////
// If we are A64FX specialise the single precision routine
///////////////////////////////////////////////////////////
#if defined(DSLASHINTRIN)
//#pragma message ("A64FX Dslash: intrin")
#include
#else
#pragma message ("A64FX Dslash: asm")
#include
#endif

/// Switch off the 5d vectorised code optimisations
#undef DWFVEC5D

/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, single
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR

// The pragma requests -O3 and switches off both instruction-scheduling passes
// for the functions defined after it.
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
//template<> void
//WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
//#include

// undefine
#include

///////////////////////////////////////////////////////////
// If we are A64FX specialise the double precision routine
///////////////////////////////////////////////////////////
#if defined(DSLASHINTRIN)
#include
#else
#include
#endif

// former KNL
//#define MAYBEPERM(A,perm) if (perm) { A ; }
//#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
//#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];

#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR

/////////////////////////////////////////////////////////////////
// XYZT vectorised, undag Kernel, double
/////////////////////////////////////////////////////////////////
#undef KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                            int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                            int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, double
/////////////////////////////////////////////////////////////////
#define KERNEL_DAG
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                  int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                  int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                  int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
// template<> void
// WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
//                                  int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
// #include

// undefs
#include

#endif // defined(A64FX)