#include #ifndef INCLUDED_FFT_H_ #define INCLUDED_FFT_H_ #ifdef USE_FFTW #include #include #include #include CPS_START_NAMESPACE ////////////////////////////////////////////////////////// /*! @file @brief a FFT implementation @author Taku Izubuchi @date 2007-05-30 TODOs: - There is unnecessary local memory copy in `shift()' and `unshift()'. - When there is more than one direction, mu, which doesn't satisfy nlr % N == 0 where nlr = GJP.VolSites() / GJP.NodeSites(mu) * [num of Complex on a site, such as 144] N = GJP.Nodes(mu), the `FFT_one_dir()' bangs. This limitation is curable by padding data, for example, by increasing number of site Complex. More memory/computation preserving way is to pad a chunk in `skew' section and adjust accordingly in `unskew()' automatically. This is not done for now as the day after tomorrow is the deadline of the lattice conference and I will have to write something. Tom is very mad at me also. ----------------------------------------------------------- CVS keywords $Author: chulwoo $ $Date: 2013-04-08 20:50:00 $ $Header: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/include/util/fft.h,v 1.2 2013-04-08 20:50:00 chulwoo Exp $ $Id: fft.h,v 1.2 2013-04-08 20:50:00 chulwoo Exp $ $Name: not supported by cvs2svn $ $Locker: $ $RCSfile: fft.h,v $ $Revision: 1.2 $ $Source: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/include/util/fft.h,v $ $State: Exp $ */ /*----------------------------------------------------------*/ //------------------------------------------------------------------ //! A class implementing FFT in one direction. /*! This class perform FFT in the following way: 1. gather a whole dimension into a node 2. do local FFT 3. redistribute results into nodes */ class FFT_one_dir { private: const int mu; //!< The direction this FFT is to be done const int N; //!< The number of node in the direction size_t P; //!< The Coordinate of this node in the direction Float* fp_tmp; //!< pointer to temporary vector // Input Data is aligned as D[nl][n][nr] const size_t nl; //!< The number of index(es), which is(are) left of the index for the direction, in a node. const int n; //!< The number of sites in the direction in a node. const size_t nr; //!< The number of last index(es), which is(are) right of the index for the direction, in a node size_t nlr; //!< nl*nr size_t nlr_N; //!< nl*nr/N size_t nnlr_N; //!< n*nl*nr/N char* cname; //Large Data void getPlusDataL(IFloat *rcv_buf, IFloat *send_buf, int len, int mu) { const int MAX_LENGTH = 4096; const int blk(MAX_LENGTH); const int N (len / MAX_LENGTH); for(int i=0;i=1;--I){ getPlusDataL( Fp2(I,0), Fp1(I,0), (N-I)*nnlr_N, mu ) ; moveMem( Fp1(I, 0), Fp2(I, 0), (N-I)*nnlr_N*sizeof(IFloat) ); } #else /* faster version uses get{Minus,Plus}Data for {first,second} half */ const int M=N/2; // The first half uses getPlusMinus for(int I=M-1; I>=1;--I){ getPlusDataL( Fp2(I,0), Fp1(I,0), (M-I)*nnlr_N, mu ) ; moveMem( Fp1(I, 0), Fp2(I, 0), (M-I)*nnlr_N*sizeof(IFloat) ); } // The second half uses getMinus for(int I=N-1; I>=M;--I){ getMinusDataL( Fp2(M,0), Fp1(M,0), (N-I)*nnlr_N, mu ) ; moveMem( Fp1(M, 0), Fp2(M, 0), (N-I)*nnlr_N*sizeof(IFloat) ); } #endif // just copy the first line moveMem( Fp2(0,0), Fp1(0,0), nnlr_N*sizeof(IFloat) ); } #undef Fp1 #undef Fp2 //! "Unshift" data /*! Pass back data between nodes @param[in/out] fp1[N][n][nlr/N] @param[tmp] fp2[N][n][nlr/N] working vector */ #define Fpin( I_, i_, ilr_N_ ) (fpin+ (ilr_N_)+nlr_N*( (i_) +n*(I_) ) ) #define Fpout( il_, i_, ir_ ) (fpout+ (ir_)+nr*( (i_ ) +n *(il_) )) void unskew(Float *fpin, Float *fpout) { // convert input data so that `n' is the first index for(int il=0;il CPS_START_NAMESPACE class FFT_one_dir { public: FFT_one_dir(const int _mu, Float* fpin, const int _nl, const int _n, const int _nr, int fft_forward=1, int flag_dist_back=1 ) //: mu(_mu), N(GJP.Nodes(mu)), n(_n), nl(_nl), nr(_nr) { char *cname="FFT_one_dir"; char* fname="FFT_one_dir(...)"; ERR.General(cname,fname,"Not implemented without fftw (USE_FFTW)\n"); } }; CPS_END_NAMESPACE #endif //#ifdef USE_FFTW #endif