/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/GeneralCoarsenedMatrixMultiRHS.h Copyright (C) 2015 Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #pragma once NAMESPACE_BEGIN(Grid); // Fine Object == (per site) type of fine field // nbasis == number of deflation vectors template class MultiGeneralCoarsenedMatrix : public SparseMatrixBase > > { public: typedef typename CComplex::scalar_object SComplex; typedef GeneralCoarsenedMatrix GeneralCoarseOp; typedef MultiGeneralCoarsenedMatrix MultiGeneralCoarseOp; typedef iVector siteVector; typedef iMatrix siteMatrix; typedef iVector calcVector; typedef iMatrix calcMatrix; typedef Lattice > CoarseComplexField; typedef Lattice CoarseVector; typedef Lattice > CoarseMatrix; typedef iMatrix Cobj; typedef iVector Cvec; typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field typedef Lattice FineField; typedef Lattice FineComplexField; typedef CoarseVector Field; //////////////////// // Data members //////////////////// GridCartesian * _CoarseGridMulti; NonLocalStencilGeometry geom; NonLocalStencilGeometry geom_srhs; PaddedCell Cell; GeneralLocalStencil Stencil; deviceVector BLAS_B; deviceVector BLAS_C; std::vector > BLAS_A; std::vector > BLAS_AP; std::vector > BLAS_BP; deviceVector BLAS_CP; /////////////////////// // Interface /////////////////////// GridBase * Grid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know GridCartesian * CoarseGrid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know // Can be used to do I/O on the operator matrices externally void SetMatrix (int p,CoarseMatrix & A) { assert(A.size()==geom_srhs.npoint); GridtoBLAS(A[p],BLAS_A[p]); } void GetMatrix (int p,CoarseMatrix & A) { assert(A.size()==geom_srhs.npoint); BLAStoGrid(A[p],BLAS_A[p]); } void CopyMatrix (GeneralCoarseOp &_Op) { for(int p=0;plSites(); int32_t unpadded_sites = CoarseGridMulti->lSites(); int32_t nrhs = CoarseGridMulti->FullDimensions()[0]; // # RHS int32_t orhs = nrhs/CComplex::Nsimd(); padded_sites = padded_sites/nrhs; unpadded_sites = unpadded_sites/nrhs; ///////////////////////////////////////////////// // Device data vector storage ///////////////////////////////////////////////// BLAS_A.resize(geom.npoint); for(int p=0;p lSite assert(nbr void GridtoBLAS(const Lattice &from,deviceVector &to) { typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; GridBase *Fg = from.Grid(); assert(!Fg->_isCheckerBoarded); int nd = Fg->_ndimension; to.resize(Fg->lSites()); Coordinate LocalLatt = Fg->LocalDimensions(); size_t nsite = 1; for(int i=0;i_ostride; Coordinate f_istride = Fg->_istride; Coordinate f_rdimensions = Fg->_rdimensions; autoView(from_v,from,AcceleratorRead); auto to_v = &to[0]; const int words=sizeof(vobj)/sizeof(vector_type); accelerator_for(idx,nsite,1,{ Coordinate from_coor, base; Lexicographic::CoorFromIndex(base,idx,LocalLatt); for(int i=0;i void BLAStoGrid(Lattice &grid,deviceVector &in) { typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; GridBase *Tg = grid.Grid(); assert(!Tg->_isCheckerBoarded); int nd = Tg->_ndimension; assert(in.size()==Tg->lSites()); Coordinate LocalLatt = Tg->LocalDimensions(); size_t nsite = 1; for(int i=0;i_ostride; Coordinate t_istride = Tg->_istride; Coordinate t_rdimensions = Tg->_rdimensions; autoView(to_v,grid,AcceleratorWrite); auto from_v = &in[0]; const int words=sizeof(vobj)/sizeof(vector_type); accelerator_for(idx,nsite,1,{ Coordinate to_coor, base; Lexicographic::CoorFromIndex(base,idx,LocalLatt); for(int i=0;i > &linop, Aggregation & Subspace, GridBase *CoarseGrid) { #if 0 std::cout << GridLogMessage<< "GeneralCoarsenMatrixMrhs "<< std::endl; GridBase *grid = Subspace.FineGrid; ///////////////////////////////////////////////////////////// // Orthogonalise the subblocks over the basis ///////////////////////////////////////////////////////////// CoarseScalar InnerProd(CoarseGrid); blockOrthogonalise(InnerProd,Subspace.subspace); const int npoint = geom_srhs.npoint; Coordinate clatt = CoarseGrid->GlobalDimensions(); int Nd = CoarseGrid->Nd(); /* * Here, k,l index which possible momentum/shift within the N-points connected by MdagM. * Matrix index i is mapped to this shift via * geom.shifts[i] * * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block] * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} > * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} * = M_{kl} A_ji^{b.b+l} * * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] * * Where q_k = delta_k . (2*M_PI/global_nb[mu]) * * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} */ Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint); Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); ComplexD ci(0.0,1.0); for(int k=0;k phaF(npoint,grid); std::vector pha(npoint,CoarseGrid); CoarseVector coarseInner(CoarseGrid); typedef typename CComplex::scalar_type SComplex; FineComplexField one(grid); one=SComplex(1.0); FineComplexField zz(grid); zz = Zero(); for(int p=0;p _A; _A.resize(geom_srhs.npoint,CoarseGrid); std::vector ComputeProj(npoint,CoarseGrid); CoarseVector FT(CoarseGrid); for(int i=0;ioSites(); autoView( A_v , _A[k], AcceleratorWrite); autoView( FT_v , FT, AcceleratorRead); accelerator_for(sss, osites, 1, { for(int j=0;j > Projector; Projector.Allocate(nbasis,grid,CoarseGrid); Projector.ImportBasis(Subspace.subspace); const int npoint = geom_srhs.npoint; Coordinate clatt = CoarseGrid->GlobalDimensions(); int Nd = CoarseGrid->Nd(); /* * Here, k,l index which possible momentum/shift within the N-points connected by MdagM. * Matrix index i is mapped to this shift via * geom.shifts[i] * * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block] * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} > * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l} * = M_{kl} A_ji^{b.b+l} * * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l] * * Where q_k = delta_k . (2*M_PI/global_nb[mu]) * * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j} */ Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint); Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint); ComplexD ci(0.0,1.0); for(int k=0;k phaF(npoint,grid); std::vector pha(npoint,CoarseGrid); CoarseVector coarseInner(CoarseGrid); tphase=-usecond(); typedef typename CComplex::scalar_type SComplex; FineComplexField one(grid); one=SComplex(1.0); FineComplexField zz(grid); zz = Zero(); for(int p=0;p _A; _A.resize(geom_srhs.npoint,CoarseGrid); // Count use small chunks than npoint == 81 and save memory int batch = 9; std::vector _MphaV(batch,grid); std::vector TmpProj(batch,CoarseGrid); std::vector ComputeProj(npoint,CoarseGrid); CoarseVector FT(CoarseGrid); for(int i=0;ioSites(); autoView( A_v , _A[k], AcceleratorWrite); autoView( FT_v , FT, AcceleratorRead); accelerator_for(sss, osites, 1, { for(int j=0;jM(in,out); } void M (const CoarseVector &in, CoarseVector &out) { // std::cout << GridLogMessage << "New Mrhs coarse"< Vview; const int Nsimd = CComplex::Nsimd(); int64_t nrhs =pin.Grid()->GlobalDimensions()[0]; assert(nrhs>=1); RealD flops,bytes; int64_t osites=in.Grid()->oSites(); // unpadded int64_t unpadded_vol = CoarseGrid()->lSites()/nrhs; flops = 1.0* npoint * nbasis * nbasis * 8.0 * osites * CComplex::Nsimd(); bytes = 1.0*osites*sizeof(siteMatrix)*npoint/pin.Grid()->GlobalDimensions()[0] + 2.0*osites*sizeof(siteVector)*npoint; t_GtoB=-usecond(); GridtoBLAS(pin,BLAS_B); t_GtoB+=usecond(); GridBLAS BLAS; t_mult=-usecond(); for(int p=0;p &out){assert(0);}; }; NAMESPACE_END(Grid);