/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/communicator/SharedMemory.cc Copyright (C) 2015 Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #pragma once #include #if defined (GRID_COMMS_MPI3) #include #endif #include #include #include #include #include #include #include #include #include NAMESPACE_BEGIN(Grid); #if defined (GRID_COMMS_MPI3) typedef MPI_Comm Grid_MPI_Comm; typedef MPI_Request MpiCommsRequest_t; #ifdef ACCELERATOR_AWARE_MPI typedef MPI_Request CommsRequest_t; #else /* * Enable state transitions as each packet flows. */ enum PacketType_t { FaceGather, InterNodeXmit, InterNodeRecv, IntraNodeXmit, IntraNodeRecv, InterNodeXmitISend, InterNodeReceiveHtoD }; /* *Package arguments needed for various actions along packet flow */ typedef struct { PacketType_t PacketType; void *host_buf; void *device_buf; int dest; int tag; int commdir; unsigned long bytes; acceleratorEvent_t ev; MpiCommsRequest_t req; } CommsRequest_t; #endif #else typedef int MpiCommsRequest_t; typedef int CommsRequest_t; typedef int Grid_MPI_Comm; #endif class GlobalSharedMemory { private: static const int MAXLOG2RANKSPERNODE = 16; // Init once lock on the buffer allocation static int _ShmSetup; static int _ShmAlloc; static uint64_t _ShmAllocBytes; public: /////////////////////////////////////// // HPE 8600 hypercube optimisation /////////////////////////////////////// static int HPEhypercube; static int ShmSetup(void) { return _ShmSetup; } static int ShmAlloc(void) { return _ShmAlloc; } static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; } static uint64_t MAX_MPI_SHM_BYTES; static int Hugepages; static std::vector WorldShmCommBufs; #ifndef ACCELERATOR_AWARE_MPI static void *HostCommBuf; #endif static Grid_MPI_Comm WorldComm; static int WorldRank; static int WorldSize; static Grid_MPI_Comm WorldShmComm; static int WorldShmRank; static int WorldShmSize; static int WorldNodes; static int WorldNode; static std::vector WorldShmRanks; ////////////////////////////////////////////////////////////////////////////////////// // Create an optimal reordered communicator that makes MPI_Cart_create get it right ////////////////////////////////////////////////////////////////////////////////////// static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD // Turns MPI_COMM_WORLD into right layout for Cartesian static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims); static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims); static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &ShmDims); static void GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims); /////////////////////////////////////////////////// // Provide shared memory facilities off comm world /////////////////////////////////////////////////// static void SharedMemoryAllocate(uint64_t bytes, int flags); static void SharedMemoryFree(void); // static void SharedMemoryCopy(void *dest,void *src,size_t bytes); static void SharedMemoryZero(void *dest,size_t bytes); }; ////////////////////////////// // one per communicator ////////////////////////////// class SharedMemory { private: static const int MAXLOG2RANKSPERNODE = 16; size_t heap_top; size_t heap_bytes; size_t heap_size; #ifndef ACCELERATOR_AWARE_MPI size_t host_heap_top; // set in free all size_t host_heap_bytes;// set in free all void *HostCommBuf; // set in SetCommunicator size_t host_heap_size; // set in SetCommunicator #endif protected: Grid_MPI_Comm ShmComm; // for barriers int ShmRank; int ShmSize; std::vector ShmCommBufs; std::vector ShmRanks;// Mapping comm ranks to Shm ranks public: SharedMemory() {}; ~SharedMemory(); /////////////////////////////////////////////////////////////////////////////////////// // set the buffers & sizes /////////////////////////////////////////////////////////////////////////////////////// void SetCommunicator(Grid_MPI_Comm comm); //////////////////////////////////////////////////////////////////////// // For this instance ; disjoint buffer sets between splits if split grid //////////////////////////////////////////////////////////////////////// void ShmBarrier(void); /////////////////////////////////////////////////// // Call on any instance /////////////////////////////////////////////////// void SharedMemoryTest(void); void *ShmBufferSelf(void); void *ShmBuffer (int rank); void *ShmBufferTranslate(int rank,void * local_p); void *ShmBufferMalloc(size_t bytes); void ShmBufferFreeAll(void) ; #ifndef ACCELERATOR_AWARE_MPI void *HostBufferMalloc(size_t bytes); void HostBufferFreeAll(void); #endif ////////////////////////////////////////////////////////////////////////// // Make info on Nodes & ranks and Shared memory available ////////////////////////////////////////////////////////////////////////// int NodeCount(void) { return GlobalSharedMemory::WorldNodes;}; int RankCount(void) { return GlobalSharedMemory::WorldSize;}; }; NAMESPACE_END(Grid);