#ifdef USE_QIO
// NOTE(review): the header names that followed each #include below are missing
// in this copy of the file -- restore them from the original source.
#include
#include
#include
CPS_START_NAMESPACE
using namespace std;
#define PROFILE

// qio-factory functions

// Parameters shared by the general-field QIO callbacks in this file.
struct qio_genfield_glb_type {
  int precision;        // tested for non-zero in qio_putGenField to select the Float code path
  int n_fields;         // number of fields (see the layout comment in qio_putGenField)
  int f_size_per_site;  // number of values stored per site for one field
  int n_sites;          // number of sites
};

// File-local instance read by qio_putGenField.
static qio_genfield_glb_type qio_genfield_glb;

// Transfers one qio_genfield_glb_type record between buf_ and arg_ with a
// single moveMem() call of sizeof(qio_genfield_glb_type) bytes.
//
//   buf_        passed as the second argument of moveMem
//   site_index  not used by this function
//   count       not used by this function
//   arg_        passed as the first argument of moveMem
//
// NOTE(review): presumably moveMem takes the destination first, so this copies
// the record from buf_ into arg_ -- confirm against moveMem's declaration.
void qio_genfield_put_glb(char *buf_, size_t site_index, int count, void *arg_)
{
  moveMem( arg_, buf_, sizeof(qio_genfield_glb_type));
}

// global variable for functor (could make a separated record and read from it, but then we would need if statement...)
// NOTE(review): these three variables are not referenced in the part of the
// file visible here; qio_putGenField reads qio_genfield_glb instead.
static int qio_glb_genfield_n_fields, qio_glb_genfield_f_size_per_site, qio_glb_n_sites;

// Callback working on a general field; it reads its sizes from qio_genfield_glb.
// Only the beginning of this function is visible in this chunk.
void qio_putGenField(char *buf_, size_t site_index, int count, void *arg)
{
  /*printf(" called with count %i\n",count);*/\

  const int n_field = qio_genfield_glb. n_fields;
  const size_t f_size = qio_genfield_glb. f_size_per_site;
  const int n_sites = qio_genfield_glb. n_sites;

  /*
    The field should store data in memory in the following format:

      [ 1 st field ] [ 2 nd field ] ... [ (n_fields-1)-th field ]

    where [ n-th field ] is

      [ f_size_per_site Floats for (0,0,0,0) ]
      [ f_size_per_site Floats for (1,0,0,0) ]
      [ f_size_per_site Floats for (2,0,0,0) ]
      ....
      [ f_size_per_site Floats for (Nx-1, Ny-1, Nz-1, Nt-1) ]

    To reduce the number of I/O operations, we rearrange the file format as follows:

      [ n_fields * f_size_per_site Floats for (0,0,0,0) ]
      [ n_fields * f_size_per_site Floats for (1,0,0,0) ]
      [ n_fields * f_size_per_site Floats for (2,0,0,0) ]
      ....
      [ n_fields * f_size_per_site Floats for (Nx-1, Ny-1, Nz-1, Nt-1) ]

    The fastest-changing index is the f_size_per_site degree within one field,
    then the index of the field, 0 ... n_field-1.

    This rearrangement requires non-local memory access, but I hope the benefit
    of an n_field times smaller number of I/O operations will outweigh the slowdown.
  */

  if(qio_genfield_glb.precision){
    Float *field = (Float*) arg;
    Float *buf = (Float*) buf_;
    for(int field_i=0; field_i