{
  "uuid": "2d427dd8-b8a5-4242-96d2-6fbb499d3f19",
  "metadata": {
    "citation_count_without_self_citations": 5,
    "citation_count": 8,
    "documents": [
      {
        "key": "5f32206eb36bcda6964749ad8508a93c",
        "url": "https://inspirehep.net/files/5f32206eb36bcda6964749ad8508a93c",
        "filename": "epjconf_lattice2018_09006.pdf",
        "fulltext": true
      }
    ],
    "publication_info": [
      {
        "cnum": "C17-06-18.1",
        "year": 2018,
        "artid": "09006",
        "journal_title": "EPJ Web Conf.",
        "parent_record": {
          "$ref": "https://inspirehep.net/api/literature/1665353"
        },
        "journal_record": {
          "$ref": "https://inspirehep.net/api/journals/1211782"
        },
        "journal_volume": "175",
        "conference_record": {
          "$ref": "https://inspirehep.net/api/conferences/1425631"
        }
      }
    ],
    "core": true,
    "dois": [
      {
        "value": "10.1051/epjconf/201817509006",
        "source": "bibmatch"
      },
      {
        "value": "10.1051/epjconf/201817509006"
      }
    ],
    "titles": [
      {
        "title": "Performance Portability Strategies for Grid C++ Expression Templates",
        "source": "arXiv"
      }
    ],
    "$schema": "https://inspirehep.net/schemas/records/hep.json",
    "authors": [
      {
        "uuid": "f18b9b88-1a3c-4f30-abc6-f9b88fe75038",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1015525"
        },
        "full_name": "Boyle, Peter A.",
        "affiliations": [
          {
            "value": "Edinburgh U.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/902787"
            }
          }
        ],
        "signature_block": "BYLp",
        "raw_affiliations": [
          {
            "value": "Higgs Centre for Theoretical Physics, School of Physics & Astronomy, University of Edinburgh, EH9 3FD, UK"
          }
        ]
      },
      {
        "uuid": "e4b67346-12ff-4646-a6e7-f4af42fb7eff",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1041420"
        },
        "full_name": "Clark, M.A.",
        "affiliations": [
          {
            "value": "NVIDIA, Santa Clara",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/1677141"
            }
          }
        ],
        "signature_block": "CLARCm",
        "curated_relation": true,
        "raw_affiliations": [
          {
            "value": "NVIDIA Corporation, Santa Clara, CA 95050, USA"
          }
        ]
      },
      {
        "uuid": "d53181ec-4d85-4d34-a3e1-2534345fce91",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1011934"
        },
        "full_name": "DeTar, Carleton",
        "affiliations": [
          {
            "value": "Utah U.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/903315"
            }
          }
        ],
        "signature_block": "DATARc",
        "curated_relation": true,
        "raw_affiliations": [
          {
            "value": "Department of Physics and Astronomy, University of Utah, Salt Lake City, UT 84112, USA"
          }
        ]
      },
      {
        "uuid": "3117269c-2612-4398-9759-a31b8a6713c3",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1049380"
        },
        "full_name": "Lin, Meifeng",
        "affiliations": [
          {
            "value": "Brookhaven Natl. Lab.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/1268258"
            }
          }
        ],
        "signature_block": "LANm",
        "raw_affiliations": [
          {
            "value": "Computational Science Initiative, Brookhaven National Laboratory, Upton, New York 11973, USA"
          }
        ]
      },
      {
        "uuid": "c6774e9b-8b5f-4ca6-be6f-d5cbe60efad4",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/2021762"
        },
        "full_name": "Rana, Verinder",
        "affiliations": [
          {
            "value": "Brookhaven Natl. Lab.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/1268258"
            }
          }
        ],
        "signature_block": "RANv",
        "raw_affiliations": [
          {
            "value": "Computational Science Initiative, Brookhaven National Laboratory, Upton, New York 11973, USA"
          }
        ]
      },
      {
        "uuid": "f2aca165-b282-4420-8b35-e8948565c3ef",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1056581"
        },
        "full_name": "Avilés-Casco, Alejandro Vaquero",
        "affiliations": [
          {
            "value": "Utah U.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/903315"
            }
          }
        ],
        "signature_block": "CASCa",
        "curated_relation": true,
        "raw_affiliations": [
          {
            "value": "Department of Physics and Astronomy, University of Utah, Salt Lake City, UT 84112, USA"
          }
        ]
      }
    ],
    "curated": true,
    "figures": [
      {
        "key": "7d3a0c3013f431d2141e90791072f1b2",
        "url": "https://inspirehep.net/files/7d3a0c3013f431d2141e90791072f1b2",
        "source": "arxiv",
        "caption": "Complex SU(3)$\\times$SU(3) streaming bandwidth on NVIDIA GTX 1080 for implementations with OpenACC, CUDA and Jitify.",
        "filename": "su3_gbs.png"
      },
      {
        "key": "5ac82b67b68904111285f8ef7a340921",
        "url": "https://inspirehep.net/files/5ac82b67b68904111285f8ef7a340921",
        "source": "arxiv",
        "caption": "Comparison of OpenACC SU(3)$\\times$SU(3) streaming bandwidth with and without coalesced pointer.",
        "filename": "SU3_openacc.png"
      },
      {
        "key": "9f9e9dfb9bdfe5f35dcea7227655d357",
        "url": "https://inspirehep.net/files/9f9e9dfb9bdfe5f35dcea7227655d357",
        "source": "arxiv",
        "caption": "Performance of the miniapp in several architectures. The vertical lines indicate the cache size, the horizontal ones show the memory bandwidth obtained in a stream triad test. In all cases we can easily saturate the memory bandwidth of the machine.",
        "filename": "voltaPlot.png"
      },
      {
        "key": "286082ce286103ca9a19c028a7781bdb",
        "url": "https://inspirehep.net/files/286082ce286103ca9a19c028a7781bdb",
        "source": "arxiv",
        "caption": "Performance of the miniapp as a function of the vector length in a Quadro GP100. The vector length is measured in terms of single precision complex numbers. For a vector of 16 complex numbers (twice the size of a KNL vector), performance  hits the maximum.",
        "filename": "blockPascal.png"
      }
    ],
    "license": [
      {
        "url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
        "imposing": "arXiv"
      },
      {
        "url": "http://creativecommons.org/licenses/by/4.0/",
        "license": "CC-BY-4.0",
        "imposing": "EDP Sciences"
      }
    ],
    "texkeys": [
      "Boyle:2017gzg"
    ],
    "citeable": true,
    "imprints": [
      {
        "date": "2018"
      }
    ],
    "keywords": [
      {
        "value": "Grid computing",
        "schema": "INSPIRE"
      },
      {
        "value": "computer: performance",
        "schema": "INSPIRE"
      },
      {
        "value": "gauge field theory: SU(3)",
        "schema": "INSPIRE"
      },
      {
        "value": "lattice field theory",
        "schema": "INSPIRE"
      }
    ],
    "abstracts": [
      {
        "value": "One of the key requirements for the Lattice QCD Application Development as part of the US Exascale Computing Project is performance portability across multiple architectures. Using the Grid C++ expression template as a starting point, we report on the progress made with regards to the Grid GPU offloading strategies. We present both the successes and issues encountered in using CUDA, OpenACC and Just-In-Time compilation. Experimentation and performance on GPUs with a SU(3)×SU(3) streaming test will be reported. We will also report on the challenges of using current OpenMP 4.x for GPU offloading in the same code.1 As of Lattice 2017, our partners included University of Edinburgh, University of Illinois, NVIDIA and Stony Brook University.",
        "source": "EDP Sciences"
      },
      {
        "value": "One of the key requirements for the Lattice QCD Application Development as part of the US Exascale Computing Project is performance portability across multiple architectures. Using the Grid C++ expression template as a starting point, we report on the progress made with regards to the Grid GPU offloading strategies. We present both the successes and issues encountered in using CUDA, OpenACC and Just-In-Time compilation. Experimentation and performance on GPUs with a SU(3)$\\times$SU(3) streaming test will be reported. We will also report on the challenges of using current OpenMP 4.x for GPU offloading in the same code.",
        "source": "arXiv"
      }
    ],
    "references": [
      {
        "raw_refs": [
          {
            "value": "C. DeTar, Lattice QCD Application Development within the US DOE Exascale Computation Project, in Proceedings, 35th International Symposium on Lattice Field Theory (Lattice2017): Granada, Spain, to appear in EPJ Web Conf. [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Lattice QCD Application Development within the US DOE Exascale Computation Project, in Proceedings, 35th International Symposium on Lattice Field Theory (Lattice): Granada, Spain, to appear in EPJ Web Conf. [Google Scholar]"
          ],
          "authors": [
            {
              "full_name": "DeTar, C."
            }
          ],
          "publication_info": {
            "year": 2017
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "https://exascaleproject.org/ [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "[Google Scholar]"
          ]
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1409303"
        },
        "raw_refs": [
          {
            "value": "P.A. Boyle, G. Cossu, A. Yamaguchi, A. Portelli, PoS LATTICE2015, 023 (2016) [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "[Google Scholar]"
          ],
          "authors": [
            {
              "full_name": "Boyle, P.A."
            },
            {
              "full_name": "Cossu, G."
            },
            {
              "full_name": "Yamaguchi, A."
            },
            {
              "full_name": "Portelli, A."
            }
          ],
          "publication_info": {
            "year": 2016,
            "artid": "023",
            "page_start": "023",
            "journal_title": "PoS",
            "journal_volume": "LATTICE2015"
          }
        },
        "curated_relation": false
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1511682"
        },
        "raw_refs": [
          {
            "value": "P.A. Boyle, PoS LATTICE2016, 013 (2017), 1702.00208 [Google Scholar]https://doi.org/10.22323/1.256.0013",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "[Google Scholar]"
          ],
          "authors": [
            {
              "full_name": "Boyle, P.A."
            }
          ],
          "arxiv_eprint": "1702.00208",
          "publication_info": {
            "year": 2017,
            "artid": "013",
            "page_start": "013",
            "journal_title": "PoS",
            "journal_volume": "LATTICE2016"
          }
        },
        "curated_relation": false
      },
      {
        "raw_refs": [
          {
            "value": "R. Farber, ed., Parallel Programming with OpenACC, 1st edn. (Morgan Kaufmann, Boston, 2017) [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Parallel Programming with OpenACC, 1st edn. (Morgan Kaufmann, Boston,) [Google Scholar]"
          ],
          "authors": [
            {
              "full_name": "Farber, R."
            }
          ],
          "publication_info": {
            "year": 2017
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "I. Karlin, T. Scogland, A.C. Jacob, S.F. Antao, G.T. Bercea, C. Bertolli, B.R. de Supinski, E.W. Draeger, A.E. Eichenberger, J. Glosli et al., Early Experiences Porting Three Applications to OpenMP 4.5 (Springer International Publishing, Cham, 2016), pp. 281–292 [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Early Experiences Porting Three Applications to OpenMP 4.5",
            "International Publishing, Cham,), pp. 281-292 [Google Scholar]"
          ],
          "authors": [
            {
              "full_name": "Karlin, I."
            },
            {
              "full_name": "Scogland, T."
            },
            {
              "full_name": "Jacob, A.C."
            },
            {
              "full_name": "Antao, S.F."
            },
            {
              "full_name": "Bercea, G.T."
            },
            {
              "full_name": "Bertolli, C."
            },
            {
              "full_name": "de Supinski, B.R."
            },
            {
              "full_name": "Draeger, E.W."
            },
            {
              "full_name": "Eichenberger, A.E."
            },
            {
              "full_name": "Glosli, J."
            }
          ],
          "publication_info": {
            "year": 2016
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "OpenMP 4.5 IBM November 2015 Hackathon: Current Status and Lessons Learned, https://codesign.llnl.gov/pdfs/2015-11-IBM-OpenMP-Hackathon-outbrief-final.pdf [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "OpenMP 4.5 IBM NovemberHackathon: Current Status and Lessons Learned",
            "[Google Scholar]"
          ],
          "publication_info": {
            "year": 2015
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "https://github.com/NVIDIA/jitify [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "[Google Scholar]"
          ]
        }
      },
      {
        "raw_refs": [
          {
            "value": "K. Clark, https://github.com/maddyscientist/coalesced_ptr [Google Scholar]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "[Google Scholar]"
          ],
          "authors": [
            {
              "full_name": "Clark, K."
            }
          ]
        }
      }
    ],
    "public_notes": [
      {
        "value": "8 pages, 4 figures. Talk presented at the 35th International Symposium on Lattice Field Theory, 18-24 June 2017, Granada, Spain",
        "source": "arXiv"
      }
    ],
    "arxiv_eprints": [
      {
        "value": "1710.09409",
        "categories": [
          "hep-lat",
          "cs.MS"
        ]
      }
    ],
    "document_type": [
      "conference paper"
    ],
    "preprint_date": "2017-10-25",
    "control_number": 1632761,
    "legacy_version": "20180817113333.0",
    "number_of_pages": 8,
    "inspire_categories": [
      {
        "term": "Lattice"
      },
      {
        "term": "Computing"
      }
    ],
    "legacy_creation_date": "2017-10-27",
    "accelerator_experiments": [
      {
        "record": {
          "$ref": "https://inspirehep.net/api/experiments/2957096"
        },
        "legacy_name": "Grid",
        "curated_relation": true
      }
    ],
    "external_system_identifiers": [
      {
        "value": "2018EPJWC.17509006B",
        "schema": "ADS"
      }
    ]
  },
  "created": "2017-10-27T00:00:00+00:00",
  "id": "1632761",
  "revision_id": 121,
  "links": {
    "bibtex": "https://inspirehep.net/api/literature/1632761?format=bibtex",
    "latex-eu": "https://inspirehep.net/api/literature/1632761?format=latex-eu",
    "latex-us": "https://inspirehep.net/api/literature/1632761?format=latex-us",
    "json": "https://inspirehep.net/api/literature/1632761?format=json",
    "json-expanded": "https://inspirehep.net/api/literature/1632761?format=json-expanded",
    "cv": "https://inspirehep.net/api/literature/1632761?format=cv",
    "citations": "https://inspirehep.net/api/literature/?q=refersto%3Arecid%3A1632761"
  },
  "updated": "2025-08-04T18:03:06.794289+00:00"
}