{
  "id": "1980671",
  "uuid": "4247acc6-3db3-4d54-91d8-d71d7646fd4d",
  "metadata": {
    "citation_count": 1,
    "publication_info": [
      {
        "cnum": "C21-07-26.3",
        "year": 2022,
        "artid": "507",
        "page_start": "507",
        "journal_title": "PoS",
        "journal_record": {
          "$ref": "https://inspirehep.net/api/journals/1213080"
        },
        "journal_volume": "LATTICE2021",
        "conference_record": {
          "$ref": "https://inspirehep.net/api/conferences/1898790"
        }
      },
      {
        "cnum": "C21-07-26.3",
        "conference_record": {
          "$ref": "https://inspirehep.net/api/conferences/1898790"
        }
      }
    ],
    "documents": [
      {
        "key": "b5dbf9c0147ae3b43fb9f7a86301790c",
        "url": "https://inspirehep.net/files/b5dbf9c0147ae3b43fb9f7a86301790c",
        "filename": "document",
        "fulltext": true
      }
    ],
    "report_numbers": [
      {
        "value": "MIT-CTP/5348",
        "source": "arXiv"
      }
    ],
    "citation_count_without_self_citations": 1,
    "core": true,
    "dois": [
      {
        "value": "10.22323/1.396.0507",
        "source": "SISSA"
      }
    ],
    "titles": [
      {
        "title": "Implementation of the conjugate gradient algorithm for heterogeneous systems",
        "source": "SISSA"
      },
      {
        "title": "Implementation of the conjugate gradient algorithm for heterogeneous systems",
        "source": "arXiv"
      }
    ],
    "$schema": "https://inspirehep.net/schemas/records/hep.json",
    "authors": [
      {
        "uuid": "8f623d92-3614-4cf7-a426-f463fea6a7e2",
        "emails": [
          "calis@mit.edu"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1835916"
        },
        "full_name": "Cali, Salvatore",
        "affiliations": [
          {
            "value": "MIT",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/907455"
            },
            "curated_relation": true
          }
        ],
        "signature_block": "CALs"
      },
      {
        "uuid": "10471b04-338e-4500-ad07-9a5155322a66",
        "emails": [
          "wdetmold@mit.edu"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1019639"
        },
        "full_name": "Detmold, William",
        "affiliations": [
          {
            "value": "MIT",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/907455"
            },
            "curated_relation": true
          }
        ],
        "signature_block": "DATNALDw",
        "curated_relation": true
      },
      {
        "uuid": "d5b41a12-173a-4927-8d61-10418457bedd",
        "emails": [
          "grzegorz.korcyl@uj.edu.pl"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1261689"
        },
        "full_name": "Korcyl, Grzegorz",
        "affiliations": [
          {
            "value": "Jagiellonian U.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/902897"
            },
            "curated_relation": true
          }
        ],
        "signature_block": "CARCALg"
      },
      {
        "uuid": "45766803-0750-4e02-ad92-def7296cc025",
        "emails": [
          "piotr.korcyl@uj.edu.pl"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1045052"
        },
        "full_name": "Korcyl, Piotr",
        "affiliations": [
          {
            "value": "Jagiellonian U.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/902897"
            },
            "curated_relation": true
          }
        ],
        "signature_block": "CARCALp"
      },
      {
        "uuid": "e5520a67-8560-4bcd-965b-6abbe84aaad0",
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1091403"
        },
        "full_name": "Shanahan, Phiala",
        "affiliations": [
          {
            "value": "MIT",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/907455"
            },
            "curated_relation": true
          }
        ],
        "signature_block": "SANANp",
        "curated_relation": true
      }
    ],
    "curated": true,
    "figures": [
      {
        "key": "f9c65865eba4b31cefdc677e54c76ee9",
        "url": "https://inspirehep.net/files/f9c65865eba4b31cefdc677e54c76ee9",
        "label": "fig:tuning",
        "source": "arxiv",
        "caption": "Tuning of the optimal workgroup size on the Nvidia card A100-PCIE-40GB, performed for our implementation of the CG algorithm in SYCL/DPC++.",
        "filename": "workgroup.png",
        "material": "preprint"
      },
      {
        "key": "0c4daeae85e9dfa1f130ec9f68f8bafc",
        "url": "https://inspirehep.net/files/0c4daeae85e9dfa1f130ec9f68f8bafc",
        "label": "fig:speedup",
        "source": "arxiv",
        "caption": "Speedup study of our implementation of the CG algorithm in SYCL/DPC++, for a volume $V=14^4$ and with the Dirac operator stored in coordinate format. The reference point is the execution time obtained using a single compute unit.",
        "filename": "speedup.png",
        "material": "preprint"
      },
      {
        "key": "91e222ca101e842898fa53d399f0f0bf",
        "url": "https://inspirehep.net/files/91e222ca101e842898fa53d399f0f0bf",
        "label": "fig:roofline",
        "source": "arxiv",
        "caption": "Performance study of the sparse-matrix vector multiplication on different devices (points) and comparison with the naive roofline model (solid lines).",
        "filename": "roofline_spmv.png",
        "material": "preprint"
      }
    ],
    "license": [
      {
        "url": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
        "license": "CC-BY-NC-ND-4.0",
        "imposing": "SISSA"
      },
      {
        "url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
        "license": "arXiv nonexclusive-distrib 1.0",
        "material": "preprint"
      }
    ],
    "texkeys": [
      "Cali:2021adj"
    ],
    "citeable": true,
    "imprints": [
      {
        "date": "2022-05-16"
      }
    ],
    "keywords": [
      {
        "value": "operator: Dirac",
        "schema": "INSPIRE"
      },
      {
        "value": "lattice field theory",
        "schema": "INSPIRE"
      },
      {
        "value": "FPGA",
        "schema": "INSPIRE"
      }
    ],
    "refereed": false,
    "abstracts": [
      {
        "value": "Lattice QCD calculations require significant computational effort, with the dominant fraction of resources typically spent in the numerical inversion of the Dirac operator. One of the simplest methods to solve such large and sparse linear systems is the conjugate gradient (CG) approach. In this work we present an implementation of CG that can be executed on different devices, including CPUs, GPUs, and FPGAs. This is achieved by using the SYCL/DPC++ framework, which allows the execution of the same source code on heterogeneous systems.",
        "source": "SISSA"
      },
      {
        "value": "Lattice QCD calculations require significant computational effort, with the dominant fraction of resources typically spent in the numerical inversion of the Dirac operator. One of the simplest methods to solve such large and sparse linear systems is the conjugate gradient (CG) approach. In this work we present an implementation of CG that can be executed on different devices, including CPUs, GPUs, and FPGAs. This is achieved by using the SYCL/DPC++ framework, which allows the execution of the same source code on heterogeneous systems.",
        "source": "arXiv"
      }
    ],
    "references": [
      {
        "raw_refs": [
          {
            "value": "[1] OpenMP Architecture Review Board, “The OpenMP API specification for parallel programming, http://www.openmp.org.”",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "OpenMP Architecture Review Board, \"The OpenMP API specification for parallel programming"
          ],
          "urls": [
            {
              "value": "http://www.openmp.org"
            }
          ],
          "label": "1"
        }
      },
      {
        "raw_refs": [
          {
            "value": "[2] “HIP: C++ Heterogeneous-Compute Interface for Portability, https://rocmdocs.amd.com.”",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "\"HIP: C++ Heterogeneous-Compute Interface for Portability"
          ],
          "urls": [
            {
              "value": "https://rocmdocs.amd.com"
            }
          ],
          "label": "2"
        }
      },
      {
        "raw_refs": [
          {
            "value": "[3] Khronos Group, “SYCL , https://www.khronos.org/sycl/.”",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Khronos Group, \"SYCL"
          ],
          "urls": [
            {
              "value": "https://www.khronos.org/sycl/"
            }
          ],
          "label": "3"
        }
      },
      {
        "raw_refs": [
          {
            "value": "[4] J. Reinders, B. Ashbaugh, J. Brodman, M. Kinsner, J. Pennycook and X. Tian, Data Parallel C++: Mastering DPC++ for Programming of Heterogeneous Systems using C++ and SYCL, Apress (2020).",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Data Parallel C++: Mastering DPC++ for Programming of Heterogeneous Systems using C++ and SYCL, Apress"
          ],
          "label": "4",
          "authors": [
            {
              "full_name": "Reinders, J."
            },
            {
              "full_name": "Ashbaugh, B."
            },
            {
              "full_name": "Brodman, J."
            },
            {
              "full_name": "Kinsner, M."
            },
            {
              "full_name": "Pennycook, J."
            },
            {
              "full_name": "Tian, X."
            }
          ],
          "publication_info": {
            "year": 2020
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/89145"
        },
        "raw_refs": [
          {
            "value": "[5] K.G. Wilson, Confinement of Quarks, Phys. Rev. D 10 (1974) 2445.",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Confinement of Quarks"
          ],
          "label": "5",
          "authors": [
            {
              "full_name": "Wilson, K.G."
            }
          ],
          "publication_info": {
            "year": 1974,
            "artid": "2445",
            "page_start": "2445",
            "journal_title": "Phys.Rev.D",
            "journal_volume": "10"
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[6] B. Xiao, P. Shanahan, D. Hackett, S. Calì and Y. Lin, Neural Network Preconditioning for U(1) Wilson-type Dirac Operators, Poster presented by Brian Xiao at Lattice 2021, https://indico.cern.ch/event/1006302/contributions/4380639/ [Paper in preparation] .",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Neural Network Preconditioning for U(1) Wilson-type Dirac Operators, Poster presented by Brian Xiao at Lattice",
            "[Paper in preparation]"
          ],
          "urls": [
            {
              "value": "https://indico.cern.ch/event/1006302/contributions/4380639/"
            }
          ],
          "label": "6",
          "authors": [
            {
              "full_name": "Xiao, B."
            },
            {
              "full_name": "Shanahan, P."
            },
            {
              "full_name": "Hackett, D."
            },
            {
              "full_name": "Calì, S."
            },
            {
              "full_name": "Lin, Y."
            }
          ],
          "publication_info": {
            "year": 2021
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[7] J. Sappl, L. Seiler, M. Harders and W. Rauch, Deep learning of preconditioners for conjugate gradient solvers in urban water related problems, CoRR abs/1906.06925 (2019) [1906.06925].",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Deep learning of preconditioners for conjugate gradient solvers in urban water related problems, CoRR abs/"
          ],
          "label": "7",
          "authors": [
            {
              "full_name": "Sappl, J."
            },
            {
              "full_name": "Seiler, L."
            },
            {
              "full_name": "Harders, M."
            },
            {
              "full_name": "Rauch, W."
            }
          ],
          "arxiv_eprint": "1906.06925"
        }
      },
      {
        "raw_refs": [
          {
            "value": "[7] J. Sappl, L. Seiler, M. Harders and W. Rauch, Deep learning of preconditioners for conjugate gradient solvers in urban water related problems, CoRR abs/1906.06925 (2019) [1906.06925].",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "label": "7",
          "authors": [
            {
              "full_name": "Sappl, J."
            },
            {
              "full_name": "Seiler, L."
            },
            {
              "full_name": "Harders, M."
            },
            {
              "full_name": "Rauch, W."
            }
          ],
          "arxiv_eprint": "1906.06925",
          "publication_info": {
            "year": 2019
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[8] Y. Saad, Iterative Methods for Sparse Linear Systems, Other Titles in Applied Mathematics, SIAM, second ed. (2003), 10.1137/1.9780898718003.",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "dois": [
            "10.1137/1.9780898718003"
          ],
          "misc": [
            "Iterative Methods for Sparse Linear Systems, Other Titles in Applied Mathematics",
            "second ed"
          ],
          "label": "8",
          "authors": [
            {
              "full_name": "Saad, Y."
            }
          ],
          "imprint": {
            "publisher": "SIAM"
          },
          "publication_info": {
            "year": 2003
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1775722"
        },
        "raw_refs": [
          {
            "value": "[9] G. Korcyl and P. Korcyl, Optimized implementation of the conjugate gradient algorithm for FPGA-based platforms using the Dirac-Wilson operator as an example, 2001.05218.",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Optimized implementation of the conjugate gradient algorithm for FPGA-based platforms using the Dirac-Wilson operator as an example"
          ],
          "label": "9",
          "authors": [
            {
              "full_name": "Korcyl, G."
            },
            {
              "full_name": "Korcyl, P."
            }
          ],
          "arxiv_eprint": "2001.05218"
        }
      },
      {
        "raw_refs": [
          {
            "value": "[10] A. Zeni, K. O’Brien, M. Blott and M.D. Santambrogio, Optimized implementation of the hpcg benchmark on reconfigurable hardware, in European Conference on Parallel Processing, pp. 616–630, Springer, 2021.",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Optimized implementation of the hpcg benchmark on reconfigurable hardware, in European Conference on Parallel Processing, pp. 616-630"
          ],
          "label": "10",
          "authors": [
            {
              "full_name": "Zeni, A."
            },
            {
              "full_name": "O'Brien, K."
            },
            {
              "full_name": "Blott, M."
            },
            {
              "full_name": "Santambrogio, M.D."
            }
          ],
          "imprint": {
            "publisher": "Springer"
          },
          "publication_info": {
            "year": 2021
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[11] S. Williams, A. Waterman and D. Patterson, Roofline: An insightful visual performance model for multicore architectures, Commun. ACM 52 (2009) 65.",
            "schema": "text",
            "source": "desy"
          }
        ],
        "reference": {
          "misc": [
            "Roofline: An insightful visual performance model for multicore architectures"
          ],
          "label": "11",
          "authors": [
            {
              "full_name": "Williams, S."
            },
            {
              "full_name": "Waterman, A."
            },
            {
              "full_name": "Patterson, D."
            }
          ],
          "publication_info": {
            "year": 2009,
            "artid": "65",
            "page_start": "65",
            "journal_title": "Commun.ACM",
            "journal_volume": "52"
          }
        }
      }
    ],
    "public_notes": [
      {
        "value": "Proceedings of the 38th International Symposium on Lattice Field Theory, LATTICE2021 26th-30th July, 2021, Zoom/Gather@Massachusetts Institute of Technology",
        "source": "arXiv"
      }
    ],
    "arxiv_eprints": [
      {
        "value": "2111.14958",
        "categories": [
          "hep-lat"
        ]
      }
    ],
    "document_type": [
      "conference paper"
    ],
    "preprint_date": "2021-11-29",
    "control_number": 1980671,
    "number_of_pages": 8,
    "inspire_categories": [
      {
        "term": "Lattice",
        "source": "arxiv"
      }
    ]
  },
  "revision_id": 13,
  "updated": "2024-03-12T20:19:26.455588+00:00",
  "created": "2021-12-01T02:56:13.785743+00:00",
  "links": {
    "bibtex": "https://inspirehep.net/api/literature/1980671?format=bibtex",
    "latex-eu": "https://inspirehep.net/api/literature/1980671?format=latex-eu",
    "latex-us": "https://inspirehep.net/api/literature/1980671?format=latex-us",
    "json": "https://inspirehep.net/api/literature/1980671?format=json",
    "json-expanded": "https://inspirehep.net/api/literature/1980671?format=json-expanded",
    "cv": "https://inspirehep.net/api/literature/1980671?format=cv",
    "citations": "https://inspirehep.net/api/literature/?q=refersto%3Arecid%3A1980671"
  }
}