{
  "uuid": "8d9afc64-6a70-46e2-a8b7-e529c1352a9a",
  "updated": "2024-11-28T17:53:41.719359+00:00",
  "id": "1857794",
  "links": {
    "bibtex": "https://inspirehep.net/api/literature/1857794?format=bibtex",
    "latex-eu": "https://inspirehep.net/api/literature/1857794?format=latex-eu",
    "latex-us": "https://inspirehep.net/api/literature/1857794?format=latex-us",
    "json": "https://inspirehep.net/api/literature/1857794?format=json",
    "json-expanded": "https://inspirehep.net/api/literature/1857794?format=json-expanded",
    "cv": "https://inspirehep.net/api/literature/1857794?format=cv",
    "citations": "https://inspirehep.net/api/literature/?q=refersto%3Arecid%3A1857794"
  },
  "metadata": {
    "publication_info": [
      {
        "cnum": "C21-07-05.11",
        "year": 2021,
        "page_end": "11",
        "page_start": "1",
        "conference_record": {
          "$ref": "https://inspirehep.net/api/conferences/2688843"
        }
      },
      {
        "material": "publication",
        "pubinfo_freetext": "PASC '21: Proceedings of the Platform for Advanced Scientific Computing Conference, July 2021, Article No.: 9, Pages 1-11"
      }
    ],
    "citation_count_without_self_citations": 5,
    "citation_count": 6,
    "core": true,
    "dois": [
      {
        "value": "10.1145/3468267.3470613",
        "source": "Association for Computing Machinery"
      },
      {
        "value": "10.1145/3468267.3470613",
        "source": "arXiv",
        "material": "publication"
      }
    ],
    "titles": [
      {
        "title": "Solving DWF dirac equation using multi-splitting preconditioned conjugate gradient with tensor cores on NVIDIA GPUs",
        "source": "Association for Computing Machinery"
      },
      {
        "title": "Solving DWF Dirac Equation Using Multi-splitting Preconditioned Conjugate Gradient with Tensor Cores on NVIDIA GPUs",
        "source": "arXiv"
      }
    ],
    "$schema": "https://inspirehep.net/schemas/records/hep.json",
    "authors": [
      {
        "uuid": "17f34ae1-4e2e-475f-a1f2-a1783db2dc6a",
        "emails": [
          "jtu@nvidia.com"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/2003728"
        },
        "full_name": "Tu, Jiqun",
        "affiliations": [
          {
            "value": "NVIDIA, Santa Clara",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/1677141"
            }
          }
        ],
        "signature_block": "Tj",
        "raw_affiliations": [
          {
            "value": "NVIDIA Corporation"
          }
        ]
      },
      {
        "uuid": "e3028e47-086f-4290-a231-022b63b5a0c5",
        "emails": [
          "mclark@nvidia.com"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1041420"
        },
        "full_name": "Clark, M.A.",
        "affiliations": [
          {
            "value": "NVIDIA, Santa Clara",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/1677141"
            }
          }
        ],
        "signature_block": "CLARCm",
        "raw_affiliations": [
          {
            "value": "NVIDIA Corporation"
          }
        ]
      },
      {
        "uuid": "94495040-dd7d-4b96-b9ec-b1814164d8b9",
        "emails": [
          "chulwoo@bnl.gov"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/1274259"
        },
        "full_name": "Jung, Chulwoo",
        "affiliations": [
          {
            "value": "Brookhaven",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/902689"
            }
          }
        ],
        "signature_block": "JANGc",
        "raw_affiliations": [
          {
            "value": "Brookhaven National Laboratory"
          }
        ]
      },
      {
        "uuid": "fbcbb70a-3f4e-448f-bfad-2f6d87c698e6",
        "emails": [
          "rdm10@columbia.edu"
        ],
        "record": {
          "$ref": "https://inspirehep.net/api/authors/998209"
        },
        "full_name": "Mawhinney, Robert D.",
        "affiliations": [
          {
            "value": "Columbia U.",
            "record": {
              "$ref": "https://inspirehep.net/api/institutions/902749"
            }
          }
        ],
        "signature_block": "MANYr",
        "raw_affiliations": [
          {
            "value": "Columbia University"
          }
        ]
      }
    ],
    "curated": true,
    "figures": [
      {
        "key": "d238a8f5d7dbb4cc3dc7d86b92dbef64",
        "url": "https://inspirehep.net/files/d238a8f5d7dbb4cc3dc7d86b92dbef64",
        "label": "fig:ms_dec",
        "source": "arxiv",
        "caption": "Decomposition of the matrix $A$, the solution vector $x$ and the right-hand-side vector $b$ into local parts on a node.",
        "filename": "ms_dec.png",
        "material": "preprint"
      },
      {
        "key": "2e8b267a29876a328bd4af08409648de",
        "url": "https://inspirehep.net/files/2e8b267a29876a328bd4af08409648de",
        "label": "fig:snake",
        "source": "arxiv",
        "caption": "The normal operator $D^\\dagger D$ has as many as four Wilson hopping terms (stencils). Enforcing Dirichlet boundary condition on it requires the inclusion of the \\textit{snake} terms, which are represented, as a 2D illustration, by the black arrows.",
        "filename": "snake.png",
        "material": "preprint"
      },
      {
        "key": "2d85f30c1ad89c3203f27b68ab4c2597",
        "url": "https://inspirehep.net/files/2d85f30c1ad89c3203f27b68ab4c2597",
        "label": "fig:dw",
        "source": "arxiv",
        "caption": "A 2D illustration of using padded field to capture the snake terms. The orange and purple squares represent non-zero sites, and the white space represents zero sites. Black arrows represent the stencils relevant to the Dirichlet boundary condition. The stencil operators are not applied to the grey areas as they are either zero (as in 2)) or not needed for the output field at the center (as in 5) and 6)).",
        "filename": "pad.png",
        "material": "preprint"
      },
      {
        "key": "bee8145cc76849237588994a9ff9f432",
        "url": "https://inspirehep.net/files/bee8145cc76849237588994a9ff9f432",
        "label": "fig:hmma",
        "source": "arxiv",
        "caption": "An illustration of how the $M_5^{-1}$, with $L_s=12$, and the number of 4D space-time sites dealt with in the CUDA thread block (``4d'') being 16, is applied using the HMMA instruction. The matrix multiply is of shape ``$48$-by-$48$'' $\\times$ ``$48$-by-$96$'' $=$ ``$48$-by-$96$'', and is performed in the sub-block form of ``$16$-by-$16$'' += ``$16$-by-$16$'' $\\times$ ``$16$-by-$16$'' with tensor cores as shown in the figure. Note that sizes of the shapes shown here are not to scale.",
        "filename": "shared_memory_tensor_core.png",
        "material": "preprint"
      },
      {
        "key": "913a2a26b6940017ecf57c5435c08751",
        "url": "https://inspirehep.net/files/913a2a26b6940017ecf57c5435c08751",
        "label": "fig:residual-rbc",
        "source": "arxiv",
        "caption": "\\(L_2\\) residual as a function of (outer) iteration number for MSPCG (with and without reduced-$L_s$ acceleration) used on Dirac equation ($D^\\dagger Dx=y$) linear solves on SUMMIT to a tolerance of $10^{-12}$ on the \\texttt{RBC96} lattice. $y$ is a Gaussian random source vector.",
        "filename": "residual-rbc.png",
        "material": "preprint"
      },
      {
        "key": "db0a27d0edbfb2717f095ebc0f6999b1",
        "url": "https://inspirehep.net/files/db0a27d0edbfb2717f095ebc0f6999b1",
        "label": "fig:residual-cal",
        "source": "arxiv",
        "caption": "\\(L_2\\) residual as a function of (outer) iteration number for MSPCG (with and without reduced-$L_s$ acceleration) used on Dirac equation ($D^\\dagger Dx=Dy$) linear solves on SUMMIT to a tolerance of $10^{-10}$ on the \\texttt{CAL64} lattice. $y$ is a random source vector.",
        "filename": "residual-cal.png",
        "material": "preprint"
      }
    ],
    "license": [
      {
        "url": "http://creativecommons.org/licenses/by/4.0/",
        "license": "CC BY 4.0",
        "material": "preprint"
      }
    ],
    "texkeys": [
      "Tu:2021dvv"
    ],
    "citeable": true,
    "imprints": [
      {
        "date": "2021"
      }
    ],
    "keywords": [
      {
        "value": "operator: linear",
        "schema": "INSPIRE"
      },
      {
        "value": "lattice field theory",
        "schema": "INSPIRE"
      },
      {
        "value": "Dirac equation",
        "schema": "INSPIRE"
      },
      {
        "value": "domain wall",
        "schema": "INSPIRE"
      },
      {
        "value": "performance",
        "schema": "INSPIRE"
      },
      {
        "value": "fusion",
        "schema": "INSPIRE"
      },
      {
        "value": "costs",
        "schema": "INSPIRE"
      }
    ],
    "abstracts": [
      {
        "value": "We show that using the multi-splitting algorithm as a preconditioner for the domain wall Dirac linear operator, arising in lattice QCD, effectively reduces the inter-node communication cost, at the expense of performing more on-node floating point and memory operations. Correctly including the boundary snake terms, the preconditioner is implemented in the QUDA framework, where it is found that utilizing kernel fusion and the tensor cores on NVIDIA GPUs is necessary to achieve a sufficiently performant preconditioner. A reduced-dimension (reduced-L$_{s}$) strategy is also proposed and tested for the preconditioner. We find the method achieves lower time to solution than regular CG at high node count despite the additional local computational requirements from the preconditioner. This method could be useful for supercomputers with more on-node flops and memory bandwidth than inter-node communication bandwidth.",
        "source": "Association for Computing Machinery"
      },
      {
        "value": "We show that using the multi-splitting algorithm as a preconditioner for the domain wall Dirac linear operator, arising in lattice QCD, effectively reduces the inter-node communication cost, at the expense of performing more on-node floating point and memory operations. Correctly including the boundary \\textit{snake} terms, the preconditioner is implemented in the QUDA framework, where it is found that utilizing kernel fusion and the tensor cores on NVIDIA GPUs is necessary to achieve a sufficiently performant preconditioner. A reduced-dimension (reduced-$L_s$) strategy is also proposed and tested for the preconditioner. We find the method achieves lower time to solution than regular CG at high node count despite the additional local computational requirements from the preconditioner. This method could be useful for supercomputers with more on-node flops and memory bandwidth than inter-node communication bandwidth.",
        "source": "arXiv"
      }
    ],
    "references": [
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/855408"
        },
        "raw_refs": [
          {
            "value": "[1] R. Babich, J. Brannick, R.C. Brower, M.A. Clark, T.A. Manteuffel, S.F. McCormick, J.C. Osborn, and C. Rebbi. 2010. Adaptive multigrid algorithm for the lattice Wilson-Dirac operator. Phys. Rev. Lett. 105 (2010), 201602. arXiv:1005.3043 [hep-lat] , DOI: 10.1103/PhysRevLett.105.201602",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1103/PhysRevLett.105.201602"
          ],
          "misc": [
            "Adaptive multigrid algorithm for the lattice Wilson-Dirac operator"
          ],
          "label": "1",
          "authors": [
            {
              "full_name": "Babich, R."
            },
            {
              "full_name": "Brannick, J."
            },
            {
              "full_name": "Brower, R.C."
            },
            {
              "full_name": "Clark, M.A."
            },
            {
              "full_name": "Manteuffel, T.A."
            },
            {
              "full_name": "McCormick, S.F."
            },
            {
              "full_name": "Osborn, J.C."
            },
            {
              "full_name": "Rebbi, C."
            }
          ],
          "arxiv_eprint": "1005.3043",
          "publication_info": {
            "year": 2010,
            "artid": "201602",
            "journal_title": "Phys.Rev.Lett.",
            "journal_volume": "105"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/927455"
        },
        "raw_refs": [
          {
            "value": "[2] R Babich, M A Clark, B Joo, G Shi, R C Brower, and S Gottlieb. 2011. Scaling Lattice QCD beyond 100 GPUs. (2011). arXiv:1109.2935",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Scaling Lattice QCD beyond 100 GPUs"
          ],
          "label": "2",
          "authors": [
            {
              "full_name": "Babich, R."
            },
            {
              "full_name": "Clark, M.A."
            },
            {
              "full_name": "Joo, B."
            },
            {
              "full_name": "Shi, G."
            },
            {
              "full_name": "Brower, R.C."
            },
            {
              "full_name": "Gottlieb, S."
            }
          ],
          "arxiv_eprint": "1109.2935",
          "publication_info": {
            "year": 2011
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/756769"
        },
        "raw_refs": [
          {
            "value": "[3] J. Brannick, R.C. Brower, M.A. Clark, J.C. Osborn, and C. Rebbi. 2008. Adaptive Multigrid Algorithm for Lattice QCD. Phys. Rev. Lett. 100 (2008), 041601. arXiv:0707.4018 [hep-lat] , DOI: 10.1103/PhysRevLett.100.041601",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1103/PhysRevLett.100.041601"
          ],
          "misc": [
            "Adaptive Multigrid Algorithm for Lattice QCD"
          ],
          "label": "3",
          "authors": [
            {
              "full_name": "Brannick, J."
            },
            {
              "full_name": "Brower, R.C."
            },
            {
              "full_name": "Clark, M.A."
            },
            {
              "full_name": "Osborn, J.C."
            },
            {
              "full_name": "Rebbi, C."
            }
          ],
          "arxiv_eprint": "0707.4018",
          "publication_info": {
            "year": 2008,
            "artid": "041601",
            "journal_title": "Phys.Rev.Lett.",
            "journal_volume": "100"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/698111"
        },
        "raw_refs": [
          {
            "value": "[4] R.C. Brower, H. Neff, and K. Orginos. 2006. Möbius Fermions. Nuclear Physics B - Proceedings Supplements 153, 1 (2006), 191 -- 198. Proceedings of the Workshop on Computational Hadron Physics. , DOI: 10.1016/j.nuclphysbps.2006.01.047",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1016/j.nuclphysbps.2006.01.047"
          ],
          "misc": [
            "Möbius Fermions",
            "191 -- 198. Proceedings of the Workshop on Computational Hadron Physics"
          ],
          "label": "4",
          "authors": [
            {
              "full_name": "Brower, R.C."
            },
            {
              "full_name": "Neff, H."
            },
            {
              "full_name": "Orginos, K."
            }
          ],
          "publication_info": {
            "year": 2006,
            "artid": "191",
            "page_start": "191",
            "journal_title": "Nucl.Phys.B Proc.Suppl.",
            "journal_volume": "153"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1791487"
        },
        "raw_refs": [
          {
            "value": "[5] Richard C. Brower, M.A. Clark, Dean Howarth, and Evan S. Weinberg. 2020. Multigrid for Chiral Lattice Fermions: Domain Wall. (4 2020). arXiv:2004.07732 [heplat]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Dean Howarth, and Evan S. Weinberg.. Multigrid for Chiral Lattice Fermions: Domain Wall. (4)"
          ],
          "label": "5",
          "authors": [
            {
              "full_name": "Brower, Richard C."
            },
            {
              "full_name": "Clark, M.A."
            }
          ],
          "arxiv_eprint": "2004.07732",
          "publication_info": {
            "year": 2020
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/659699"
        },
        "raw_refs": [
          {
            "value": "[6] Richard C. Brower, Hartmut Neff, and Kostas Orginos. 2005. Möbius fermions: Improved domain wall chiral fermions. Nuclear Physics B - Proceedings Supplements 140, SPEC. ISS. (sep 2005), 686--688. arXiv:0409118 [hep-lat] , DOI: 10.1016/j.nuclphysbps.2004.11.180",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1016/j.nuclphysbps.2004.11.180"
          ],
          "misc": [
            "Hartmut Neff, and Kostas Orginos.. Möbius fermions: Improved domain wall chiral fermions. Nuclear Physics B - Proceedings Supplements",
            "140, SPEC. ISS. (sep), 686--688"
          ],
          "label": "6",
          "authors": [
            {
              "full_name": "Brower, Richard C."
            }
          ],
          "arxiv_eprint": "hep-lat/0409118",
          "publication_info": {
            "year": 2005
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/837123"
        },
        "raw_refs": [
          {
            "value": "[7] M.A. Clark, R. Babich, K. Barros, R.C. Brower, and C. Rebbi. 2010. Solving Lattice QCD systems of equations using mixed precision solvers on GPUs. Comput. Phys. Commun. 181 (2010), 1517--1528. arXiv:0911.3191 [hep-lat] , DOI: 10.1016/j.cpc.2010.05.002",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1016/j.cpc.2010.05.002"
          ],
          "misc": [
            "Solving Lattice QCD systems of equations using mixed precision solvers on GPUs",
            "--1528"
          ],
          "label": "7",
          "authors": [
            {
              "full_name": "Clark, M.A."
            },
            {
              "full_name": "Babich, R."
            },
            {
              "full_name": "Barros, K."
            },
            {
              "full_name": "Brower, R.C."
            },
            {
              "full_name": "Rebbi, C."
            }
          ],
          "arxiv_eprint": "0911.3191",
          "publication_info": {
            "year": 2010,
            "artid": "1517",
            "page_start": "1517",
            "journal_title": "Comput.Phys.Commun.",
            "journal_volume": "181"
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[8] Jack Deslippe. [n.d.]. Perlmutter - A 2020 Pre-Exascale GPU-accelerated System for NERSC. Architecture and Early Application Performance Optimization Results. Paper presented at the meeting of GPU Technology Conference 2019, San Jose CA. https://on-demand.gputechconf.com/supercomputing/2019/pdf/sc1919-perlmutter-a-2020-pre-exascale-gpu-accelerated-system-for-nersc.-architecture-and-early-application-performance-optimization-results.pdf",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Jack Deslippe. [n.d.]. Perlmutter - A 2020 Pre-Exascale GPU-accelerated System for NERSC. Architecture and Early Application Performance Optimization Results. Paper presented at the meeting of GPU Technology Conference 2019, San Jose CA"
          ],
          "urls": [
            {
              "value": "https://on-demand.gputechconf.com/supercomputing/2019/pdf/sc1919-perlmutter-a-2020-pre-exascale-gpu-accelerated-system-for-nersc.-architecture-and-early-application-performance-optimization-results.pdf"
            }
          ],
          "label": "8",
          "publication_info": {
            "year": 2020
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/254077"
        },
        "raw_refs": [
          {
            "value": "[9] S. Duane, A.D. Kennedy, B.J. Pendleton, and D. Roweth. 1987. Hybrid Monte Carlo. Phys. Lett. B 195 (1987), 216--222. , DOI: 10.1016/0370-2693(87)91197-X",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1016/0370-2693(87)91197-X"
          ],
          "misc": [
            "Hybrid Monte Carlo",
            "--222"
          ],
          "label": "9",
          "authors": [
            {
              "full_name": "Duane, S."
            },
            {
              "full_name": "Kennedy, A.D."
            },
            {
              "full_name": "Pendleton, B.J."
            },
            {
              "full_name": "Roweth, D."
            }
          ],
          "publication_info": {
            "year": 1987,
            "artid": "216",
            "page_start": "216",
            "journal_title": "Phys.Lett.B",
            "journal_volume": "195"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/37849"
        },
        "raw_refs": [
          {
            "value": "[10] A. Frommer, V. Hannemann, B. Nockel, T. Lippert, and K. Schilling. 1994. Accelerating Wilson fermion matrix inversions by means of the stabilized biconjugate gradient algorithm. Int. J. Mod. Phys. C 5 (1994), 1073--1088. arXiv:hep-lat/9404013 , DOI: 10.1142/S012918319400115X",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1142/S012918319400115X"
          ],
          "misc": [
            "Accelerating Wilson fermion matrix inversions by means of the stabilized biconjugate gradient algorithm",
            "--1088. arXiv:"
          ],
          "label": "10",
          "authors": [
            {
              "full_name": "Frommer, A."
            },
            {
              "full_name": "Hannemann, V."
            },
            {
              "full_name": "Nockel, B."
            },
            {
              "full_name": "Lippert, T."
            },
            {
              "full_name": "Schilling, K."
            }
          ],
          "arxiv_eprint": "hep-lat/9404013",
          "publication_info": {
            "year": 1994,
            "artid": "1073",
            "page_start": "1073",
            "journal_title": "Int.J.Mod.Phys.C",
            "journal_volume": "5"
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[11] Gene H. Golub and Qiang Ye. 1999. Inexact Preconditioned Conjugate Gradient Method with Inner-Outer Iteration. SIAM Journal on Scientific Computing 21, 4 (1999), 1305--1320.",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "and Qiang Ye.. Inexact Preconditioned Conjugate Gradient Method with Inner-Outer Iteration",
            "1305--1320"
          ],
          "label": "11",
          "authors": [
            {
              "full_name": "Golub, Gene H."
            }
          ],
          "publication_info": {
            "year": 1999,
            "artid": "1305",
            "page_start": "1305",
            "journal_title": "SIAM J.Sci.Comput.",
            "journal_volume": "21"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/2853730"
        },
        "raw_refs": [
          {
            "value": "[12] M. R. Hestenes and E. Stiefel. 1952. Methods of conjugate gradients for solving linear systems. Journal of research of the National Bureau of Standards 49 (1952), 409--436., DOI: 10.6028/jres.049.044",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.6028/jres.049.044"
          ],
          "misc": [
            "Methods of conjugate gradients for solving linear systems",
            "--436"
          ],
          "label": "12",
          "authors": [
            {
              "full_name": "Hestenes, M.R."
            },
            {
              "full_name": "Stiefel, E."
            }
          ],
          "publication_info": {
            "year": 1952,
            "artid": "409",
            "page_start": "409",
            "journal_title": "J.Res.Natl.Bur.Stand.",
            "journal_volume": "49"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/379238"
        },
        "raw_refs": [
          {
            "value": "[13] Karl Jansen. 1996. Domain wall fermions and chiral gauge theories. Physics Report 273, 1 (1996), 1--54. arXiv:9410018 [hep-lat]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Karl Jansen.. Domain wall fermions and chiral gauge theories",
            "1--54"
          ],
          "label": "13",
          "arxiv_eprint": "hep-lat/9410018",
          "publication_info": {
            "year": 1996,
            "artid": "1",
            "page_start": "1",
            "journal_title": "Phys.Rept.",
            "journal_volume": "273"
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[14] Fabienne Jezequel, Raphaël Couturier, and Christophe Denis. 2012. Solving large sparse linear systems in a grid environment: The GREMLINS code versus the PETSc library. Journal of Supercomputing 59, 3 (2012), 1517--1532.",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Fabienne Jezequel, Raphaël Couturier, and Christophe Denis.. Solving large sparse linear systems in a grid environment: The GREMLINS code versus the PETSc library",
            "1517--1532"
          ],
          "label": "14",
          "publication_info": {
            "year": 2012,
            "artid": "1517",
            "page_start": "1517",
            "journal_title": "J.Supercomput.",
            "journal_volume": "59"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/630985"
        },
        "raw_refs": [
          {
            "value": "[15] Martin Lüscher. 2004. Solution of the Dirac equation in lattice QCD using a domain decomposition method. Computer Physics Communications 156, 3 (2004), 209--220. arXiv:0310048 [hep-lat] , DOI: 10.1016/S0010-4655(03)00486-7",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1016/S0010-4655(03)00486-7"
          ],
          "misc": [
            "Martin Lüscher.. Solution of the Dirac equation in lattice QCD using a domain decomposition method",
            "209--220"
          ],
          "label": "15",
          "arxiv_eprint": "hep-lat/0310048",
          "publication_info": {
            "year": 2004,
            "artid": "209",
            "page_start": "209",
            "journal_title": "Comput.Phys.Commun.",
            "journal_volume": "156"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/659592"
        },
        "raw_refs": [
          {
            "value": "[16] Martin Luscher. 2005. Schwarz-preconditioned HMC algorithm for two-flavour lattice QCD. Comput. Phys. Commun. 165 (2005), 199--220. arXiv:hep-lat/0409106 , DOI: 10.1016/j.cpc.2004.10.004",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1016/j.cpc.2004.10.004"
          ],
          "misc": [
            "Martin Luscher.. Schwarz-preconditioned HMC algorithm for two-flavour lattice QCD",
            "--220. arXiv:"
          ],
          "label": "16",
          "arxiv_eprint": "hep-lat/0409106",
          "publication_info": {
            "year": 2005,
            "artid": "199",
            "page_start": "199",
            "journal_title": "Comput.Phys.Commun.",
            "journal_volume": "165"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1787931"
        },
        "raw_refs": [
          {
            "value": "[17] Gregory Edward McGlynn. 2016. Advances in Lattice Quantum Chromodynamics. Ph.D. Dissertation. Columbia U. , DOI: 10.7916/D8T72HD7",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.7916/D8T72HD7"
          ],
          "misc": [
            "Gregory Edward McGlynn.. Advances in Lattice Quantum Chromodynamics. Ph.D. Dissertation"
          ],
          "label": "17",
          "authors": [
            {
              "full_name": "McGlynn, Gregory Edward"
            }
          ],
          "publication_info": {
            "year": 2016
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[18] Nolan Miller et al. 2020. FK/F&pi; from M\\\"{o}bius Domain-Wall fermions solved on gradient-flowed HISQ ensembles. Phys. Rev. D 102, 3 (2020), 034507. arXiv:2005.04795 [hep-lat] , DOI: 10.1103/PhysRevD.102.034507",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Nolan",
            "FK/F&pi"
          ],
          "label": "18",
          "authors": [
            {
              "full_name": "Miller"
            }
          ],
          "publication_info": {
            "year": 2020
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1795052"
        },
        "raw_refs": [
          {
            "value": "[18] Nolan Miller et al. 2020. FK/F&pi; from M\\\"{o}bius Domain-Wall fermions solved on gradient-flowed HISQ ensembles. Phys. Rev. D 102, 3 (2020), 034507. arXiv:2005.04795 [hep-lat] , DOI: 10.1103/PhysRevD.102.034507",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1103/PhysRevD.102.034507"
          ],
          "misc": [
            "Nolan",
            "from M\\\"{o}bius Domain-Wall fermions solved on gradient-flowed HISQ ensembles",
            "034507"
          ],
          "label": "18",
          "authors": [
            {
              "full_name": "Miller"
            }
          ],
          "arxiv_eprint": "2005.04795",
          "publication_info": {
            "year": 2020,
            "artid": "034507",
            "journal_title": "Phys.Rev.D",
            "journal_volume": "102"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1832617"
        },
        "raw_refs": [
          {
            "value": "[19] Nolan Miller et al. 2020. Scale setting the M{\\\"o}bius Domain Wall Fermion on gradient-flowed HISQ action using the omega baryon mass and the gradient-flow scale w0. (11 2020). arXiv:2011.12166 [hep-lat]",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Nolan",
            "Scale setting the M{\\\"o}bius Domain Wall Fermion on gradient-flowed HISQ action using the omega baryon mass and the gradient-flow scale w0. (11)"
          ],
          "label": "19",
          "authors": [
            {
              "full_name": "Miller"
            }
          ],
          "arxiv_eprint": "2011.12166",
          "publication_info": {
            "year": 2020
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[20] Dianne P O'Leary and R E White. 1985. Multi-Splittings of Matrices and Parallel Solution of Linear Systems. SIAM Journal on Algebraic Discrete Methods 6, 4 (1985), 630--640.",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Multi-Splittings of Matrices and Parallel Solution of Linear Systems",
            "Journal on Algebraic Discrete Methods 6, 4 , 630--640"
          ],
          "label": "20",
          "authors": [
            {
              "full_name": "O'Leary, Dianne P."
            },
            {
              "full_name": "White, R.E."
            }
          ],
          "imprint": {
            "publisher": "SIAM"
          },
          "publication_info": {
            "year": 1985
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/877658"
        },
        "raw_refs": [
          {
            "value": "[21] Yusuke Osaki and Ken-ichi Ishikawa. 2010. Domain Decomposition method on GPU cluster. (2010), 1--7. arXiv:1011.3318",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Yusuke Osaki and Ken-ichi Ishikawa.. Domain Decomposition method on GPU cluster. , 1--7"
          ],
          "label": "21",
          "arxiv_eprint": "1011.3318",
          "publication_info": {
            "year": 2010
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[22] Y Saad. 1980. On the Rates of Convergence of the Lanczos and the Block-Lanczos Methods. SIAM J. Numer. Anal. 17, 5 (1980), 687--706.",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "On the Rates of Convergence of the Lanczos and the Block-Lanczos Methods"
          ],
          "label": "22",
          "authors": [
            {
              "full_name": "Saad, Y."
            }
          ],
          "publication_info": {
            "year": 1980,
            "page_end": "706",
            "page_start": "687",
            "journal_issue": "5",
            "journal_title": "SIAM J.Numer.Anal.",
            "journal_volume": "17"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1279923"
        },
        "raw_refs": [
          {
            "value": "[23] Eigo Shintani, Rudy Arthur, Thomas Blum, Taku Izubuchi, Chulwoo Jung, and Christoph Lehner. 2015. Covariant approximation averaging. Physical Review D - Particles, Fields, Gravitation and Cosmology (2015). arXiv:1402.0244 , DOI: 10.1103/PhysRevD.91.114511",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1103/PhysRevD.91.114511"
          ],
          "misc": [
            "Covariant approximation averaging"
          ],
          "label": "23",
          "authors": [
            {
              "full_name": "Shintani, Eigo"
            },
            {
              "full_name": "Arthur, Rudy"
            },
            {
              "full_name": "Blum, Thomas"
            },
            {
              "full_name": "Izubuchi, Taku"
            },
            {
              "full_name": "Jung, Chulwoo"
            },
            {
              "full_name": "Lehner, Christoph"
            }
          ],
          "arxiv_eprint": "1402.0244",
          "publication_info": {
            "year": 2015,
            "artid": "114511",
            "page_start": "114511",
            "journal_title": "Phys.Rev.D",
            "journal_volume": "91"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/754594"
        },
        "raw_refs": [
          {
            "value": "[24] Andreas Stathopoulos and Konstantinos Orginos. 2010. Computing and Deflating Eigenvalues While Solving Multiple Right-Hand Side Linear Systems with an Application to Quantum Chromodynamics. SIAM Journal on Scientific Computing 32, 1 (2010), 439--462. , DOI: 10.1137/080725532",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.1137/080725532"
          ],
          "misc": [
            "Computing and Deflating Eigenvalues While Solving Multiple Right-Hand Side Linear Systems with an Application to Quantum Chromodynamics"
          ],
          "label": "24",
          "authors": [
            {
              "full_name": "Stathopoulos, Andreas"
            },
            {
              "full_name": "Orginos, Konstantinos"
            }
          ],
          "publication_info": {
            "year": 2010,
            "page_end": "462",
            "page_start": "439",
            "journal_issue": "1",
            "journal_title": "SIAM J.Sci.Comput.",
            "journal_volume": "32"
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[25] Ilya Sutskever, James Martens, George Dahl, and Geoffrey Hinton. 2013. On the importance of initialization and momentum in deep learning. In Proceedings of the 30th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 28), Sanjoy Dasgupta and David McAllester (Eds.). PMLR, Atlanta, Georgia, USA, 1139--1147. http://proceedings.mlr.press/v28/sutskever13.html",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Ilya Sutskever, James Martens, George Dahl, and Geoffrey Hinton.. On the importance of initialization and momentum in deep learning. In Proceedings of the 30th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 28), Sanjoy Dasgupta and David McAllester (Eds.). PMLR, Atlanta, Georgia, USA, 1139--1147"
          ],
          "urls": [
            {
              "value": "http://proceedings.mlr.press/v28/sutskever13.html"
            }
          ],
          "label": "25",
          "publication_info": {
            "year": 2013
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[26] H. A. van der Vorst. 1992. Bi-CGSTAB: A Fast and Smoothly Converging Variant of Bi-CG for the Solution of Nonsymmetric Linear Systems. SIAM J. Sci. Stat. Comput. 13, 2 (March 1992), 631--644.",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Bi-CGSTAB: A Fast and Smoothly Converging Variant of Bi-CG for the Solution of Nonsymmetric Linear Systems"
          ],
          "label": "26",
          "authors": [
            {
              "full_name": "van der Vorst, H.A."
            }
          ],
          "publication_info": {
            "year": 1992,
            "page_end": "644",
            "page_start": "631",
            "journal_issue": "2",
            "journal_title": "SIAM J.Sci.Statist.Comput.",
            "journal_volume": "13"
          }
        }
      },
      {
        "raw_refs": [
          {
            "value": "[27] Henk A. van der Vorst and Qiang Ye. 2000. Residual Replacement Strategies for Krylov Subspace Iterative Methods for the Convergence of True Residuals. SIAM Journal on Scientific Computing 22, 3 (2000), 835--852.",
            "schema": "text"
          }
        ],
        "reference": {
          "misc": [
            "Residual Replacement Strategies for Krylov Subspace Iterative Methods for the Convergence of True Residuals"
          ],
          "label": "27",
          "authors": [
            {
              "full_name": "van der Vorst, Henk A."
            },
            {
              "full_name": "Ye, Qiang"
            }
          ],
          "publication_info": {
            "year": 2000,
            "page_end": "852",
            "page_start": "835",
            "journal_issue": "3",
            "journal_title": "SIAM J.Sci.Comput.",
            "journal_volume": "22"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/1499497"
        },
        "raw_refs": [
          {
            "value": "[28] Azusa Yamaguchi and Peter Boyle. 2016. Hierarchically deflated conjugate residual. PoS LATTICE2016 (2016), 374. arXiv:1611.06944 [hep-lat] , DOI: 10.22323/1.256.0374",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.22323/1.256.0374"
          ],
          "misc": [
            "Azusa Yamaguchi and Peter Boyle.. Hierarchically deflated conjugate residual"
          ],
          "label": "28",
          "arxiv_eprint": "1611.06944",
          "publication_info": {
            "year": 2016,
            "artid": "374",
            "page_start": "374",
            "journal_title": "PoS",
            "journal_volume": "LATTICE2016"
          }
        }
      },
      {
        "record": {
          "$ref": "https://inspirehep.net/api/literature/947047"
        },
        "raw_refs": [
          {
            "value": "[29] Hantao Yin and Robert Mawhinney. 2012. Improving DWF Simulations: Force Gradient Integrator and the Mobius Accelerated DWF Solver. PoS Lattice 2011 (2012), 051. , DOI: 10.22323/1.139.0051",
            "schema": "text"
          }
        ],
        "reference": {
          "dois": [
            "10.22323/1.139.0051"
          ],
          "misc": [
            "Hantao Yin and Robert Mawhinney.. Improving DWF Simulations: Force Gradient Integrator and the Mobius Accelerated DWF Solver"
          ],
          "label": "29",
          "publication_info": {
            "year": 2012,
            "artid": "051",
            "page_start": "051",
            "journal_title": "PoS",
            "journal_volume": "Lattice2011"
          }
        }
      }
    ],
    "public_notes": [
      {
        "value": "Add DOI",
        "source": "arXiv"
      }
    ],
    "arxiv_eprints": [
      {
        "value": "2104.05615",
        "categories": [
          "hep-lat"
        ]
      }
    ],
    "document_type": [
      "conference paper",
      "article"
    ],
    "preprint_date": "2021-04-12",
    "control_number": 1857794,
    "legacy_version": "20210506100830.0",
    "number_of_pages": 11,
    "inspire_categories": [
      {
        "term": "Lattice",
        "source": "arxiv"
      },
      {
        "term": "Lattice"
      }
    ],
    "legacy_creation_date": "2021-04-13"
  },
  "revision_id": 25,
  "created": "2021-04-13T00:00:00+00:00"
}