#!/usr/bin/env python3
"""Extract arXiv IDs from BibTeX files and print them to stdout (one per line).

Behavior:
    - Parses all input .bib files and merges entries using ``bibtex_lib.merge_bibtex_strings``.
    - Deduplication: when the same citation key appears in multiple files, only
      the first occurrence (from the first file listed) is kept.
    - Entries without an ``eprint`` field (i.e., no arXiv ID) are silently
      skipped and do not appear in the output.
    - Output is sorted by date (most recent first), derived from the arXiv ID
      (e.g., ``2604.20797`` → 2026-04).

Usage:
    python scripts/get_arxiv_id_from_bibtex.py refs.bib
    python scripts/get_arxiv_id_from_bibtex.py refs1.bib refs2.bib
"""

import argparse
import os
import sys

# Prepend this file's directory to the import search path before the
# project-local import below (presumably ``action_lib`` lives next to this
# script -- TODO confirm).
sys.path.insert(0, os.path.dirname(__file__))

from action_lib import extract_arxiv_ids_from_files

def main() -> None:
    """Run the command-line interface.

    Reads one or more positional paths from the command line, rejects (via
    ``ArgumentParser.error``, which exits the process) the first path that
    either lacks a ``.bib`` suffix or does not refer to an existing file, and
    then prints every value yielded by ``extract_arxiv_ids_from_files`` on
    its own line.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    cli.add_argument("input", nargs="+", help="Input .bib files")
    bib_paths = cli.parse_args().input

    # Validate in command-line order; the suffix is checked before existence,
    # so a wrongly named path is always reported as "must be a .bib file".
    for bib_path in bib_paths:
        if not bib_path.endswith(".bib"):
            cli.error(f"input file must be a .bib file: {bib_path}")
        elif not os.path.isfile(bib_path):
            cli.error(f"input file not found: {bib_path}")

    # All inputs look valid: emit one ID per line.
    for arxiv_id in extract_arxiv_ids_from_files(bib_paths):
        print(arxiv_id)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
