from typing import Union
from pathlib import Path
import shutil
import re
import pdftotext
def _extract_text(path: Path) -> str:
    """Return the text of the PDF at *path* as one string.

    The file is opened in binary mode and handed to ``pdftotext.PDF``. The
    items produced by iterating that object (presumably one string per page;
    see the pdftotext documentation) are joined with a separator line made
    of 80 ``=`` characters.
    """
    page_separator = "=" * 80
    with path.open("rb") as stream:
        pages = pdftotext.PDF(stream)
        # Join while the file is still open so this works whether or not the
        # PDF object reads lazily from the stream.
        return page_separator.join(pages)
def _parse_date(text, keywords) -> str:
date = re.search(keywords + r"\s+(\d{2}-\d{2}-\d{4})", text).group(1)
date = date.replace("-", "")
return date[4:] + date[:4]
def rename_pdf(path: Union[str, Path]) -> None:
    """Rename a PDF to ``<award number>_<award date>_<release date>.pdf``.

    The three fields are read from the PDF's extracted text: the digits
    following ``Award Number``, and the dates following ``Award Date`` and
    ``Release Date`` (reformatted by ``_parse_date``). The file stays in its
    current directory; only its name changes.

    If another file already occupies the target name, nothing is moved and a
    message is printed instead. If the file already has the target name, the
    call is a no-op.

    Args:
        path: Location of the PDF, as a string or ``Path``.

    Raises:
        AttributeError: if ``Award Number`` (or one of the dates) cannot be
            found in the extracted text, because ``re.search`` returns
            ``None``.
    """
    if not isinstance(path, Path):
        path = Path(path)

    text = _extract_text(path)
    award_num = re.search(r"Award Number\s+(\d+)", text).group(1)
    award_date = _parse_date(text, "Award Date")
    release_date = _parse_date(text, "Release Date")
    target = path.with_name(f"{award_num}_{award_date}_{release_date}.pdf")

    # Already correctly named: nothing to do, and not a duplicate.
    if target == path:
        return

    # shutil.move falls back to os.rename, which on POSIX silently replaces
    # an existing destination file, so FileExistsError alone cannot be
    # relied on to detect duplicates. Check explicitly before moving.
    if target.exists():
        print(f"{path} has duplicate file!")
        return

    try:
        shutil.move(path, target)
    except FileExistsError:
        # Still possible on Windows if the target appears between the check
        # above and the move.
        print(f"{path} has duplicate file!")