Files
tg_resume_db/pdf_merge.py
2026-03-11 15:27:10 +03:00

46 lines
1.3 KiB
Python

from __future__ import annotations
from pathlib import Path
from typing import Iterable, List, Optional
from pypdf import PdfReader, PdfWriter
def merge_pdfs(pdf_paths: Iterable[str | Path], out_pdf_path: str | Path) -> dict:
    """Concatenate the pages of several PDF files into one output PDF.

    Sources are processed in iteration order. This is a best-effort merge:
    any source that raises while being opened or while its pages are copied
    is recorded as skipped and the merge continues with the next one.

    Args:
        pdf_paths: Paths of the source PDFs, in the desired output order.
            A single ``str`` or ``Path`` is accepted and treated as a
            one-element collection (a bare string would otherwise be
            iterated character by character).
        out_pdf_path: Destination file. Its parent directory is created if
            it does not exist. The file itself is written only when at
            least one source was merged.

    Returns:
        A dict with the keys ``out_pdf`` (destination path as a string,
        returned even when nothing was written), ``merged_count``,
        ``skipped_count``, ``merged_files`` and ``skipped_files`` (the last
        two are lists of path strings).
    """
    # Guard against the classic "passed one path instead of a list" mistake.
    if isinstance(pdf_paths, (str, Path)):
        pdf_paths = [pdf_paths]

    out_pdf_path = Path(out_pdf_path)
    out_pdf_path.parent.mkdir(parents=True, exist_ok=True)

    writer = PdfWriter()
    merged: List[str] = []
    skipped: List[str] = []

    for p in pdf_paths:
        path = Path(p)
        try:
            reader = PdfReader(str(path))
            # Simply append the pages one after another.
            # NOTE(review): if add_page fails part-way through a file, the
            # pages already added stay in the writer even though the file
            # is reported as skipped.
            for page in reader.pages:
                writer.add_page(page)
        except Exception:
            # Deliberately broad: an unreadable source must not abort the
            # whole merge; it is reported to the caller via skipped_files.
            skipped.append(str(path))
        else:
            merged.append(str(path))

    # Do not create an empty output file when nothing could be merged.
    if merged:
        with out_pdf_path.open("wb") as f:
            writer.write(f)

    return {
        "out_pdf": str(out_pdf_path),
        "merged_count": len(merged),
        "skipped_count": len(skipped),
        "merged_files": merged,
        "skipped_files": skipped,
    }
def merge_all_pdfs_in_dir(files_dir: str | Path, out_pdf_path: str | Path) -> dict:
    """Merge every PDF found under ``files_dir`` (recursively) into one file.

    A path counts as a PDF when its name ends in ``.pdf``, compared
    case-insensitively. Each path is collected exactly once, so files are
    not merged twice on platforms whose glob matching ignores case. The
    output file itself is excluded, so re-running with an output path that
    lives inside ``files_dir`` does not fold the previous result back in.

    Ordering: names ending in exactly ``.pdf`` come first, sorted by path,
    followed by the remaining spellings (``.PDF``, ``.Pdf``, ...), sorted by
    path.

    Args:
        files_dir: Directory to scan recursively.
        out_pdf_path: Destination file, passed through to ``merge_pdfs``.

    Returns:
        The result of ``merge_pdfs`` for the collected paths.
    """
    files_dir = Path(files_dir)
    out_resolved = Path(out_pdf_path).resolve()

    pdfs: List[Path] = []
    for candidate in files_dir.rglob("*"):
        if not candidate.name.lower().endswith(".pdf"):
            continue
        try:
            is_output = candidate.resolve() == out_resolved
        except (OSError, RuntimeError):
            # Unresolvable entry (e.g. a symlink loop): it cannot be the
            # output file, so keep it and let the merge step report it.
            is_output = False
        if not is_output:
            pdfs.append(candidate)

    # Lower-case ".pdf" names first, then every other spelling; each group
    # sorted by path.
    pdfs.sort(key=lambda p: (not p.name.endswith(".pdf"), p))
    return merge_pdfs(pdfs, out_pdf_path)