46 lines
1.3 KiB
Python
46 lines
1.3 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Iterable, List, Optional
|
|
|
|
from pypdf import PdfReader, PdfWriter
|
|
|
|
|
|
def merge_pdfs(pdf_paths: Iterable[str | Path], out_pdf_path: str | Path) -> dict:
    """Concatenate the pages of several PDF files into a single PDF.

    Inputs are processed in iteration order. An input that raises while it
    is being opened, read or appended is recorded as skipped (and the reason
    is logged) instead of aborting the whole merge.

    Args:
        pdf_paths: Paths of the PDF files to merge, in the desired order.
        out_pdf_path: Destination file. Its parent directory is created if
            it does not exist. The file itself is written only when at least
            one input was merged.

    Returns:
        A dict with the keys ``out_pdf`` (destination path as a string, even
        if nothing was written), ``merged_count``, ``skipped_count``,
        ``merged_files`` and ``skipped_files`` (lists of path strings).
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)

    out_pdf_path = Path(out_pdf_path)
    out_pdf_path.parent.mkdir(parents=True, exist_ok=True)

    writer = PdfWriter()
    merged: List[str] = []
    skipped: List[str] = []

    for p in pdf_paths:
        path = Path(p)
        try:
            reader = PdfReader(str(path))
            # Read every page up front so that a failure while reading the
            # input cannot leave only its first pages in the output while the
            # file is reported as skipped.
            # NOTE(review): an error raised by add_page itself could still
            # leave a partial file in the writer.
            pages = list(reader.pages)
            # Simply append the pages one after another.
            for page in pages:
                writer.add_page(page)
        except Exception as exc:
            # Broad on purpose: one unreadable input must not abort the
            # merge. The failure is reported through ``skipped_files``.
            log.warning("Skipping %s: %s", path, exc)
            skipped.append(str(path))
        else:
            merged.append(str(path))

    if merged:
        with out_pdf_path.open("wb") as f:
            writer.write(f)

    return {
        "out_pdf": str(out_pdf_path),
        "merged_count": len(merged),
        "skipped_count": len(skipped),
        "merged_files": merged,
        "skipped_files": skipped,
    }
|
|
|
|
|
|
def merge_all_pdfs_in_dir(files_dir: str | Path, out_pdf_path: str | Path) -> dict:
    """Merge every PDF found under *files_dir* (recursively) into one file.

    Files are selected by a case-insensitive ``.pdf`` name suffix, so
    ``.pdf``, ``.PDF`` and mixed-case variants are all picked up, each path
    exactly once. Paths whose name ends in lowercase ``.pdf`` come first in
    sorted order, followed by the other casings in sorted order.

    If *out_pdf_path* itself lies under *files_dir*, it is left out of the
    inputs so that re-running the merge does not fold a previous output into
    the new one.

    Args:
        files_dir: Directory that is searched recursively.
        out_pdf_path: Destination file, passed on to ``merge_pdfs``.

    Returns:
        The result dict produced by ``merge_pdfs``.
    """
    files_dir = Path(files_dir)
    out_resolved = Path(out_pdf_path).resolve()

    # One scan with an explicit case-insensitive test avoids the duplicates
    # that two separate "*.pdf" / "*.PDF" globs produce wherever glob
    # matching is case-insensitive.
    candidates = [
        p
        for p in files_dir.rglob("*")
        if p.name.lower().endswith(".pdf") and p.resolve() != out_resolved
    ]

    # Sort key: exact-lowercase suffix first, then everything else; ties are
    # broken by path.
    pdfs = sorted(candidates, key=lambda p: (not p.name.endswith(".pdf"), p))
    return merge_pdfs(pdfs, out_pdf_path)
|