# Files
# tg_resume_db/importers/file_scan.py
# 2026-03-11 15:27:10 +03:00
#
# 22 lines
# 681 B
# Python

from __future__ import annotations
from pathlib import Path
from typing import Dict, Iterator
# File suffixes (lower-case, including the leading dot) that the scanner
# accepts; suffixes found on disk are lower-cased before being compared.
RESUME_EXTS = {".pdf", ".docx", ".doc", ".txt", ".html", ".htm"}


def iter_files(root: Path) -> Iterator[Dict]:
    """Yield one import record for every matching file below *root*.

    The subtree under ``root`` is walked recursively with ``Path.rglob``.
    An entry is reported when its suffix, compared case-insensitively, is
    in ``RESUME_EXTS`` and the entry is a regular file.

    Args:
        root: Path whose subtree is scanned.

    Yields:
        A dict with these keys:

        * ``origin_type`` -- always ``"file_scan"``.
        * ``export_path`` -- ``str(root)``.
        * ``chat_title``, ``message_id``, ``message_date`` -- always
          ``None``.
        * ``message_text`` -- always ``""``.
        * ``file_path`` -- the resolved path of the file, as a string.
        * ``original_name`` -- the file's name (last path component).
        * ``extra`` -- a new empty dict for each record.
    """
    # Identical for every record, so convert once instead of per file.
    export_path = str(root)
    for entry in root.rglob("*"):
        # Check the suffix first: it is a pure string operation, whereas
        # is_file() has to touch the filesystem.
        if entry.suffix.lower() not in RESUME_EXTS:
            continue
        if not entry.is_file():
            continue
        yield {
            "origin_type": "file_scan",
            "export_path": export_path,
            "chat_title": None,
            "message_id": None,
            "message_date": None,
            "message_text": "",
            "file_path": str(entry.resolve()),
            "original_name": entry.name,
            "extra": {},
        }