"""Split cleaned document text into named sections.

A section starts at a standalone header line (English or Russian, matched
case-insensitively) such as "Skills" or "Опыт работы"; everything up to the
next recognized header belongs to that section.
"""
from __future__ import annotations

import re
from typing import Dict, List, Optional, Tuple

# Section key -> patterns that recognize a line consisting solely of that
# section's header. Lookup is first-match-wins in dict order (see
# ``_match_header``), so an alternative must appear under exactly one key:
# anything duplicated under a later key would be unreachable there.
_SECTION_PATTERNS: dict[str, List[re.Pattern]] = {
    "contacts": [
        re.compile(r"^\s*(contacts?|contact info|контакты)\s*$", re.I),
    ],
    "about": [
        re.compile(
            r"^\s*(summary|about|profile|objective|о\s+себе|обо\s+мне|профиль|цель)\s*$",
            re.I,
        ),
    ],
    "skills": [
        re.compile(
            r"^\s*(skills?|key skills|stack|tech( stack)?|навыки|технологии|компетенции)\s*$",
            re.I,
        ),
    ],
    "experience": [
        re.compile(
            r"^\s*(experience|work experience|employment|опыт\s+работы|опыт)\s*$",
            re.I,
        ),
    ],
    # Certification/course headers are deliberately NOT listed here: they
    # used to be, which shadowed the dedicated "certifications" key below
    # and made it impossible to ever produce that section.
    "education": [
        re.compile(r"^\s*(education|образование)\s*$", re.I),
    ],
    "projects": [
        re.compile(r"^\s*(projects?|проекты)\s*$", re.I),
    ],
    "languages": [
        re.compile(r"^\s*(languages?|языки)\s*$", re.I),
    ],
    "certifications": [
        re.compile(r"^\s*(certifications?|сертификаты|курсы)\s*$", re.I),
    ],
    "publications": [
        re.compile(r"^\s*(publications?|публикации)\s*$", re.I),
    ],
}


def _match_header(line: str) -> Optional[str]:
    """Return the section key whose header pattern matches *line*.

    Keys are tried in ``_SECTION_PATTERNS`` order and the first match wins.
    Returns ``None`` when the line is not a recognized section header.
    """
    for key, patterns in _SECTION_PATTERNS.items():
        if any(rx.match(line) for rx in patterns):
            return key
    return None


def split_sections(clean_text: str, doc_type: str | None = None) -> Dict[str, str]:
    """Group the lines of *clean_text* under the section headers they follow.

    Args:
        clean_text: Text to split. ``None`` or an empty string yields ``{}``.
        doc_type: Accepted for interface compatibility; currently unused.

    Returns:
        A dict mapping section key to that section's non-blank, stripped
        lines joined with ``"\\n"``. Lines before the first recognized header
        go under ``"header"``. Header lines themselves are not included, a
        section that repeats is concatenated, and sections without content
        are omitted. Keys appear in order of first occurrence.
    """
    sections: Dict[str, List[str]] = {"header": []}
    current = "header"

    for raw_line in (clean_text or "").splitlines():
        line = raw_line.strip()
        if not line:
            continue

        key = _match_header(line)
        if key:
            current = key
            # Register the section as soon as its header is seen so the
            # output order follows header order, even if content for it
            # only arrives after a later repeat of the same header.
            sections.setdefault(current, [])
            continue

        sections.setdefault(current, []).append(line)

    out: Dict[str, str] = {}
    for key, body_lines in sections.items():
        text = "\n".join(body_lines).strip()
        if text:
            out[key] = text
    return out


def sections_present(sections: Dict[str, str]) -> List[str]:
    """Return the sorted keys of non-empty sections, excluding ``"header"``.

    ``None`` or an empty mapping yields an empty list.
    """
    return sorted(k for k, v in (sections or {}).items() if v and k != "header")