from __future__ import annotations

import re
from typing import Any, Dict, Optional

from tg_resume_db.extract.parse import (
    extract_contacts,
    extract_name_guess,
    extract_remote,
    extract_english,
    extract_roles_skills,
    extract_salary,
    extract_location_best_effort,
    extract_experience_years,
)

# Labelled one-line fields. Each pattern matches a Russian label
# (case-insensitively), an optional ":" or "-" separator, and captures the
# rest of the line as group 1.
_DESIRED_RE = re.compile(r"(?i)жел[а-я]*\s+должност[ьи]\s*[:\-]?\s*(.+)")
_SPEC_RE = re.compile(r"(?i)специализаци[яи]\s*[:\-]?\s*(.+)")
_SCHEDULE_RE = re.compile(r"(?i)график\s+работы\s*[:\-]?\s*(.+)")
_EMPLOYMENT_RE = re.compile(r"(?i)занятость\s*[:\-]?\s*(.+)")


def _pick(sections: Dict[str, str] | None, key: str, fallback: str) -> str:
    """Return ``sections[key]`` if present and non-empty, else ``fallback``.

    ``fallback`` is also returned when ``sections`` itself is ``None`` or
    empty.
    """
    if not sections:
        return fallback
    return sections.get(key) or fallback


def _find_first(regex: re.Pattern[str], text: str) -> Optional[str]:
    """Return the first usable labelled value found in ``text``.

    ``text`` is scanned line by line. For every line where ``regex`` matches,
    group 1 is stripped, cut at the first ``|``, ``;`` or ``/`` and stripped
    again. The first candidate whose length is between 2 and 80 characters
    (inclusive) is returned; candidates outside that range are skipped and
    the scan continues. Returns ``None`` if no line yields a usable value.
    """
    for line in text.splitlines():
        match = regex.search(line)
        if not match:
            continue
        value = match.group(1).strip()
        # Keep only the first item when several values share one line.
        value = re.split(r"[|;/]", value)[0].strip()
        if 2 <= len(value) <= 80:
            return value
    return None


def _join_unique(*parts: str) -> str:
    """Join ``parts`` with newlines, dropping exact duplicates (order kept)."""
    return "\n".join(dict.fromkeys(parts))


def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Extract structured fields from a resume text.

    Args:
        clean_text: The full resume text.
        sections: Optional mapping of section name to section text. The keys
            read here are ``"header"``, ``"contacts"``, ``"about"``,
            ``"skills"`` and ``"experience"``; any missing or empty section
            falls back to ``clean_text``.

    Returns:
        A dict with the keys ``name``, ``contacts_raw``, ``remote``,
        ``english``, ``roles``, ``skills``, ``location``, ``exp_years``,
        ``exp_years_eng``, ``exp_conf``, ``exp_dbg``, ``salary_min``,
        ``salary_max``, ``salary_conf``, ``salary_dbg``, ``desired_title``,
        ``specializations``, ``employment_type``, ``schedule`` and
        ``parse_method`` (always ``"hh_template"``). The four labelled fields
        are ``None`` when no matching line is found; the shape of the other
        values is defined by the ``extract_*`` helpers.
    """
    header_text = _pick(sections, "header", clean_text)
    contacts_text = _pick(sections, "contacts", clean_text)
    about_text = _pick(sections, "about", clean_text)
    skills_text = _pick(sections, "skills", clean_text)
    exp_text = _pick(sections, "experience", clean_text)

    # When sections are absent, several of the texts above are the very same
    # ``clean_text``. Joining them verbatim would feed the whole document to
    # the extractors two or three times, which repeats work and could inflate
    # any count/duration based heuristic inside them, so identical segments
    # are collapsed before joining.
    exp_scope = _join_unique(about_text, exp_text).strip() or exp_text
    roles_scope = _join_unique(about_text, skills_text, exp_text)

    name = extract_name_guess(header_text)
    contacts_raw = extract_contacts(contacts_text)
    roles, skills = extract_roles_skills(roles_scope)

    # These signals may appear anywhere, so they always use the full text.
    remote = extract_remote(clean_text)
    english = extract_english(clean_text)
    location = extract_location_best_effort(clean_text)

    exp_years, exp_years_eng, exp_conf, exp_dbg = extract_experience_years(exp_scope)
    sal_min, sal_max, sal_conf, sal_dbg = extract_salary(clean_text)

    desired_title = _find_first(_DESIRED_RE, clean_text)
    specializations = _find_first(_SPEC_RE, clean_text)
    schedule = _find_first(_SCHEDULE_RE, clean_text)
    employment = _find_first(_EMPLOYMENT_RE, clean_text)

    return {
        "name": name,
        "contacts_raw": contacts_raw,
        "remote": remote,
        "english": english,
        "roles": roles,
        "skills": skills,
        "location": location,
        "exp_years": exp_years,
        "exp_years_eng": exp_years_eng,
        "exp_conf": exp_conf,
        "exp_dbg": exp_dbg,
        "salary_min": sal_min,
        "salary_max": sal_max,
        "salary_conf": sal_conf,
        "salary_dbg": sal_dbg,
        "desired_title": desired_title,
        "specializations": specializations,
        "employment_type": employment,
        "schedule": schedule,
        "parse_method": "hh_template",
    }