Initial commit

This commit is contained in:
2026-03-11 15:27:10 +03:00
commit 8b4b8d54d1
34 changed files with 7407 additions and 0 deletions

View File

@@ -0,0 +1 @@
# Declares an empty public API: a star-import of this module exports no names.
__all__ = []

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.parse import (
extract_contacts,
extract_name_guess,
extract_remote,
extract_english,
extract_roles_skills,
extract_salary,
extract_location_best_effort,
extract_experience_years,
)
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Run every extractor over the whole resume text and collect the results.

    Args:
        clean_text: Resume text; a falsy value is treated as an empty string.
        sections: Not used by this parser.

    Returns:
        A dict of the extracted fields, with ``parse_method`` set to
        ``"generic_heur"``.
    """
    body = clean_text if clean_text else ""

    # Contacts are extracted first, matching the original call order, even
    # though "name" is the first key of the result.
    contact_info = extract_contacts(body)
    parsed: Dict[str, Any] = {
        "name": extract_name_guess(body),
        "contacts_raw": contact_info,
    }
    parsed["remote"] = extract_remote(body)
    parsed["english"] = extract_english(body)
    parsed["roles"], parsed["skills"] = extract_roles_skills(body)
    parsed["location"] = extract_location_best_effort(body)
    (
        parsed["exp_years"],
        parsed["exp_years_eng"],
        parsed["exp_conf"],
        parsed["exp_dbg"],
    ) = extract_experience_years(body)
    (
        parsed["salary_min"],
        parsed["salary_max"],
        parsed["salary_conf"],
        parsed["salary_dbg"],
    ) = extract_salary(body)
    parsed["parse_method"] = "generic_heur"
    return parsed

58
extract/templates/hh.py Normal file
View File

@@ -0,0 +1,58 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.parse import (
extract_contacts,
extract_name_guess,
extract_remote,
extract_english,
extract_roles_skills,
extract_salary,
extract_location_best_effort,
extract_experience_years,
)
def _pick(sections: Dict[str, str] | None, key: str, fallback: str) -> str:
    """Return ``sections[key]`` when it exists and is truthy, else ``fallback``.

    A missing or empty ``sections`` mapping also yields ``fallback``.
    """
    chosen = sections.get(key) if sections else None
    return chosen if chosen else fallback
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Parse a resume, preferring pre-split sections over the full text.

    Name, contacts, roles/skills and experience are extracted from the
    matching entry of ``sections`` when that entry is present and non-empty;
    a missing entry falls back to the full text. Remote, English, location
    and salary are always extracted from the full text.

    Args:
        clean_text: Full resume text; a falsy value is treated as an empty
            string.
        sections: Optional mapping read for the keys ``"header"``,
            ``"contacts"``, ``"about"``, ``"skills"`` and ``"experience"``.

    Returns:
        A dict of the extracted fields, with ``parse_method`` set to
        ``"hh_template"``.
    """
    # Guard against a falsy clean_text (e.g. None) so the joins below cannot
    # raise TypeError.
    text = clean_text or ""

    header_text = _pick(sections, "header", text)
    contacts_text = _pick(sections, "contacts", text)
    about_text = _pick(sections, "about", text)
    skills_text = _pick(sections, "skills", text)
    exp_text = _pick(sections, "experience", text)

    # Missing sections all fall back to the same full text. dict.fromkeys
    # drops exact repeats while keeping order, so the extractors are not fed
    # the same text two or three times in a row.
    exp_scope = "\n".join(dict.fromkeys((about_text, exp_text))).strip() or exp_text
    roles_scope = "\n".join(dict.fromkeys((about_text, skills_text, exp_text)))

    name = extract_name_guess(header_text)
    contacts_raw = extract_contacts(contacts_text)
    roles, skills = extract_roles_skills(roles_scope)
    remote = extract_remote(text)
    english = extract_english(text)
    location = extract_location_best_effort(text)
    exp_years, exp_years_eng, exp_conf, exp_dbg = extract_experience_years(exp_scope)
    sal_min, sal_max, sal_conf, sal_dbg = extract_salary(text)

    return {
        "name": name,
        "contacts_raw": contacts_raw,
        "remote": remote,
        "english": english,
        "roles": roles,
        "skills": skills,
        "location": location,
        "exp_years": exp_years,
        "exp_years_eng": exp_years_eng,
        "exp_conf": exp_conf,
        "exp_dbg": exp_dbg,
        "salary_min": sal_min,
        "salary_max": sal_max,
        "salary_conf": sal_conf,
        "salary_dbg": sal_dbg,
        "parse_method": "hh_template",
    }

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
import re
from typing import Any, Dict, Optional
from tg_resume_db.extract.parse import (
extract_contacts,
extract_name_guess,
extract_remote,
extract_english,
extract_roles_skills,
extract_salary,
extract_location_best_effort,
extract_experience_years,
)
# Label patterns for "label: value" lines. Each is case-insensitive and
# captures the rest of the line after an optional ":" or "-" separator.
# Label: "жел… должность/должности" ("desired position").
_DESIRED_RE = re.compile(r"(?i)жел[а-я]*\s+должност[ьи]\s*[:\-]?\s*(.+)")
# Label: "специализация/специализации" ("specialization").
_SPEC_RE = re.compile(r"(?i)специализаци[яи]\s*[:\-]?\s*(.+)")
# Label: "график работы" ("work schedule").
_SCHEDULE_RE = re.compile(r"(?i)график\s+работы\s*[:\-]?\s*(.+)")
# Label: "занятость" ("employment").
_EMPLOYMENT_RE = re.compile(r"(?i)занятость\s*[:\-]?\s*(.+)")
def _pick(sections: Dict[str, str] | None, key: str, fallback: str) -> str:
    """Look up ``key`` in ``sections``; use ``fallback`` if nothing usable is found.

    ``fallback`` is returned when ``sections`` is missing or empty, or when
    the stored value is absent or falsy.
    """
    if sections:
        candidate = sections.get(key)
        if candidate:
            return candidate
    return fallback
def _find_first(regex: re.Pattern, text: str) -> Optional[str]:
    """Return the first acceptable captured value found line by line in ``text``.

    For each line that ``regex`` matches, group 1 is stripped, cut at the
    first ``|``, ``;`` or ``/`` and stripped again. The first such value
    whose length is between 2 and 80 characters (inclusive) is returned;
    ``None`` is returned when no line yields one.
    """
    # map + filter walks the lines lazily in order and skips non-matching ones.
    for hit in filter(None, map(regex.search, text.splitlines())):
        pieces = re.split(r"[|;/]", hit.group(1).strip())
        candidate = pieces[0].strip()
        if len(candidate) < 2 or len(candidate) > 80:
            continue
        return candidate
    return None
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Parse a resume using pre-split sections plus labelled single-line fields.

    Name, contacts, roles/skills and experience are extracted from the
    matching entry of ``sections`` when that entry is present and non-empty;
    a missing entry falls back to the full text. Remote, English, location,
    salary and the labelled fields (desired title, specializations, schedule,
    employment) are always taken from the full text.

    Args:
        clean_text: Full resume text; a falsy value is treated as an empty
            string.
        sections: Optional mapping read for the keys ``"header"``,
            ``"contacts"``, ``"about"``, ``"skills"`` and ``"experience"``.

    Returns:
        A dict of the extracted fields, with ``parse_method`` set to
        ``"hh_template"``. The four labelled fields are ``None`` when no
        matching line is found.
    """
    # Guard against a falsy clean_text (e.g. None) so neither the joins below
    # nor the line scan in _find_first can fail on it.
    text = clean_text or ""

    header_text = _pick(sections, "header", text)
    contacts_text = _pick(sections, "contacts", text)
    about_text = _pick(sections, "about", text)
    skills_text = _pick(sections, "skills", text)
    exp_text = _pick(sections, "experience", text)

    # Missing sections all fall back to the same full text. dict.fromkeys
    # drops exact repeats while keeping order, so the extractors are not fed
    # the same text two or three times in a row.
    exp_scope = "\n".join(dict.fromkeys((about_text, exp_text))).strip() or exp_text
    roles_scope = "\n".join(dict.fromkeys((about_text, skills_text, exp_text)))

    name = extract_name_guess(header_text)
    contacts_raw = extract_contacts(contacts_text)
    roles, skills = extract_roles_skills(roles_scope)
    remote = extract_remote(text)
    english = extract_english(text)
    location = extract_location_best_effort(text)
    exp_years, exp_years_eng, exp_conf, exp_dbg = extract_experience_years(exp_scope)
    sal_min, sal_max, sal_conf, sal_dbg = extract_salary(text)

    desired_title = _find_first(_DESIRED_RE, text)
    specializations = _find_first(_SPEC_RE, text)
    schedule = _find_first(_SCHEDULE_RE, text)
    employment = _find_first(_EMPLOYMENT_RE, text)

    return {
        "name": name,
        "contacts_raw": contacts_raw,
        "remote": remote,
        "english": english,
        "roles": roles,
        "skills": skills,
        "location": location,
        "exp_years": exp_years,
        "exp_years_eng": exp_years_eng,
        "exp_conf": exp_conf,
        "exp_dbg": exp_dbg,
        "salary_min": sal_min,
        "salary_max": sal_max,
        "salary_conf": sal_conf,
        "salary_dbg": sal_dbg,
        "desired_title": desired_title,
        "specializations": specializations,
        "employment_type": employment,
        "schedule": schedule,
        "parse_method": "hh_template",
    }

View File

@@ -0,0 +1,57 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.parse import (
extract_contacts,
extract_name_guess,
extract_remote,
extract_english,
extract_roles_skills,
extract_salary,
extract_location_best_effort,
extract_experience_years,
)
def _pick(sections: Dict[str, str] | None, key: str, fallback: str) -> str:
    """Return the truthy value stored under ``key``, or ``fallback`` otherwise.

    ``sections`` may be ``None`` or empty, in which case ``fallback`` is
    returned.
    """
    return (sections.get(key) or fallback) if sections else fallback
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Parse a resume, using pre-split sections for name, skills and experience.

    Name, roles/skills and experience are extracted from the matching entry
    of ``sections`` when that entry is present and non-empty; a missing entry
    falls back to the full text. Contacts, remote, English, location and
    salary are always extracted from the full text.

    Args:
        clean_text: Full resume text; a falsy value is treated as an empty
            string.
        sections: Optional mapping read for the keys ``"header"``,
            ``"about"``, ``"skills"`` and ``"experience"``.

    Returns:
        A dict of the extracted fields, with ``parse_method`` set to
        ``"linkedin_template"``.
    """
    # Guard against a falsy clean_text (e.g. None) so the joins below cannot
    # raise TypeError.
    text = clean_text or ""

    header_text = _pick(sections, "header", text)
    about_text = _pick(sections, "about", text)
    skills_text = _pick(sections, "skills", text)
    exp_text = _pick(sections, "experience", text)

    # Missing sections all fall back to the same full text. dict.fromkeys
    # drops exact repeats while keeping order, so the extractors are not fed
    # the same text two or three times in a row.
    exp_scope = "\n".join(dict.fromkeys((about_text, exp_text))).strip() or exp_text
    roles_scope = "\n".join(dict.fromkeys((about_text, skills_text, exp_text)))

    name = extract_name_guess(header_text)
    contacts_raw = extract_contacts(text)
    roles, skills = extract_roles_skills(roles_scope)
    remote = extract_remote(text)
    english = extract_english(text)
    location = extract_location_best_effort(text)
    exp_years, exp_years_eng, exp_conf, exp_dbg = extract_experience_years(exp_scope)
    sal_min, sal_max, sal_conf, sal_dbg = extract_salary(text)

    return {
        "name": name,
        "contacts_raw": contacts_raw,
        "remote": remote,
        "english": english,
        "roles": roles,
        "skills": skills,
        "location": location,
        "exp_years": exp_years,
        "exp_years_eng": exp_years_eng,
        "exp_conf": exp_conf,
        "exp_dbg": exp_dbg,
        "salary_min": sal_min,
        "salary_max": sal_max,
        "salary_conf": sal_conf,
        "salary_dbg": sal_dbg,
        "parse_method": "linkedin_template",
    }

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.parse import (
extract_contacts,
extract_name_guess,
extract_remote,
extract_english,
extract_roles_skills,
extract_salary,
extract_location_best_effort,
extract_experience_years,
)
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Extract all resume fields from the full text.

    Args:
        clean_text: Resume text; a falsy value is treated as an empty string.
        sections: Not used by this parser.

    Returns:
        A dict of the extracted fields, with ``parse_method`` set to
        ``"one_page_template"``.
    """
    source = clean_text or ""

    contact_data = extract_contacts(source)
    guessed_name = extract_name_guess(source)
    role_list, skill_list = extract_roles_skills(source)
    remote_value = extract_remote(source)
    english_value = extract_english(source)
    place = extract_location_best_effort(source)
    years, years_eng, years_conf, years_dbg = extract_experience_years(source)
    pay_min, pay_max, pay_conf, pay_dbg = extract_salary(source)

    return dict(
        name=guessed_name,
        contacts_raw=contact_data,
        remote=remote_value,
        english=english_value,
        roles=role_list,
        skills=skill_list,
        location=place,
        exp_years=years,
        exp_years_eng=years_eng,
        exp_conf=years_conf,
        exp_dbg=years_dbg,
        salary_min=pay_min,
        salary_max=pay_max,
        salary_conf=pay_conf,
        salary_dbg=pay_dbg,
        parse_method="one_page_template",
    )

View File

@@ -0,0 +1,11 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.templates.one_page import parse_resume as _parse
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Delegate to the one-page parser and relabel the result as ``"one_page_en"``."""
    parsed = _parse(clean_text, sections)
    # Overwrite the label set by the delegated parser, in place.
    parsed.update(parse_method="one_page_en")
    return parsed

View File

@@ -0,0 +1,11 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.templates.one_page import parse_resume as _parse
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Delegate to the one-page parser and relabel the result as ``"one_page_ru"``."""
    result = _parse(clean_text, sections)
    # Replace the delegated parser's label on the same dict object.
    result.update({"parse_method": "one_page_ru"})
    return result

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import Any, Dict
from tg_resume_db.extract.parse import (
extract_contacts,
extract_name_guess,
extract_remote,
extract_english,
extract_roles_skills,
extract_salary,
extract_location_best_effort,
extract_experience_years,
)
def parse_resume(clean_text: str, sections: Dict[str, str] | None = None) -> Dict[str, Any]:
    """Extract all resume fields from the full text.

    Args:
        clean_text: Resume text; a falsy value is treated as an empty string.
        sections: Not used by this parser.

    Returns:
        A dict of the extracted fields, with ``parse_method`` set to
        ``"pptx_template"``.
    """
    resume = "" if not clean_text else clean_text

    contacts_found = extract_contacts(resume)
    candidate_name = extract_name_guess(resume)
    found_roles, found_skills = extract_roles_skills(resume)
    remote_value = extract_remote(resume)
    english_value = extract_english(resume)
    place = extract_location_best_effort(resume)
    yrs, yrs_eng, yrs_conf, yrs_dbg = extract_experience_years(resume)
    low, high, pay_conf, pay_dbg = extract_salary(resume)

    # Assemble the result from three groups; the merge keeps the key order
    # profile -> experience -> salary -> parse_method.
    profile = {
        "name": candidate_name,
        "contacts_raw": contacts_found,
        "remote": remote_value,
        "english": english_value,
        "roles": found_roles,
        "skills": found_skills,
        "location": place,
    }
    experience = {
        "exp_years": yrs,
        "exp_years_eng": yrs_eng,
        "exp_conf": yrs_conf,
        "exp_dbg": yrs_dbg,
    }
    salary = {
        "salary_min": low,
        "salary_max": high,
        "salary_conf": pay_conf,
        "salary_dbg": pay_dbg,
    }
    return {**profile, **experience, **salary, "parse_method": "pptx_template"}