wag-managment-api-service-v.../service_account_records/regex_func.py

import re
from difflib import get_close_matches


# Accepted spellings/variants for each category key. category_finder uses
# these both as fuzzy-matching references and as regex alternatives.
categories = {
    "DAIRE": [
        "daire",
        "dagire",
        "daare",
        "nolu daire",
        "no",
        "nolu dairenin",
    ],
    "APARTMAN": [
        "apartman",
        "aparman",
        "aprmn",
    ],
    "VILLA": [
        "villa",
        "vlla",
    ],
    "BINA": [
        "bina",
        "binna",
    ],
}


def word_straighten(word, ref_list, threshold=0.8):
    """Return the entry of *ref_list* closest to *word*, or *word* itself.

    Args:
        word: The token to correct.
        ref_list: Candidate spellings to compare against.
        threshold: Minimum similarity (passed as ``cutoff`` to
            ``difflib.get_close_matches``) a candidate must reach.

    Returns:
        The single best candidate meeting the threshold; when no candidate
        qualifies, *word* is returned unchanged.
    """
    closest = get_close_matches(word, ref_list, n=1, cutoff=threshold)
    if not closest:
        return word
    return closest[0]


def category_finder(text, output_template="{kategori} {numara}"):
    """Extract numbered category mentions (e.g. "daire 5") from free text.

    For every entry of the module-level ``categories`` mapping, the text is
    split into tokens, each token is fuzzy-corrected against that category's
    spellings with ``word_straighten``, and the corrected text is searched
    for a known spelling followed by a number.

    Args:
        text: Free-form text to scan.
        output_template: ``str.format`` template for each hit. It receives
            ``kategori`` (the category key) and ``numara`` (the matched
            digits as a string).

    Returns:
        A dict mapping every category key to the list of formatted hits in
        order of appearance; categories without a hit map to an empty list.
    """
    # A dictionary to store the results, pre-filled so every key is present.
    result = {category: [] for category in categories}
    # Tokenisation does not depend on the category, so do it once up front.
    words = re.split(r'\W+', text)
    for category, patterns in categories.items():
        # Tokens that contain a digit are kept verbatim: fuzzy-correcting
        # e.g. "daire5" to "daire" would discard the very number the regex
        # below is meant to capture.
        straighten_words = [
            word if re.search(r'\d', word) else word_straighten(word, patterns)
            for word in words
        ]
        straighten_text = ' '.join(straighten_words)
        pattern = r'(?:\b|\s|^)(?:' + '|'.join(map(re.escape, patterns)) + r')(?:\s*|:|\-|\#)*(\d+)(?:\b|$)'
        if founds_list := re.findall(pattern, straighten_text, re.IGNORECASE):
            list_of_output = [output_template.format(kategori=category, numara=num) for num in founds_list]
            # Drop outputs that are empty or whitespace-only (possible with a
            # custom template).
            result[category].extend([i for i in list_of_output if str(i).replace(' ', '')])
    return result