40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
import sys
|
||
|
||
# Make the project root importable so that `configs` can be resolved below.
# Membership is tested on sys.path directly; copying it into a new list first
# is unnecessary.
if "/service_account_records" not in sys.path:
    sys.path.append("/service_account_records")
|
||
|
||
import re
|
||
|
||
from difflib import get_close_matches
|
||
from configs import AccountConfig
|
||
|
||
|
||
def word_straighten(word, ref_list, threshold=0.8):
    """Return the entry of *ref_list* closest to *word*, or *word* itself.

    The comparison is done on case-folded strings so that a differently
    cased (and possibly misspelled) word is still recognised; the matching
    reference entry is returned in the spelling it has in *ref_list*.

    Args:
        word: The token to correct.
        ref_list: Iterable of reference strings to match against.
        threshold: Minimum similarity ratio (0..1) passed to
            ``difflib.get_close_matches`` as ``cutoff``.

    Returns:
        The best-matching reference string when its similarity reaches
        *threshold*; otherwise *word* unchanged.
    """
    if not word:
        # Nothing to correct; also avoids matching an empty reference entry.
        return word

    # Map each case-folded reference to its original spelling. The first
    # occurrence wins when several entries differ only by case.
    originals = {}
    for ref in ref_list:
        originals.setdefault(ref.casefold(), ref)

    matches = get_close_matches(
        word.casefold(), list(originals), n=1, cutoff=threshold
    )
    return originals[matches[0]] if matches else word
|
||
|
||
|
||
def category_finder(text, output_template="{kategori} {numara}"):
    """Find "<keyword> <number>" occurrences in *text* for every category.

    The text is split into word tokens, each token is fuzzy-corrected against
    the keyword list of the category via ``word_straighten``, and the
    corrected text is then searched case-insensitively for a keyword followed
    by a number.

    Args:
        text: Free text to scan. An empty or ``None`` value yields no hits.
        output_template: ``str.format`` template rendered once per hit. It
            receives ``kategori`` (the category key) and ``numara`` (the
            matched digits, as a string).

    Returns:
        A dict with one entry per key of ``AccountConfig.CATEGORIES``; each
        value is the list of rendered strings for that category (empty when
        nothing was found).
    """
    categories = AccountConfig.CATEGORIES
    # Dictionary for storing the results: one list per category.
    result = {category: [] for category in categories}
    if not text:
        return result

    # Tokenisation does not depend on the category, so it is done once.
    # Every token is split into a head and its run of trailing digits: only
    # the head is fuzzy-corrected, otherwise a keyword written together with
    # its number ("daire5") would be "corrected" to the bare keyword and the
    # number would be lost before the regex ever sees it.
    tokens = [
        re.fullmatch(r"(\w*?)(\d*)", word).groups()
        for word in re.split(r"\W+", text)
    ]

    for category, patterns in categories.items():
        if not patterns:
            # An empty alternation would match every number in the text.
            continue

        straighten_text = " ".join(
            (word_straighten(head, patterns) if head else "") + digits
            for head, digits in tokens
        )
        # keyword, optional separators (whitespace, ':', '-', '#'), number.
        # The separator run is a single character class, equivalent to the
        # former nested-quantifier group but without its backtracking.
        pattern = (
            r"(?:\b|\s|^)(?:"
            + "|".join(map(re.escape, patterns))
            + r")[\s:\-#]*(\d+)(?:\b|$)"
        )
        numbers = re.findall(pattern, straighten_text, re.IGNORECASE)
        rendered = (
            output_template.format(kategori=category, numara=num)
            for num in numbers
        )
        # Drop outputs that are empty or consist of spaces only.
        result[category].extend(
            item for item in rendered if str(item).replace(" ", "")
        )
    return result
|