updated Parser comment

Berkay 2025-08-14 00:10:57 +03:00
parent 7a5521648c
commit 4ec9031005
17 changed files with 949 additions and 592 deletions

.gitignore
View File

@@ -56,8 +56,12 @@ pids
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
env
.env
-ServicesRunner/AccountRecordServices/Finder/Iban/.prisma-cache
-ServicesRunner/AccountRecordServices/Finder/Comment/.prisma-cache
+# ServicesRunner/AccountRecordServices/Finder/Iban/.prisma-cache
+# ServicesRunner/AccountRecordServices/Finder/Comment/.prisma-cache
+# ServicesRunner/AccountRecordServices/Finder/Parser/Comment/.prisma-cache
+**/.prisma-cache
venv/
.vscode/
__pycache__/

View File

@@ -6,6 +6,7 @@ import { PrismaService } from '@/src/prisma.service';
import { EventsService } from '@/src/navigator/events/events.service';
import { PagesService } from '@/src/navigator/pages/pages.service';
import { MenusService } from '@/src/navigator/menus/menu.service';
+import { includes } from 'zod';
@Injectable()
export class SelectService {

View File

@@ -2,7 +2,7 @@ import time
import arrow
import pprint
-from json import dumps
+from json import dumps, loads
from decimal import Decimal
from pydantic import BaseModel
@@ -85,7 +85,8 @@ def enclose_task_and_send_mail_to_build_manager(prisma_service: PrismaService, s
for build_id, saved_list_of_account_record in saved_list_of_account_records.items():
build_manager_occupant_type = prisma_service.find_first(table="occupant_types", query={"occupant_code": "BU-MNG", "is_confirmed": True, "active": True})
living_space = prisma_service.find_first(
-table="build_living_space", query={"build_id": build_id, "occupant_type_id": build_manager_occupant_type['id'], "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}}
+table="build_living_space", query={
+"build_id": build_id, "occupant_type_id": build_manager_occupant_type['id'], "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}}
)
build = prisma_service.find_first(table="builds", query={"id": build_id})
person = prisma_service.find_first(table="people", query={"id": living_space['person_id']})
@@ -105,8 +106,10 @@ def enclose_task_and_send_mail_to_build_manager(prisma_service: PrismaService, s
if __name__ == "__main__":
prisma_service = PrismaService()
process_comment_finder_service = ProcessCommentFinderService()
print("Process Comment service started")
try:
print("Process Comment service started sleeping for 5 seconds")
@@ -117,7 +120,10 @@ if __name__ == "__main__":
for task in tasks:
if not check_task_belong_to_this_service(task):
continue
-write_account_records_row_from_finder_comment(finder_comments=task.data.FinderComment, prisma_service=prisma_service, saved_list_of_account_records=saved_list_of_account_records)
+write_account_records_row_from_finder_comment(
+finder_comments=task.data.FinderComment, prisma_service=prisma_service, saved_list_of_account_records=saved_list_of_account_records
+)
+save_task_object_for_comment_parsing(task=task, process_comment_finder_service=process_comment_finder_service)
process_comment_finder_service.update_task_status(task_uuid=task.task, is_completed=True, status=Status.COMPLETED)
process_comment_finder_service.delete_task(task_uuid=task.task)
enclose_task_and_send_mail_to_build_manager(
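The new save_task_object_for_comment_parsing call is defined outside this hunk, so its body is not visible here. A purely hypothetical sketch of the hand-off it appears to perform (the create_task method and payload layout are assumptions, not the repository's actual code):

def save_task_object_for_comment_parsing(task, process_comment_finder_service):
    # Hypothetical: re-queue the finder task's payload under the comment-parser key
    # (cf. ConfigServices.TASK_COMMENT_PARSER added in this commit) so the new
    # Parser/Comment service can consume it.
    process_comment_finder_service.create_task(service=ConfigServices.TASK_COMMENT_PARSER, data=task.data)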

View File

@@ -1,518 +0,0 @@
import re
import arrow
from json import loads, dumps
from unidecode import unidecode
from difflib import SequenceMatcher
from itertools import permutations
from time import perf_counter
turkish_months = ["OCAK", "ŞUBAT", "MART", "NİSAN", "MAYIS", "HAZİRAN", "TEMMUZ", "AĞUSTOS", "EYLÜL", "EKİM", "KASIM", "ARALIK"]
turkish_months_abbr = {
"OCA": "OCAK", "SUB": "ŞUBAT", "ŞUB": "ŞUBAT", "MAR": "MART", "NIS": "NİSAN", "MAY": "MAYIS", "HAZ": "HAZİRAN", "HZR": "HAZİRAN",
"TEM": "TEMMUZ", "AGU": "AĞUSTOS", "AGT": "AĞUSTOS", "EYL": "EYLÜL", "EKI": "EKİM", "KAS": "KASIM", "ARA": "ARALIK",
}
month_to_number_dict = {
"ocak": 1, "şubat": 2, "mart": 3, "nisan": 4, "mayıs": 5, "haziran": 6, "temmuz": 7, "ağustos": 8, "eylül": 9, "ekim": 10, "kasım": 11, "aralık": 12,
"ocak": 1, "subat": 2, "mart": 3, "nisan": 4, "mayis": 5, "haziran": 6, "temmuz": 7, "agustos": 8, "eylul": 9, "ekim": 10, "kasim": 11, "aralik": 12
}
start_year = 1950
current_year = arrow.now().year
def clean_text(text):
text = str(text)
text = re.sub(r'\d{8,}', ' ', text)
# text = re.sub(r'\b[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*\b|\b[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*\b', ' ', text)
text = text.replace("/", " ")
text = text.replace("_", " ")
text_remove_underscore = text.replace("-", " ").replace("+", " ")
text_remove_asterisk = text_remove_underscore.replace("*", " ")
text_remove_comma = text_remove_asterisk.replace(",", " ")
text_remove_dots = text_remove_comma.replace(".", " ")
text_remove_dots = re.sub(r'\s+', ' ', text_remove_dots)
text_remove_dots = text_remove_dots.strip()
return text_remove_dots
def normalize_text(text: str):
text = text.replace('İ', 'i')
text = text.replace('I', 'ı')
text = text.replace('Ş', 'ş')
text = text.replace('Ğ', 'ğ')
text = text.replace('Ü', 'ü')
text = text.replace('Ö', 'ö')
text = text.replace('Ç', 'ç')
return unidecode(text).lower()
def get_person_initials(person: dict):
parts = [person.get("firstname", ""), person.get("middle_name", ""), person.get("surname", ""), person.get("birthname", "")]
return [unidecode(p.strip())[0].upper() for p in parts if p]
def get_text_initials(matched_text: str):
return [unidecode(word.strip())[0].upper() for word in matched_text.split() if word.strip()]
def generate_dictonary_of_patterns(people: list[dict]):
"""
completly remove middle_name instead do regex firstName + SomeWord + surname
"""
patterns_dict = {}
for person in people:
person_id = person.get('id')
firstname = person.get('firstname', '').strip() if person.get('firstname') else ""
middle_name = person.get('middle_name', '').strip() if person.get('middle_name') else ""
surname = person.get('surname', '').strip() if person.get('surname') else ""
birthname = person.get('birthname', '').strip() if person.get('birthname') else ""
if not firstname or not surname:
continue
name_parts = {
'firstname': {
'orig': firstname,
'norm': normalize_text(firstname) if firstname else "",
'init': normalize_text(firstname)[0] if firstname else ""
},
'surname': {
'orig': surname,
'norm': normalize_text(surname) if surname else "",
'init': normalize_text(surname)[0] if surname else ""
}
}
if middle_name:
name_parts['middle_name'] = {
'orig': middle_name,
'norm': normalize_text(middle_name) if middle_name else "",
'init': normalize_text(middle_name)[0] if middle_name else ""
}
if birthname and normalize_text(birthname) != normalize_text(surname):
name_parts['birthname'] = {
'orig': birthname,
'norm': normalize_text(birthname),
'init': normalize_text(birthname)[0] if birthname else ""
}
person_patterns = set()
def create_pattern(parts, formats, separators=None):
if separators is None:
separators = [""]
patterns = []
for fmt in formats:
for sep in separators:
pattern_parts = []
for part_type, part_name in fmt:
if part_name in parts and part_type in parts[part_name]:
pattern_parts.append(re.escape(parts[part_name][part_type]))
if pattern_parts:
patterns.append(r"\b" + sep.join(pattern_parts) + r"\b")
return patterns
name_formats = [
[('orig', 'firstname'), ('orig', 'surname')],
[('norm', 'firstname'), ('norm', 'surname')],
[('orig', 'surname'), ('orig', 'firstname')],
[('norm', 'surname'), ('norm', 'firstname')],
]
if 'middle_name' in name_parts:
name_formats = [
[('orig', 'firstname'), ('orig', 'middle_name'), ('orig', 'surname')],
[('norm', 'firstname'), ('norm', 'middle_name'), ('norm', 'surname')],
]
person_patterns.update(create_pattern(name_parts, name_formats, [" ", ""]))
if 'middle_name' in name_parts:
middle_name_formats = [
[('orig', 'firstname'), ('orig', 'middle_name')],
[('norm', 'firstname'), ('norm', 'middle_name')],
[('orig', 'middle_name'), ('orig', 'surname')],
[('norm', 'middle_name'), ('norm', 'surname')],
]
person_patterns.update(create_pattern(name_parts, middle_name_formats, [" ", ""]))
if 'birthname' in name_parts and name_parts['surname']['orig'] != name_parts['birthname']['orig']:
birthname_formats = [
[('orig', 'firstname'), ('orig', 'birthname')],
[('norm', 'firstname'), ('norm', 'birthname')],
[('orig', 'birthname'), ('orig', 'firstname')],
[('norm', 'birthname'), ('norm', 'firstname')],
]
person_patterns.update(create_pattern(name_parts, birthname_formats, [" ", ""]))
initial_formats = [[('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')], [('init', 'firstname'), ('init', 'surname')]]
person_patterns.update(create_pattern(name_parts, initial_formats, ["", ".", " ", ". "]))
if 'middle_name' in name_parts:
triple_initial_formats = [
[('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')],
]
person_patterns.update(create_pattern(name_parts, triple_initial_formats, ["", ".", " ", ". "]))
compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in person_patterns]
patterns_dict[person_id] = compiled_patterns
return patterns_dict
def extract_person_name_with_regex(found_dict: dict, process_comment: str, patterns_dict: dict, people: list[dict]):
cleaned_text = process_comment
all_matches = []
for person_id, patterns in patterns_dict.items():
person = next((p for p in people if p.get('id') == person_id), None)
if not person:
continue
firstname_norm = normalize_text(person.get("firstname", "").strip()) if person.get("firstname") else ""
middle_name_norm = normalize_text(person.get("middle_name", "").strip()) if person.get("middle_name") else ""
surname_norm = normalize_text(person.get("surname", "").strip()) if person.get("surname") else ""
birthname_norm = normalize_text(person.get("birthname", "").strip()) if person.get("birthname") else ""
text_norm = normalize_text(process_comment)
person_matches = []
for pattern in patterns:
for match in pattern.finditer(text_norm):
start, end = match.span()
matched_text = process_comment[start:end]
matched_text_norm = normalize_text(matched_text)
is_valid_match = False
if len(matched_text_norm.split()) <= 1:
is_valid_match = False
else:
has_firstname = firstname_norm and firstname_norm in matched_text_norm
has_surname = surname_norm and surname_norm in matched_text_norm
has_birthname = birthname_norm and birthname_norm in matched_text_norm
if (has_firstname and has_surname) or (has_firstname and has_birthname):
is_valid_match = True
if is_valid_match:
person_matches.append({'matched_text': matched_text, 'start': start, 'end': end})
if person_matches:
person_matches.sort(key=lambda x: len(x['matched_text']), reverse=True)
non_overlapping_matches = []
for match in person_matches:
overlaps = False
for existing_match in non_overlapping_matches:
if (match['start'] < existing_match['end'] and match['end'] > existing_match['start']):
overlaps = True
break
if not overlaps:
non_overlapping_matches.append(match)
if non_overlapping_matches:
found_dict["name_match"] = person
all_matches.extend([(match, person) for match in non_overlapping_matches])
if all_matches:
all_matches.sort(key=lambda x: x[0]['start'], reverse=True)
for match, person in all_matches:
matched_text = match['matched_text']
matched_words = matched_text.split()
for word in matched_words:
word_norm = normalize_text(word).strip()
if not word_norm:
continue
text_norm = normalize_text(cleaned_text)
for word_match in re.finditer(rf'\b{re.escape(word_norm)}\b', text_norm, re.IGNORECASE):
start, end = word_match.span()
cleaned_text = cleaned_text[:start] + ' ' * (end - start) + cleaned_text[end:]
cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
return found_dict, cleaned_text
def extract_build_parts_info(found_dict, process_comment):
"""
Regex of parts such as :
2 nolu daire
9 NUMARALI DAI
daire 3
3 nolu dairenin
11nolu daire
Daire No 12
2NOLU DAIRE
12 No lu daire
D:10
NO:11
NO :3
"""
apartment_number = None
cleaned_text = process_comment
def clean_text_apartment_number(text, match):
clean_text = text.replace(match.group(0), '').strip()
clean_text = re.sub(r'\s+', ' ', clean_text).strip()
return clean_text
pattern1 = re.compile(r'(\d+)\s*nolu\s*daire', re.IGNORECASE)
match = pattern1.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern4 = re.compile(r'(\d+)\s*nolu\s*daire\w*', re.IGNORECASE)
match = pattern4.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern5 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
match = pattern5.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern7 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
match = pattern7.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern8 = re.compile(r'(\d+)\s*no\s*lu\s*daire', re.IGNORECASE)
match = pattern8.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern6 = re.compile(r'daire\s*no\s*(\d+)', re.IGNORECASE)
match = pattern6.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern2 = re.compile(r'(\d+)\s*numarali\s*dai', re.IGNORECASE)
match = pattern2.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern3 = re.compile(r'daire\s*(\d+)', re.IGNORECASE)
match = pattern3.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern9 = re.compile(r'd\s*:\s*(\d+)', re.IGNORECASE)
match = pattern9.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
pattern10 = re.compile(r'no\s*:\s*(\d+)', re.IGNORECASE)
match = pattern10.search(cleaned_text)
if match:
apartment_number = match.group(1)
found_dict['apartment_number'] = apartment_number
return found_dict, clean_text_apartment_number(cleaned_text, match)
return found_dict, cleaned_text
def extract_months(found_dict, process_comment):
"""
Extract Turkish month names and abbreviations from the process comment
"""
original_text = process_comment
def normalize_turkish(text: str) -> str:
"""Properly normalize Turkish text for case-insensitive comparison"""
text = text.lower()
text = text.replace('İ'.lower(), 'i') # Handle dotted i properly ('İ'.lower() yields 'i' plus a combining dot)
text = text.replace('ı', 'i') # Convert dotless i to regular i for matching
text = unidecode(text) # Remove other diacritics
return text
if 'months' not in found_dict:
found_dict['months'] = []
months_found, working_text = False, original_text
for month in turkish_months:
pattern = re.compile(r'\b' + re.escape(month) + r'\b', re.IGNORECASE)
for match in pattern.finditer(original_text):
matched_text = match.group(0)
normalized_month = normalize_turkish(month)
month_number = None
if month.lower() in month_to_number_dict:
month_number = month_to_number_dict[month.lower()]
elif normalized_month in month_to_number_dict:
month_number = month_to_number_dict[normalized_month]
month_info = {'name': month, 'number': month_number}
found_dict['months'].append(month_info)
months_found = True
working_text = working_text.replace(matched_text, '', 1)
for abbr, full_month in turkish_months_abbr.items():
pattern = re.compile(r'\b' + re.escape(abbr) + r'\b', re.IGNORECASE)
for match in pattern.finditer(working_text):
matched_text = match.group(0)
normalized_month = normalize_turkish(full_month)
month_number = None
if full_month.lower() in month_to_number_dict:
month_number = month_to_number_dict[full_month.lower()]
elif normalized_month in month_to_number_dict:
month_number = month_to_number_dict[normalized_month]
month_info = {'name': full_month, 'number': month_number}
found_dict['months'].append(month_info)
months_found = True
working_text = working_text.replace(matched_text, '', 1)
return found_dict, working_text
def extract_year(found_dict, process_comment):
"""
Extract years from the process comment
"""
original_text = process_comment
if 'years' not in found_dict:
found_dict['years'] = []
working_text = original_text
for year in range(start_year, current_year + 1):
pattern = re.compile(r'\b' + str(year) + r'\b', re.IGNORECASE)
for match in pattern.finditer(original_text):
matched_text = match.group(0)
if str(matched_text).isdigit():
found_dict['years'].append(int(matched_text))
working_text = working_text.replace(matched_text, '', 1)
return found_dict, working_text
def extract_payment_type(found_dict, process_comment):
"""
Extract payment type from the process comment
aidat
AİD
aidatı
TADİLAT
YAKIT
yakıt
yakit
"""
original_text = process_comment
working_text = original_text
if 'payment_types' not in found_dict:
found_dict['payment_types'] = []
payment_keywords = {'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'], 'tadilat': ['tadilat', 'tadİlat', 'tadilatı'], 'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']}
for payment_type, keywords in payment_keywords.items():
for keyword in keywords:
pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE)
for match in pattern.finditer(original_text):
matched_text = match.group(0)
if payment_type not in found_dict['payment_types']:
found_dict['payment_types'].append(payment_type)
working_text = working_text.replace(matched_text, '', 1)
return found_dict, working_text
def main(account_records, people):
list_of_regex_patterns = generate_dictonary_of_patterns(people=people)
dicts_found = dict()
dicts_not_found = dict()
count_extracted = 0
for account_record in account_records:
account_record_id = str(account_record["id"])
found_dict = {}
process_comment_iteration = clean_text(text=account_record["process_comment"])
found_dict, cleaned_process_comment = extract_person_name_with_regex(found_dict=found_dict, process_comment=process_comment_iteration, patterns_dict=list_of_regex_patterns, people=people)
found_dict, cleaned_process_comment = extract_build_parts_info(found_dict=found_dict, process_comment=cleaned_process_comment)
found_dict, cleaned_process_comment = extract_months(found_dict=found_dict, process_comment=cleaned_process_comment)
found_dict, cleaned_process_comment = extract_year(found_dict=found_dict, process_comment=cleaned_process_comment)
found_dict, cleaned_process_comment = extract_payment_type(found_dict=found_dict, process_comment=cleaned_process_comment)
if found_dict:
dicts_found[str(account_record_id)] = found_dict
else:
dicts_not_found[str(account_record_id)] = account_record_id
for id_, item in dicts_found.items():
months_are_valid = bool(item.get("months", []))
years_are_valid = bool(item.get("years", []))
payment_types_are_valid = bool(item.get("payment_types", []))
apartment_number_are_valid = bool(item.get("apartment_number", []))
person_name_are_valid = bool(item.get("name_match", []))
account_record_to_save = AccountRecords.query.filter_by(id=int(id_)).first()
save_dict = dict(
account_records_id=account_record_to_save.id, account_records_uu_id=str(account_record_to_save.uu_id), prediction_model="regex", treshold=1, is_first_prediction=False
)
update_dict = dict(prediction_model="regex", treshold=1, is_first_prediction=False)
if any([months_are_valid, years_are_valid, payment_types_are_valid, apartment_number_are_valid, person_name_are_valid]):
count_extracted += 1
if months_are_valid:
print(f"months: {item['months']}")
data_to_save = dumps({"data": item['months']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="months", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="months", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
if years_are_valid:
print(f"years: {item['years']}")
data_to_save = dumps({"data": item['years']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="years", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="years", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
if payment_types_are_valid:
print(f"payment_types: {item['payment_types']}")
data_to_save = dumps({"data": item['payment_types']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="payment_types", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="payment_types", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
if apartment_number_are_valid:
print(f"apartment_number: {item['apartment_number']}")
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="apartment_number", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="apartment_number", prediction_result=item['apartment_number'])
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=item['apartment_number'])
prediction_result.save()
if person_name_are_valid:
print(f"person_name: {item['name_match']}")
data_to_save = dumps({"data": item['name_match']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="person_name", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="person_name", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
print("\n===== SUMMARY =====")
print(f"extracted data total : {count_extracted}")
print(f"not extracted data total : {len(account_records) - count_extracted}")
print(f"Total account records processed : {len(account_records)}")
if __name__ == "__main__":
people_query = sqlalchemy_text("""
SELECT DISTINCT ON (p.id) p.firstname, p.middle_name, p.surname, p.birthname, bl.id
FROM public.people as p
INNER JOIN public.build_living_space as bl ON bl.person_id = p.id
INNER JOIN public.build_parts as bp ON bp.id = bl.build_parts_id
INNER JOIN public.build as b ON b.id = bp.build_id
WHERE b.id = 1
ORDER BY p.id
""")
people_raw = session.execute(people_query).all()
remove_duplicate = list()
clean_people_list = list()
for person in people_raw:
merged_name = f"{person[0]} {person[1]} {person[2]} {person[3]}"
if merged_name not in remove_duplicate:
clean_people_list.append(person)
remove_duplicate.append(merged_name)
people = [{"firstname": p[0], "middle_name": p[1], "surname": p[2], "birthname": p[3], 'id': p[4]} for p in clean_people_list]
query_account_records = sqlalchemy_text("""
SELECT a.id, a.iban, a.bank_date, a.process_comment FROM public.account_records as a where currency_value > 0
""") # and bank_date::date >= '2020-01-01'
account_records = session.execute(query_account_records).all()
account_records = [{"id": ar[0], "iban": ar[1], "bank_date": ar[2], "process_comment": ar[3]} for ar in account_records]
try:
main(session=session, account_records=account_records, people=people)
except Exception as e:
print(f"{e}")
session.close()
session_factory.remove()

View File

@@ -0,0 +1,14 @@
__pycache__/
*.pyc
*.pyo
*.pyd
*.db
*.sqlite3
*.log
*.env
venv/
.env.*
node_modules/
.prisma/
.prisma-cache/
ServicesRunner/AccountRecordServices/Test/venv/

View File

@@ -0,0 +1,22 @@
FROM python:3.12-slim
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV VIRTUAL_ENV=/opt/venv
ENV PRISMA_SCHEMA_PATH=/app/Depends/schema.prisma
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHONPATH=/app
RUN apt-get update && apt-get install -y --no-install-recommends gcc curl && rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY ServicesRunner/Depends/ /app/Depends/
COPY ServicesRunner/AccountRecordServices/Finder/Parser/Comment /app/
COPY ServicesRunner/requirements.txt /app/requirements.txt
COPY ServicesRunner/AccountRecordServices/Finder/Parser/Comment/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
CMD ["/entrypoint.sh"]

View File

@@ -0,0 +1,161 @@
import time
import arrow
from typing import Optional
from pydantic import BaseModel
from matchers import Parser
from models import BuildingCluster, BuildPart, BuildLivingSpace, Person, User, OccupantType
from Depends.prisma_client import PrismaService
from Depends.config import ConfigServices, RedisTaskObject
from Depends.service_handler import ProcessCommentParserService
def check_task_belong_to_this_service(task: RedisTaskObject):
"""
Check if task belongs to this service
"""
if not task.service == ConfigServices.TASK_COMMENT_PARSER:
return False
if not task.completed:
return False
if task.is_completed:
return False
if not task.data:
return False
return True
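# Illustrative gate check (field values are made up; RedisTaskObject comes from Depends.config):
# task = RedisTaskObject(service=ConfigServices.TASK_COMMENT_PARSER, completed=True,
#                        is_completed=False, data={"FinderComment": [...]}, task="uuid-1")
# check_task_belong_to_this_service(task)  # -> True
# After processing, is_completed is set True and the same task is skipped (-> False).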
def get_all_person_data_due_to_build(prisma_service: PrismaService):
"""
Get all person data due to build with comprehensive inner joins
Returns a dictionary of buildings clustered with their build parts, people, and living spaces
"""
buildings_dict, today = {}, arrow.now().to('GMT+3').datetime
occupant_flat_owner = prisma_service.find_first(table="occupant_types", query={"occupant_code": "FL-OWN", "active": True, "is_confirmed": True}, include={"user_types": True})
occupant_tenant = prisma_service.find_first(table="occupant_types", query={"occupant_code": "FL-TEN", "active": True, "is_confirmed": True}, include={"user_types": True})
possible_money_sender_occupants = [occupant_flat_owner.id, occupant_tenant.id]
buildings = prisma_service.find_many(table="build", query={"active": True, "is_confirmed": True, "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}})
for build in buildings:
buildings_dict[str(build.id)] = BuildingCluster(
id=build.id,
uu_id=build.uu_id,
build_name=build.build_name,
build_no=build.build_no,
build_date=str(build.build_date),
decision_period_date=str(build.decision_period_date),
expiry_starts=str(build.expiry_starts),
expiry_ends=str(build.expiry_ends),
is_confirmed=build.is_confirmed,
active=build.active,
build_parts=[]
)
build_parts = prisma_service.find_many(table="build_parts", query={"build_id": build.id, "active": True, "is_confirmed": True, "human_livable": True, "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}})
for build_part in build_parts:
part_obj = BuildPart(
id=build_part.id,
uu_id=build_part.uu_id,
part_no=build_part.part_no,
part_level=build_part.part_level,
part_code=build_part.part_code,
part_gross_size=build_part.part_gross_size,
part_net_size=build_part.part_net_size,
human_livable=build_part.human_livable,
build_id=build_part.build_id,
build_uu_id=build_part.build_uu_id,
is_confirmed=build_part.is_confirmed,
active=build_part.active,
living_spaces=[],
build=None
)
living_spaces = prisma_service.find_many(
table="build_living_space", include={"occupant_types": True, "people": {"include": {"users": True}}},
query={"build_parts_id": build_part.id, "active": True, "is_confirmed": True, "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}, "occupant_type_id": {"in": possible_money_sender_occupants}},
)
for living_space in living_spaces:
person = living_space.people
user = prisma_service.find_first(table="users", query={"person_id": person.id, "active": True, "is_confirmed": True})
user_of_person = None
if user:
user_of_person = User(
id=user.id,
uu_id=user.uu_id,
user_tag=user.user_tag,
user_type=user.user_type,
email=user.email,
phone_number=user.phone_number,
related_company=user.related_company,
is_confirmed=user.is_confirmed,
active=user.active
)
person_obj = Person(
id=person.id,
uu_id=person.uu_id,
firstname=person.firstname,
surname=person.surname,
middle_name=person.middle_name,
birthname=person.birthname,
is_confirmed=person.is_confirmed,
active=person.active,
user=user_of_person
)
occupant_type = living_space.occupant_types
occupant_type_obj = OccupantType(
id=occupant_type.id,
uu_id=occupant_type.uu_id,
occupant_code=occupant_type.occupant_code,
occupant_type=occupant_type.occupant_type,
is_confirmed=occupant_type.is_confirmed,
active=occupant_type.active,
user_type_uu_id=occupant_type.user_type_uu_id
)
living_space_obj = BuildLivingSpace(
id=living_space.id,
uu_id=living_space.uu_id,
expiry_starts=str(living_space.expiry_starts),
expiry_ends=str(living_space.expiry_ends),
fix_value=float(living_space.fix_value),
fix_percent=float(living_space.fix_percent),
agreement_no=living_space.agreement_no,
marketing_process=living_space.marketing_process,
build_parts_id=living_space.build_parts_id,
build_parts_uu_id=living_space.build_parts_uu_id,
person_id=living_space.person_id,
person_uu_id=living_space.person_uu_id,
occupant_type_id=living_space.occupant_type_id,
occupant_type_uu_id=living_space.occupant_type_uu_id,
is_confirmed=living_space.is_confirmed,
active=living_space.active,
person=person_obj,
occupant_type=occupant_type_obj
)
part_obj.living_spaces.append(living_space_obj)
buildings_dict[str(build.id)].build_parts.append(part_obj)
return {i: v.dict(exclude_none=True) for i, v in buildings_dict.items()}
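# Shape of the returned mapping, sketched with illustrative values:
# {"1": {"id": 1, "build_name": "A Blok", ...,
#        "build_parts": [{"id": 10, "part_no": "5", ...,
#                         "living_spaces": [{"id": 100, ...,
#                                            "person": {"firstname": "Ali", "surname": "Kaya", ...},
#                                            "occupant_type": {"occupant_code": "FL-OWN", ...}}]}]}}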
def get_all_companies_data(prisma_service: PrismaService):
return prisma_service.find_many(table="companies", query={"active": True, "is_confirmed": True})
if __name__ == "__main__":
print("Process Comment Parser service started")
prisma_service = PrismaService()
process_comment_parser_service = ProcessCommentParserService()
search_people = get_all_person_data_due_to_build(prisma_service)
process_comment_parser_service.set_task_requirements(search_people)
arriving_account_records = prisma_service.find_many(table="account_records", query={"active": True, "is_confirmed": True, "approved_record": False, "currency_value": {"gt": 0}})
debt_account_records = prisma_service.find_many(table="account_records", query={"active": True, "is_confirmed": True, "approved_record": False, "currency_value": {"lt": 0}})
try:
while True:
time.sleep(5)
print("Process Comment Parser service started sleeping for 5 seconds")
tasks_dict = process_comment_parser_service.get_task_requirements()
task_requirements: dict[str, BuildingCluster] = {idx: BuildingCluster(**value) for idx, value in tasks_dict.items()}
parser = Parser(account_records=arriving_account_records, task_requirements=task_requirements)
parsed_records = parser.parse()
except Exception as e:
print(f"Process Comment Parser service error: {str(e)}")
raise e
finally:
prisma_service.disconnect()

View File

@@ -0,0 +1,19 @@
#!/bin/sh
VENV_PATH="/opt/venv"
REQUIREMENTS_PATH="/app/requirements.txt"
SCHEMA_PATH="/app/Depends/schema.prisma"
PRISMA_BINARY_PATH="/root/.cache/prisma-python/binaries"
if [ ! -x "$VENV_PATH/bin/python" ]; then
python -m venv "$VENV_PATH"
"$VENV_PATH/bin/pip" install pip --upgrade
"$VENV_PATH/bin/pip" install --no-cache-dir -r "$REQUIREMENTS_PATH"
"$VENV_PATH/bin/prisma" generate --schema "$SCHEMA_PATH"
fi
if ! find "$PRISMA_BINARY_PATH" -type f -name "prisma-query-engine-debian-openssl-3.0.x" | grep -q .; then
"$VENV_PATH/bin/prisma" py fetch
fi
exec "$VENV_PATH/bin/python" -u app.py

View File

@@ -0,0 +1,566 @@
import pprint
import re
import arrow
from json import loads, dumps
from unidecode import unidecode
from models import BuildingCluster, Person
turkish_months = ["OCAK", "ŞUBAT", "MART", "NİSAN", "MAYIS", "HAZİRAN", "TEMMUZ", "AĞUSTOS", "EYLÜL", "EKİM", "KASIM", "ARALIK"]
turkish_months_abbr = {
"OCA": "OCAK", "SUB": "ŞUBAT", "ŞUB": "ŞUBAT", "MAR": "MART", "NIS": "NİSAN", "MAY": "MAYIS", "HAZ": "HAZİRAN", "HZR": "HAZİRAN",
"TEM": "TEMMUZ", "AGU": "AĞUSTOS", "AGT": "AĞUSTOS", "EYL": "EYLÜL", "EKI": "EKİM", "KAS": "KASIM", "ARA": "ARALIK", "AGUSTOS": "AĞUSTOS"
}
month_to_number_dict = {
"ocak": 1, "şubat": 2, "mart": 3, "nisan": 4, "mayıs": 5, "haziran": 6, "temmuz": 7, "ağustos": 8, "eylül": 9, "ekim": 10, "kasım": 11, "aralık": 12,
"ocak": 1, "subat": 2, "mart": 3, "nisan": 4, "mayis": 5, "haziran": 6, "temmuz": 7, "agustos": 8, "eylul": 9, "ekim": 10, "kasim": 11, "aralik": 12
}
start_year = 1950
current_year = arrow.now().year
class ParsedComment:
def __init__(self, account_record_id: int, org_comment: str) -> None:
self.account_record_id: int = account_record_id
self.org_comment: str = org_comment
self.comment: str | None = None
self.people: list[dict] = []
self.parts: list[dict] = []
self.months: list[dict] = []
self.years: list[int] = []
self.payment_types: list[str] = []
def set_people(self, people: list[dict]) -> None:
self.people = people
def set_parts(self, parts: list[dict]) -> None:
self.parts = parts
def set_months(self, months: list[dict]) -> None:
self.months = months
def set_years(self, years: list[int]) -> None:
self.years = years
def set_payment_types(self, payment_types: list[str]) -> None:
self.payment_types = payment_types
class ParserHelpers:
@staticmethod
def normalize_text(text: str) -> str:
text = text.replace('İ', 'i')
text = text.replace('I', 'ı')
text = text.replace('Ş', 'ş')
text = text.replace('Ğ', 'ğ')
text = text.replace('Ü', 'ü')
text = text.replace('Ö', 'ö')
text = text.replace('Ç', 'ç')
return unidecode(text).lower()
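# e.g. normalize_text("İSMAİL ÇELİK") -> "ismail celik": uppercase Turkish letters are
# mapped by hand first, then unidecode() transliterates the rest and the result is lowercased.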
class ParserRequirements(ParserHelpers):
@staticmethod
def create_pattern(parts, formats, separators=None):
"""
parts: dict
formats: list[list[tuple[str, str]]]
separators: list[str]
"""
if separators is None:
separators = [""]
patterns = []
for fmt in formats:
for sep in separators:
pattern_parts = []
for part_type, part_name in fmt:
if part_name in parts and part_type in parts[part_name]:
pattern_parts.append(re.escape(parts[part_name][part_type]))
if pattern_parts:
patterns.append(r"\b" + sep.join(pattern_parts) + r"\b")
return patterns
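# Sketch (illustrative input): create_pattern(
#     {'firstname': {'orig': 'Ali'}, 'surname': {'orig': 'Kaya'}},
#     formats=[[('orig', 'firstname'), ('orig', 'surname')]], separators=[" ", ""])
# -> [r'\bAli Kaya\b', r'\bAliKaya\b']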
@classmethod
def generate_dictonary_of_patterns(cls, person: Person):
"""Completly remove middle_name instead do regex firstName + SomeWord + surname"""
patterns_dict = {}
person_patterns, firstname, birthname = set(), person.firstname.strip() if person.firstname else "", person.birthname.strip() if person.birthname else ""
middle_name, surname = person.middle_name.strip() if person.middle_name else "", person.surname.strip() if person.surname else ""
if not firstname or not surname:
return patterns_dict
name_parts = {
'firstname': {'orig': firstname, 'norm': cls.normalize_text(firstname) if firstname else "", 'init': cls.normalize_text(firstname)[0] if firstname else ""},
'surname': {'orig': surname, 'norm': cls.normalize_text(surname) if surname else "", 'init': cls.normalize_text(surname)[0] if surname else ""}
}
if middle_name:
name_parts['middle_name'] = {'orig': middle_name, 'norm': cls.normalize_text(middle_name) if middle_name else "", 'init': cls.normalize_text(middle_name)[0] if middle_name else ""}
if birthname and cls.normalize_text(birthname) != cls.normalize_text(surname):
name_parts['birthname'] = {'orig': birthname, 'norm': cls.normalize_text(birthname), 'init': cls.normalize_text(birthname)[0] if birthname else ""}
name_formats = [[('orig', 'firstname'), ('orig', 'surname')], [('norm', 'firstname'), ('norm', 'surname')], [('orig', 'surname'), ('orig', 'firstname')], [('norm', 'surname'), ('norm', 'firstname')]]
if 'middle_name' in name_parts:
name_formats = [[('orig', 'firstname'), ('orig', 'middle_name'), ('orig', 'surname')], [('norm', 'firstname'), ('norm', 'middle_name'), ('norm', 'surname')]]
person_patterns.update(cls.create_pattern(name_parts, name_formats, [" ", ""]))
if 'middle_name' in name_parts:
middle_name_formats = [[('orig', 'firstname'), ('orig', 'middle_name')], [('norm', 'firstname'), ('norm', 'middle_name')], [('orig', 'middle_name'), ('orig', 'surname')], [('norm', 'middle_name'), ('norm', 'surname')],]
person_patterns.update(cls.create_pattern(name_parts, middle_name_formats, [" ", ""]))
if 'birthname' in name_parts and name_parts['surname']['orig'] != name_parts['birthname']['orig']:
birthname_formats = [
[('orig', 'firstname'), ('orig', 'birthname')], [('norm', 'firstname'), ('norm', 'birthname')],
[('orig', 'birthname'), ('orig', 'firstname')], [('norm', 'birthname'), ('norm', 'firstname')]
]
person_patterns.update(cls.create_pattern(name_parts, birthname_formats, [" ", ""]))
initial_formats = [[('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')], [('init', 'firstname'), ('init', 'surname')]]
person_patterns.update(cls.create_pattern(name_parts, initial_formats, ["", ".", " ", ". "]))
if 'middle_name' in name_parts:
triple_initial_formats = [[('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')]]
person_patterns.update(cls.create_pattern(name_parts, triple_initial_formats, ["", ".", " ", ". "]))
compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in person_patterns]
patterns_dict[str(person.id)] = compiled_patterns
return patterns_dict
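# For an illustrative Person(id=7, firstname="Ali", surname="Kaya") this yields roughly
# {'7': [re.compile(r'\bAli Kaya\b', re.IGNORECASE), re.compile(r'\bali kaya\b', re.IGNORECASE),
#        re.compile(r'\bAliKaya\b', re.IGNORECASE), re.compile(r'\ba k\b', re.IGNORECASE), ...]}
# (initials are taken from the normalized names, hence 'a' and 'k').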
class CommentParser(ParserHelpers):
def __init__(self, account_record, people_regex_dict: dict, people_dict: dict) -> None:
self.original_comment: str = account_record.process_comment
self.comment: str = self.clean_text(account_record.process_comment)
self.people_regex_dict: dict = people_regex_dict
self.people: dict = people_dict
self.account_record_id: str = str(account_record.id)
self.build_id: str = str(account_record.build_id)
self.parsed_comment: ParsedComment = ParsedComment(account_record_id=self.account_record_id, org_comment=self.original_comment)
@staticmethod
def clean_text_apartment_number(text: str, match):
clean_text = text.replace(match.group(0), '').strip()
clean_text = re.sub(r'\s+', ' ', clean_text).strip()
return clean_text
@staticmethod
def clean_text(text: str) -> str:
text = str(text)
text = re.sub(r'\d{8,}', ' ', text)
# text = re.sub(r'\b[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*\b|\b[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*\b', ' ', text)
text = text.replace("/", " ")
text = text.replace("_", " ")
text_remove_underscore = text.replace("-", " ").replace("+", " ")
text_remove_asterisk = text_remove_underscore.replace("*", " ")
text_remove_comma = text_remove_asterisk.replace(",", " ")
text_remove_dots = text_remove_comma.replace(".", " ")
text_remove_dots = re.sub(r'\s+', ' ', text_remove_dots)
text_remove_dots = text_remove_dots.strip()
return text_remove_dots
def get_people_regex_by_build_id(self) -> dict:
"""
Get people regex by build id
"""
return self.people_regex_dict.get(self.build_id, {})
def get_person(self, person_id: str) -> Person | None:
return self.people[str(self.build_id)].get(person_id, None)
def parse_comment(self) -> ParsedComment:
"""
Parse comment and extract information
"""
self.extract_person_name_with_regex()
self.extract_build_parts_info()
self.extract_months()
self.extract_years()
self.extract_payment_type()
self.comment = self.comment.strip()
self.parsed_comment.comment = self.comment
return self.parsed_comment
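# End-to-end sketch on an illustrative comment:
# "AHMET YILMAZ 2 NOLU DAIRE OCAK 2024 AIDATI" ->
# people: [<Person Ahmet Yilmaz>] (if registered for this build), parts: ['2'],
# months: [{'name': 'OCAK', 'number': 1}], years: [2024], payment_types: ['aidat']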
@staticmethod
def get_text_initials(matched_text: str):
return [unidecode(word.strip())[0].upper() for word in matched_text.split() if word.strip()]
def extract_person_name_with_regex(self):
all_matches, found_dict = [], {}
build_regex = self.get_people_regex_by_build_id()
for person_id, patterns in build_regex.items():
person_matches = []
person = self.get_person(str(person_id))
if not person:
continue
firstname_norm = str(self.normalize_text(person.firstname)).strip() if person.firstname else ""
# middle_name_norm = str(self.normalize_text(person.middle_name)).strip() if person.middle_name else ""
surname_norm = str(self.normalize_text(person.surname)).strip() if person.surname else ""
birthname_norm = str(self.normalize_text(person.birthname)).strip() if person.birthname else ""
text_norm = str(self.normalize_text(self.comment))
for pattern in patterns[str(person_id)]:
for match in pattern.finditer(text_norm):
start, end = match.span()
matched_text: str = self.comment[start:end]
matched_text_norm = self.normalize_text(matched_text)
is_valid_match = False
if len(matched_text_norm.split()) <= 1:
is_valid_match = False
else:
has_firstname = firstname_norm and firstname_norm in matched_text_norm
has_surname = surname_norm and surname_norm in matched_text_norm
has_birthname = birthname_norm and birthname_norm in matched_text_norm
if (has_firstname and has_surname) or (has_firstname and has_birthname):
is_valid_match = True
if is_valid_match:
person_matches.append({'matched_text': matched_text, 'start': start, 'end': end})
if person_matches:
person_matches.sort(key=lambda x: len(x['matched_text']), reverse=True)
non_overlapping_matches = []
for match in person_matches:
overlaps = False
for existing_match in non_overlapping_matches:
if (match['start'] < existing_match['end'] and match['end'] > existing_match['start']):
overlaps = True
break
if not overlaps:
non_overlapping_matches.append(match)
if non_overlapping_matches:
found_dict["name_match"] = person
all_matches.extend([(match, person) for match in non_overlapping_matches])
if all_matches:
all_matches.sort(key=lambda x: x[0]['start'], reverse=True)
for match, person in all_matches:
matched_text: str = match['matched_text']
matched_words = matched_text.split()
for word in matched_words:
word_norm = str(self.normalize_text(word)).strip()
if not word_norm:
continue
text_norm = self.normalize_text(self.comment)
if not any([person_com for person_com in self.parsed_comment.people if str(person_com.id) == str(person.id)]):
self.parsed_comment.people.append(person)
for word_match in re.finditer(rf'\b{re.escape(word_norm)}\b', text_norm, re.IGNORECASE):
start, end = word_match.span()
self.comment = self.comment[:start] + ' ' * (end - start) + self.comment[end:]
self.comment = re.sub(r'\s+', ' ', self.comment).strip()
def extract_build_parts_info(self):
"""
Regex of parts such as :
2 nolu daire
9 NUMARALI DAI
daire 3
3 nolu dairenin
11nolu daire
Daire No 12
2NOLU DAIRE
12 No lu daire
D:10
NO:11
NO :3
"""
apartment_number = None
pattern1 = re.compile(r'(\d+)\s*nolu\s*daire', re.IGNORECASE)
match = pattern1.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern4 = re.compile(r'(\d+)\s*nolu\s*daire\w*', re.IGNORECASE)
match = pattern4.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern5 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
match = pattern5.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern7 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
match = pattern7.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern8 = re.compile(r'(\d+)\s*no\s*lu\s*daire', re.IGNORECASE)
match = pattern8.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern6 = re.compile(r'daire\s*no\s*(\d+)', re.IGNORECASE)
match = pattern6.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern2 = re.compile(r'(\d+)\s*numarali\s*dai', re.IGNORECASE)
match = pattern2.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern3 = re.compile(r'daire\s*(\d+)', re.IGNORECASE)
match = pattern3.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern9 = re.compile(r'd\s*:\s*(\d+)', re.IGNORECASE)
match = pattern9.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern10 = re.compile(r'no\s*:\s*(\d+)', re.IGNORECASE)
match = pattern10.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
# return found_dict, self.comment
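# e.g. "2 NOLU DAIRE OCAK AIDATI" appends '2' to parsed_comment.parts and leaves the
# comment as "OCAK AIDATI"; only the first matching pattern fires per comment.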
def extract_months(self):
"""
Extract Turkish month names and abbreviations from the process comment
"""
original_text = self.comment
working_text = original_text
for month in turkish_months:
pattern = re.compile(r'\b' + re.escape(month) + r'\b', re.IGNORECASE)
for match in pattern.finditer(original_text):
matched_text = match.group(0)
normalized_month = self.normalize_text(month)
month_number = None
if month.lower() in month_to_number_dict:
month_number = month_to_number_dict[month.lower()]
elif normalized_month in month_to_number_dict:
month_number = month_to_number_dict[normalized_month]
month_info = {'name': month, 'number': month_number}
self.parsed_comment.months.append(month_info)
working_text = working_text.replace(matched_text, '', 1)
for abbr, full_month in turkish_months_abbr.items():
pattern = re.compile(r'\b' + re.escape(abbr) + r'\b', re.IGNORECASE)
for match in pattern.finditer(working_text):
matched_text = match.group(0)
normalized_month = self.normalize_text(full_month)
month_number = None
if full_month.lower() in month_to_number_dict:
month_number = month_to_number_dict[full_month.lower()]
elif normalized_month in month_to_number_dict:
month_number = month_to_number_dict[normalized_month]
month_info = {'name': full_month, 'number': month_number}
self.parsed_comment.months.append(month_info)
working_text = working_text.replace(matched_text, '', 1)
self.comment = working_text
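# e.g. "OCAK SUB KIRA" -> months [{'name': 'OCAK', 'number': 1}, {'name': 'ŞUBAT', 'number': 2}],
# with the matched tokens removed from the working comment.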
def extract_years(self):
"""
Extract years from the process comment
"""
original_text = self.comment
working_text = original_text
for year in range(start_year, current_year + 1):
pattern = re.compile(r'\b' + str(year) + r'\b', re.IGNORECASE)
for match in pattern.finditer(original_text):
matched_text = match.group(0)
if str(matched_text).isdigit():
self.parsed_comment.years.append(int(matched_text))
working_text = working_text.replace(matched_text, '', 1)
self.comment = working_text
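# e.g. "ARALIK 2023 2024" -> years [2023, 2024]; only standalone four-digit numbers between
# 1950 and the current year count as years.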
def extract_payment_type(self):
"""
Extract payment type from the process comment : aidat, AİD, aidatı, TADİLAT, YAKIT, yakıt, yakit
"""
original_text = self.comment
working_text = original_text
payment_keywords = {'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'], 'tadilat': ['tadilat', 'tadİlat', 'tadilatı'], 'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']}
for payment_type, keywords in payment_keywords.items():
for keyword in keywords:
pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE)
for match in pattern.finditer(original_text):
matched_text = match.group(0)
if payment_type not in self.parsed_comment.payment_types:
self.parsed_comment.payment_types.append(payment_type)
working_text = working_text.replace(matched_text, '', 1)
self.comment = working_text
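# e.g. "NISAN YAKIT" -> payment_types ['yakit']; each type is recorded at most once even if
# several of its keyword variants occur.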
class Parser:
def __init__(self, account_records: list, task_requirements: dict[str, BuildingCluster]) -> None:
"""
Initialize parser with account records and task requirements
"""
self.account_records: list = account_records
self.task_requirements: dict[str, BuildingCluster] = task_requirements
self.people_dict: dict[str, dict[str, Person]] = {}
self.people_regex_dict: dict = self.prepare_people_regex_dict()
self.parsed_records: list[ParsedComment] = []
def prepare_people_regex_dict(self):
"""Prepare regex dictionary for people"""
regex_pattern_dict = {}
for build_id, build_cluster in self.task_requirements.items():
for build_part in build_cluster.build_parts:
for living_space in build_part.living_spaces:
person: Person = living_space.person
if str(build_id) in self.people_dict:
if not str(person.id) in self.people_dict[str(build_id)]:
self.people_dict[str(build_id)][str(person.id)] = person
else:
self.people_dict[str(build_id)] = {str(person.id): person}
for build_id, people in self.people_dict.items():
people: dict[str, Person] = people
for person_id, person in people.items():
if str(build_id) not in regex_pattern_dict:
regex_pattern_dict[str(build_id)] = {}
regex_pattern_dict[str(build_id)][str(person_id)] = ParserRequirements.generate_dictonary_of_patterns(person)
return regex_pattern_dict
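# The resulting shape is doubly keyed by person id, because generate_dictonary_of_patterns
# itself returns {person_id: [patterns]}:
# {build_id: {person_id: {person_id: [compiled patterns]}}}
# which is why extract_person_name_with_regex indexes patterns[str(person_id)].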
def parse(self):
"""Parse account records based on task requirements"""
for account_record in self.account_records:
comment_parser = CommentParser(account_record=account_record, people_regex_dict=self.people_regex_dict, people_dict=self.people_dict)
parsed_comment = comment_parser.parse_comment()
self.parsed_records.append(parsed_comment)
for parsed_record in self.parsed_records:
print("*" * 150)
pprint.pprint({
"original_comment": parsed_record.org_comment, "comment": parsed_record.comment, "people": parsed_record.people,
"parts": parsed_record.parts, "months": parsed_record.months, "years": parsed_record.years, "payment_types": parsed_record.payment_types
}, indent=2)
return self.parsed_records
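# Typical wiring, mirroring app.py (illustrative):
# parser = Parser(account_records=arriving_account_records,
#                 task_requirements={"1": BuildingCluster(...)})
# parsed_records = parser.parse()  # list[ParsedComment], one entry per account record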
def commented_code():
"""Legacy reference implementation, kept for comparison; not called anywhere."""
def main(account_records, people):
list_of_regex_patterns = generate_dictonary_of_patterns(people=people)
dicts_found, dicts_not_found, count_extracted = dict(), dict(), 0
for account_record in account_records:
account_record_id = str(account_record["id"])
found_dict = {}
process_comment_iteration = clean_text(text=account_record["process_comment"])
found_dict, cleaned_process_comment = extract_person_name_with_regex(found_dict=found_dict, process_comment=process_comment_iteration, patterns_dict=list_of_regex_patterns, people=people)
found_dict, cleaned_process_comment = extract_build_parts_info(found_dict=found_dict, process_comment=cleaned_process_comment)
found_dict, cleaned_process_comment = extract_months(found_dict=found_dict, process_comment=cleaned_process_comment)
found_dict, cleaned_process_comment = extract_year(found_dict=found_dict, process_comment=cleaned_process_comment)
found_dict, cleaned_process_comment = extract_payment_type(found_dict=found_dict, process_comment=cleaned_process_comment)
if found_dict:
dicts_found[str(account_record_id)] = found_dict
else:
dicts_not_found[str(account_record_id)] = account_record_id
for id_, item in dicts_found.items():
months_are_valid = bool(item.get("months", []))
years_are_valid = bool(item.get("years", []))
payment_types_are_valid = bool(item.get("payment_types", []))
apartment_number_are_valid = bool(item.get("apartment_number", []))
person_name_are_valid = bool(item.get("name_match", []))
account_record_to_save = AccountRecords.query.filter_by(id=int(id_)).first()
save_dict = dict(account_records_id=account_record_to_save.id, account_records_uu_id=str(account_record_to_save.uu_id), prediction_model="regex", treshold=1, is_first_prediction=False)
update_dict = dict(prediction_model="regex", treshold=1, is_first_prediction=False)
if any([months_are_valid, years_are_valid, payment_types_are_valid, apartment_number_are_valid, person_name_are_valid]):
count_extracted += 1
if months_are_valid:
print(f"months: {item['months']}")
data_to_save = dumps({"data": item['months']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="months", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="months", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
if years_are_valid:
print(f"years: {item['years']}")
data_to_save = dumps({"data": item['years']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="years", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="years", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
if payment_types_are_valid:
print(f"payment_types: {item['payment_types']}")
data_to_save = dumps({"data": item['payment_types']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="payment_types", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="payment_types", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
if apartment_number_are_valid:
print(f"apartment_number: {item['apartment_number']}")
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="apartment_number", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="apartment_number", prediction_result=item['apartment_number'])
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=item['apartment_number'])
prediction_result.save()
if person_name_are_valid:
print(f"person_name: {item['name_match']}")
data_to_save = dumps({"data": item['name_match']})
prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="person_name", prediction_model="regex").first()
if not prediction_result:
created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="person_name", prediction_result=data_to_save)
created_account_prediction.save()
else:
prediction_result.update(**update_dict, prediction_result=data_to_save)
prediction_result.save()
print("\n===== SUMMARY =====")
print(f"extracted data total : {count_extracted}")
print(f"not extracted data total : {len(account_records) - count_extracted}")
print(f"Total account records processed : {len(account_records)}")
# if __name__ == "__main__":
# people_query = sqlalchemy_text("""
# SELECT DISTINCT ON (p.id) p.firstname, p.middle_name, p.surname, p.birthname, bl.id
# FROM public.people as p
# INNER JOIN public.build_living_space as bl ON bl.person_id = p.id
# INNER JOIN public.build_parts as bp ON bp.id = bl.build_parts_id
# INNER JOIN public.build as b ON b.id = bp.build_id
# WHERE b.id = 1
# ORDER BY p.id
# """)
# people_raw = session.execute(people_query).all()
# remove_duplicate = list()
# clean_people_list = list()
# for person in people_raw:
# merged_name = f"{person[0]} {person[1]} {person[2]} {person[3]}"
# if merged_name not in remove_duplicate:
# clean_people_list.append(person)
# remove_duplicate.append(merged_name)
# people = [{"firstname": p[0], "middle_name": p[1], "surname": p[2], "birthname": p[3], 'id': p[4]} for p in clean_people_list]
# query_account_records = sqlalchemy_text("""
# SELECT a.id, a.iban, a.bank_date, a.process_comment FROM public.account_records as a where currency_value > 0
# """) # and bank_date::date >= '2020-01-01'
# account_records = session.execute(query_account_records).all()
# account_records = [{"id": ar[0], "iban": ar[1], "bank_date": ar[2], "process_comment": ar[3]} for ar in account_records]
# try:
# main(session=session, account_records=account_records, people=people)
# except Exception as e:
# print(f"{e}")
# session.close()
# session_factory.remove()

View File

@ -0,0 +1,93 @@
from typing import Optional, List
from pydantic import BaseModel

class User(BaseModel):
    id: int
    uu_id: str
    user_tag: str
    user_type: str
    email: str
    phone_number: str
    related_company: str
    is_confirmed: bool
    active: bool


class Person(BaseModel):
    id: int
    uu_id: str
    firstname: str
    surname: str
    middle_name: Optional[str] = ""
    birthname: Optional[str] = ""
    # national_identity_id: str
    is_confirmed: bool
    active: bool
    user: Optional[User] = None


class OccupantType(BaseModel):
    id: int
    uu_id: str
    occupant_code: str
    occupant_type: str
    is_confirmed: bool
    active: bool
    user_type_uu_id: Optional[str] = None


class BuildPart(BaseModel):
    id: int
    uu_id: str
    part_no: str
    part_level: str
    part_code: str
    part_gross_size: float
    part_net_size: float
    human_livable: bool
    build_id: int
    build_uu_id: str
    is_confirmed: bool
    active: bool
    living_spaces: Optional[List['BuildLivingSpace']] = None


class BuildLivingSpace(BaseModel):
    id: int
    uu_id: str
    expiry_starts: str
    expiry_ends: str
    fix_value: float
    fix_percent: float
    agreement_no: str
    marketing_process: bool
    build_parts_id: int
    build_parts_uu_id: str
    person_id: int
    person_uu_id: str
    occupant_type_id: int
    occupant_type_uu_id: str
    is_confirmed: bool
    active: bool
    person: Optional[Person] = None
    occupant_type: Optional[OccupantType] = None


class BuildingCluster(BaseModel):
    id: int
    uu_id: str
    build_name: str
    build_no: str
    build_date: str
    decision_period_date: str
    expiry_starts: str
    expiry_ends: str
    is_confirmed: bool
    active: bool
    build_parts: List['BuildPart'] = []


# Update forward references for models with circular dependencies
BuildPart.update_forward_refs()
BuildingCluster.update_forward_refs()
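
# Minimal construction sketch; the values below are made up for illustration only.
# Once update_forward_refs() has run, nested BuildPart instances parse inside BuildingCluster.
example_cluster = BuildingCluster(
    id=1, uu_id="b-0001", build_name="Demo Build", build_no="A1",
    build_date="2020-01-01", decision_period_date="2020-06-01",
    expiry_starts="2020-01-01", expiry_ends="2030-01-01",
    is_confirmed=True, active=True,
    build_parts=[BuildPart(
        id=10, uu_id="bp-0010", part_no="3", part_level="1", part_code="D3",
        part_gross_size=120.0, part_net_size=98.5, human_livable=True,
        build_id=1, build_uu_id="b-0001", is_confirmed=True, active=True,
    )],
)
assert example_cluster.build_parts[0].part_code == "D3"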

View File

@ -138,6 +138,7 @@ class ConfigServices:
    TASK_UUID_INDEX_PREFIX: str = "BANK:SERVICES:TASK:UUID"
    TASK_SEEN_PREFIX: str = "BANK:SERVICES:TASK:SEEN"
    TASK_DELETED_PREFIX: str = "BANK:SERVICES:TASK:DELETED"
    TASK_COMMENT_PARSER: str = "BANK:SERVICES:TASK:COMMENT:PARSER"

    SERVICE_PREFIX_MAIL_READER: str = "MailReader"
    SERVICE_PREFIX_MAIL_PARSER: str = "MailParser"
@ -145,6 +146,7 @@ class ConfigServices:
    SERVICE_PREFIX_FINDER_COMMENT: str = "FinderComment"
    SERVICE_PREFIX_MAIL_SENDER: str = "MailSender"
    TEMPLATE_ACCOUNT_RECORDS: str = "template_accounts.html"

View File

@ -132,7 +132,7 @@ class PrismaService:
        table_selected: BaseModelClient = getattr(db, table, None)
        if not table_selected:
            raise ValueError(f"Table {table} not found")
-       rows = await table_selected.find_many(where=query, take=take, skip=skip, order=order or [], select=select, include=include)
+       rows = await table_selected.find_many(where=query, take=take, skip=skip, order=order or [], include=include)
        # print(f"[{datetime.now()}] Find many query completed in {time.time() - start:.2f}s")
        return rows
@ -234,7 +234,7 @@ class PrismaService:
        self, table: str, query: Optional[dict] = None, take: int = None, skip: int = None,
        order: Optional[list[dict]] = None, select: Optional[dict] = None, include: Optional[dict] = None
    ):
-       result = self._submit(self._a_find_many(table=table, query=query, take=take, skip=skip, order=order, select=select, include=include))
+       result = self._submit(self._a_find_many(table=table, query=query, take=take, skip=skip, order=order, include=include))
        if select and result:
            result = [{k: v for k, v in item.items() if k in select} for item in result]
        return result
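
# select is applied after the fetch: rows come back whole from Prisma and are
# trimmed in Python. Standalone sketch of that post-filtering (rows are made up):
rows = [
    {"id": 1, "iban": "TR00...", "process_comment": "JAN DUES"},
    {"id": 2, "iban": "TR01...", "process_comment": "FEB DUES"},
]
select = {"id": True, "process_comment": True}
trimmed = [{k: v for k, v in item.items() if k in select} for item in rows]
# trimmed == [{'id': 1, 'process_comment': 'JAN DUES'}, {'id': 2, 'process_comment': 'FEB DUES'}]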

View File

@ -605,15 +605,6 @@ class MailReaderService:
        self.service_retriever = ServiceTaskRetriever(self.redis_handler)
        self._initialized = True
    def ensure_connection(self):
        """
        Ensure Redis connection is established
        Returns:
            bool: True if connection is established, False otherwise
        """
        return self.redis_handler.ensure_connection()
    def get_task_by_uuid(self, task_uuid: str) -> RedisTaskObject:
        """
        Get a task object by its UUID
@ -840,34 +831,6 @@ class MailParserService:
    def fetch_all_tasks(self) -> list[RedisTaskObject]:
        return self.service_retriever.fetch_all_tasks()
    def ensure_connection(self):
        """
        Ensure Redis connection is established
        Returns:
            bool: True if connection is established, False otherwise
        """
        return self.redis_handler.ensure_connection()

    def _check_redis_connection(self) -> bool:
        """
        Check if Redis connection is alive using RedisHandler
        Returns:
            True if connection is alive, False otherwise
        """
        try:
            # Use RedisHandler to check connection
            connection_status = self.redis_handler.ensure_connection()
            if connection_status:
                logger.info("Redis connection established via RedisHandler")
            else:
                logger.error("Redis connection check failed via RedisHandler")
            return connection_status
        except RedisHandler.REDIS_EXCEPTIONS as e:
            logger.error(f"Redis connection failed: {str(e)}")
            return False
    def get_task_by_uuid(self, task_uuid: str) -> RedisTaskObject:
        """
        Get a task object by its UUID
@ -948,34 +911,6 @@ class IbanFinderService:
    def fetch_all_tasks(self) -> list[RedisTaskObject]:
        return self.service_retriever.fetch_all_tasks()
    def ensure_connection(self):
        """
        Ensure Redis connection is established
        Returns:
            bool: True if connection is established, False otherwise
        """
        return self.redis_handler.ensure_connection()

    def _check_redis_connection(self) -> bool:
        """
        Check if Redis connection is alive using RedisHandler
        Returns:
            True if connection is alive, False otherwise
        """
        try:
            # Use RedisHandler to check connection
            connection_status = self.redis_handler.ensure_connection()
            if connection_status:
                logger.info("Redis connection established via RedisHandler")
            else:
                logger.error("Redis connection check failed via RedisHandler")
            return connection_status
        except RedisHandler.REDIS_EXCEPTIONS as e:
            logger.error(f"Redis connection failed: {str(e)}")
            return False
    def get_task_by_uuid(self, task_uuid: str) -> RedisTaskObject:
        """
        Get a task object by its UUID
@ -1152,3 +1087,41 @@ class ProcessCommentFinderService:
        return self.service_retriever.delete_task(task_uuid, max_retries)
class ProcessCommentParserService:
    """
    Class for processing comment parser tasks
    """
    instance = None
    REDIS_EXCEPTIONS = RedisHandler.REDIS_EXCEPTIONS

    def __init__(self):
        if hasattr(self, '_initialized') and self._initialized:
            return
        self.service_retriever: ServiceTaskRetriever = ServiceTaskRetriever()
        self._initialized = True

    def fetch_all_tasks(self) -> list[RedisTaskObject]:
        """
        Get all tasks from Redis
        Returns:
            list: List of task objects
        """
        return self.service_retriever.fetch_all_tasks_parser()

    def get_task_requirements(self) -> dict:
        """
        Get task requirements from Redis
        Returns:
            dict: Task requirements if found
        """
        if task_object := self.service_retriever.redis_handler.get(ConfigServices.TASK_COMMENT_PARSER):
            return loads(task_object)
        return None

    def set_task_requirements(self, task_object: RedisTaskObject):
        """
        Set task requirements in Redis
        """
        return self.service_retriever.redis_handler.set(ConfigServices.TASK_COMMENT_PARSER, dumps(task_object))
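
# Usage sketch, assuming the payload is a plain JSON-serializable dict
# (json.dumps would reject a richer RedisTaskObject without a custom encoder):
parser_service = ProcessCommentParserService()
parser_service.set_task_requirements({"build_id": 1, "fields": ["months", "years"]})
requirements = parser_service.get_task_requirements()
if requirements:
    print(requirements["fields"])  # ['months', 'years']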

View File

@ -120,6 +120,22 @@ services:
      options:
        max-size: "10m"
        max-file: "3"
  process_comment_parser:
    container_name: process_comment_parser
    build:
      context: .
      dockerfile: ServicesRunner/AccountRecordServices/Finder/Parser/Comment/Dockerfile
    networks:
      - bank-services-network
    volumes:
      - ./ServicesRunner/AccountRecordServices/Finder/Parser/Comment/venv:/opt/venv
      - ./ServicesRunner/AccountRecordServices/Finder/Parser/Comment/.prisma-cache:/root/.cache/prisma-python
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
  # finder_payments:
  #   container_name: finder_payments
  #   env_file:
@ -135,8 +151,6 @@ services:
  #     max-size: "10m"
  #     max-file: "3"
networks:
  bank-services-network:
    driver: bridge