services api

93  ServicesBank/Parser/.dockerignore  Normal file
@@ -0,0 +1,93 @@
# Git
.git
.gitignore
.gitattributes

# CI
.codeclimate.yml
.travis.yml
.taskcluster.yml

# Docker
docker-compose.yml
service_app/Dockerfile
.docker
.dockerignore

# Byte-compiled / optimized / DLL files
**/__pycache__/
**/*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
service_app/env/
build/
develop-eggs/
dist/
downloads/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Virtual environment
service_app/.env
.venv/
venv/

# PyCharm
.idea

# Python mode for VIM
.ropeproject
**/.ropeproject

# Vim swap files
**/*.swp

# VS Code
.vscode/

test_application/
24  ServicesBank/Parser/Dockerfile  Normal file
@@ -0,0 +1,24 @@
FROM python:3.12-slim

WORKDIR /

ENV PYTHONPATH=/ PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1

# Install the build dependency (gcc) and the cron daemon in one layer,
# then clean the apt lists so they don't bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends gcc cron && rm -rf /var/lib/apt/lists/* && pip install --no-cache-dir poetry

COPY pyproject.toml ./pyproject.toml

RUN poetry config virtualenvs.create false && poetry install --no-interaction --no-ansi --no-root --only main && pip cache purge && rm -rf ~/.cache/pypoetry

COPY /ServicesBank/Parser /
COPY /ServicesApi/Schemas /Schemas
COPY /ServicesApi/Controllers /Controllers

RUN touch /var/log/cron.log

RUN chmod +x /entrypoint.sh /run_app.sh

ENTRYPOINT ["/entrypoint.sh"]
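The COPY paths (/ServicesBank/Parser, /ServicesApi/Schemas, /ServicesApi/Controllers) resolve against the build context, so the image has to be built from the repository root. A minimal sketch; the image tag is an assumption:

    # hypothetical tag; run from the repository root
    docker build -f ServicesBank/Parser/Dockerfile -t services-bank-parser .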
3  ServicesBank/Parser/README.md  Normal file
@@ -0,0 +1,3 @@
# Docs of Build Extractor

Extracts build_id, decision_book_id, and living_space_id from AccountRecords.
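A hypothetical illustration (names and values made up): a bank transfer comment such as "AHMET YILMAZ OCAK 2024 AIDAT DAIRE 5" would be parsed by runner.py into a person-name match, month 1 (Ocak), year 2024, payment type "aidat", and apartment number 5.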
30  ServicesBank/Parser/entrypoint.sh  Normal file
@@ -0,0 +1,30 @@
#!/bin/bash

# cron runs jobs with a minimal environment, so persist the container's
# variables to a file the job can source. Truncate first so repeated
# container starts don't accumulate duplicate entries.
: > /env.sh
echo "POSTGRES_USER=\"$POSTGRES_USER\"" >> /env.sh
echo "POSTGRES_PASSWORD=\"$POSTGRES_PASSWORD\"" >> /env.sh
echo "POSTGRES_DB=\"$POSTGRES_DB\"" >> /env.sh
echo "POSTGRES_HOST=\"$POSTGRES_HOST\"" >> /env.sh
echo "POSTGRES_PORT=$POSTGRES_PORT" >> /env.sh
echo "POSTGRES_ENGINE=\"$POSTGRES_ENGINE\"" >> /env.sh
echo "POSTGRES_POOL_PRE_PING=\"$POSTGRES_POOL_PRE_PING\"" >> /env.sh
echo "POSTGRES_POOL_SIZE=$POSTGRES_POOL_SIZE" >> /env.sh
echo "POSTGRES_MAX_OVERFLOW=$POSTGRES_MAX_OVERFLOW" >> /env.sh
echo "POSTGRES_POOL_RECYCLE=$POSTGRES_POOL_RECYCLE" >> /env.sh
echo "POSTGRES_POOL_TIMEOUT=$POSTGRES_POOL_TIMEOUT" >> /env.sh
echo "POSTGRES_ECHO=\"$POSTGRES_ECHO\"" >> /env.sh

# Add Python environment variables
echo "PYTHONPATH=/" >> /env.sh
echo "PYTHONUNBUFFERED=1" >> /env.sh
echo "PYTHONDONTWRITEBYTECODE=1" >> /env.sh

# Install the crontab entry: run the parser every 15 minutes
echo "*/15 * * * * /run_app.sh >> /var/log/cron.log 2>&1" > /tmp/crontab_list
crontab /tmp/crontab_list

# Start cron
cron

# Tail the log file to keep the container in the foreground
tail -f /var/log/cron.log
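To sanity-check the schedule from the host (the container name here is an assumption):

    docker exec services-bank-parser crontab -l
    docker exec services-bank-parser tail -n 20 /var/log/cron.log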
26  ServicesBank/Parser/run_app.sh  Normal file
@@ -0,0 +1,26 @@
#!/bin/bash

# Source the environment file directly
. /env.sh

# Re-export all variables to ensure they're available to the Python script
export POSTGRES_USER
export POSTGRES_PASSWORD
export POSTGRES_DB
export POSTGRES_HOST
export POSTGRES_PORT
export POSTGRES_ENGINE
export POSTGRES_POOL_PRE_PING
export POSTGRES_POOL_SIZE
export POSTGRES_MAX_OVERFLOW
export POSTGRES_POOL_RECYCLE
export POSTGRES_POOL_TIMEOUT
export POSTGRES_ECHO

# Python environment variables
export PYTHONPATH
export PYTHONUNBUFFERED
export PYTHONDONTWRITEBYTECODE

# env >> /var/log/cron.log
/usr/local/bin/python /runner.py
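The long export list can be collapsed: with set -a, every variable assigned while sourcing /env.sh is exported automatically. A sketch of the equivalent script, assuming /env.sh stays a flat KEY=VALUE file:

    #!/bin/bash
    set -a          # auto-export everything defined while sourcing
    . /env.sh
    set +a
    /usr/local/bin/python /runner.py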
636  ServicesBank/Parser/runner.py  Normal file
@@ -0,0 +1,636 @@
import re
import arrow

from json import loads, dumps
from unidecode import unidecode
from difflib import SequenceMatcher
from itertools import permutations
from time import perf_counter
from sqlalchemy import text as sqlalchemy_text

from Controllers.Postgres.engine import get_session_factory
from Schemas.account.account import AccountRecordsPredict, AccountRecords


def clean_text(text):
    # Drop long digit runs (8+ digits: account numbers, references),
    # turn common separators into spaces, then collapse whitespace.
    text = str(text)
    text = re.sub(r'\d{8,}', ' ', text)
    # text = re.sub(r'\b[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*\b|\b[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*\b', ' ', text)
    for separator in "/_-+*,.":
        text = text.replace(separator, " ")
    text = re.sub(r'\s+', ' ', text)
    return text.strip()


def normalize_text(text):
    # Map Turkish uppercase letters explicitly before unidecode, because
    # str.lower() alone mishandles the dotted/dotless I pair.
    text = text.replace('İ', 'i')
    text = text.replace('I', 'ı')
    text = text.replace('Ş', 'ş')
    text = text.replace('Ğ', 'ğ')
    text = text.replace('Ü', 'ü')
    text = text.replace('Ö', 'ö')
    text = text.replace('Ç', 'ç')
    return unidecode(text).lower()
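# Hypothetical round trips, to make the two helpers concrete:
#   clean_text("AIDAT*OCAK-2024/D:5")  -> "AIDAT OCAK 2024 D:5"
#   normalize_text("İSMAİL ÖZTÜRK")    -> "ismail ozturk"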


def get_person_initials(person):
    parts = [person.get("firstname", ""), person.get("middle_name", ""), person.get("surname", ""), person.get("birthname", "")]
    return [unidecode(p.strip())[0].upper() for p in parts if p]


def get_text_initials(matched_text):
    return [unidecode(word.strip())[0].upper() for word in matched_text.split() if word.strip()]

def generate_dictionary_of_patterns(people):
    """
    Build one list of compiled regex patterns per person id.
    TODO: completely remove middle_name and instead match
    firstname + any single word + surname with one regex.
    """
    patterns_dict = {}

    for person in people:
        person_id = person.get('id')
        firstname = person.get('firstname', '').strip() if person.get('firstname') else ""
        middle_name = person.get('middle_name', '').strip() if person.get('middle_name') else ""
        surname = person.get('surname', '').strip() if person.get('surname') else ""
        birthname = person.get('birthname', '').strip() if person.get('birthname') else ""

        if not firstname or not surname:
            continue

        name_parts = {
            'firstname': {
                'orig': firstname,
                'norm': normalize_text(firstname) if firstname else "",
                'init': normalize_text(firstname)[0] if firstname else ""
            },
            'surname': {
                'orig': surname,
                'norm': normalize_text(surname) if surname else "",
                'init': normalize_text(surname)[0] if surname else ""
            }
        }

        if middle_name:
            name_parts['middle_name'] = {
                'orig': middle_name,
                'norm': normalize_text(middle_name) if middle_name else "",
                'init': normalize_text(middle_name)[0] if middle_name else ""
            }

        if birthname and normalize_text(birthname) != normalize_text(surname):
            name_parts['birthname'] = {
                'orig': birthname,
                'norm': normalize_text(birthname),
                'init': normalize_text(birthname)[0] if birthname else ""
            }

        person_patterns = set()

        def create_pattern(parts, formats, separators=None):
            # Join the requested name parts with each separator and wrap the
            # result in word boundaries; missing parts are simply skipped.
            if separators is None:
                separators = [""]

            patterns = []
            for fmt in formats:
                for sep in separators:
                    pattern_parts = []
                    for part_type, part_name in fmt:
                        if part_name in parts and part_type in parts[part_name]:
                            pattern_parts.append(re.escape(parts[part_name][part_type]))
                    if pattern_parts:
                        patterns.append(r"\b" + sep.join(pattern_parts) + r"\b")
            return patterns

        name_formats = [
            [('orig', 'firstname'), ('orig', 'surname')],
            [('norm', 'firstname'), ('norm', 'surname')],
            [('orig', 'surname'), ('orig', 'firstname')],
            [('norm', 'surname'), ('norm', 'firstname')],
        ]
        if 'middle_name' in name_parts:
            name_formats = [
                [('orig', 'firstname'), ('orig', 'middle_name'), ('orig', 'surname')],
                [('norm', 'firstname'), ('norm', 'middle_name'), ('norm', 'surname')],
            ]

        person_patterns.update(create_pattern(name_parts, name_formats, [" ", ""]))

        if 'middle_name' in name_parts:
            middle_name_formats = [
                [('orig', 'firstname'), ('orig', 'middle_name')],
                [('norm', 'firstname'), ('norm', 'middle_name')],
                [('orig', 'middle_name'), ('orig', 'surname')],
                [('norm', 'middle_name'), ('norm', 'surname')],
            ]
            person_patterns.update(create_pattern(name_parts, middle_name_formats, [" ", ""]))

        if 'birthname' in name_parts and name_parts['surname']['orig'] != name_parts['birthname']['orig']:
            birthname_formats = [
                [('orig', 'firstname'), ('orig', 'birthname')],
                [('norm', 'firstname'), ('norm', 'birthname')],
                [('orig', 'birthname'), ('orig', 'firstname')],
                [('norm', 'birthname'), ('norm', 'firstname')],
            ]
            person_patterns.update(create_pattern(name_parts, birthname_formats, [" ", ""]))

        # The first entry already covers the triple-initial form when a middle
        # name exists, so no separate triple-initial pass is needed.
        initial_formats = [
            [('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')],
            [('init', 'firstname'), ('init', 'surname')],
        ]
        person_patterns.update(create_pattern(name_parts, initial_formats, ["", ".", " ", ". "]))

        compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in person_patterns]

        patterns_dict[person_id] = compiled_patterns

    return patterns_dict
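# For a hypothetical person {"id": 7, "firstname": "Ali", "surname": "Kaya"},
# the generated set contains patterns equivalent to:
#   \bAli Kaya\b   \bali kaya\b   \bAliKaya\b   \bKaya Ali\b   \bkaya ali\b
#   \bak\b  \ba.k\b  \ba k\b  \ba. k\b   (initial forms; note the "." separator is unescaped)
# The strict validation in extract_person_name_with_regex below still requires
# the full firstname plus surname/birthname, so bare-initial hits are discarded.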


def extract_person_name_with_regex(found_dict, process_comment, patterns_dict, people):
    cleaned_text = process_comment
    all_matches = []

    for person_id, patterns in patterns_dict.items():
        person = next((p for p in people if p.get('id') == person_id), None)
        if not person:
            continue

        firstname_norm = normalize_text(person.get("firstname", "").strip()) if person.get("firstname") else ""
        middle_name_norm = normalize_text(person.get("middle_name", "").strip()) if person.get("middle_name") else ""
        surname_norm = normalize_text(person.get("surname", "").strip()) if person.get("surname") else ""
        birthname_norm = normalize_text(person.get("birthname", "").strip()) if person.get("birthname") else ""

        text_norm = normalize_text(process_comment)
        person_matches = []

        for pattern in patterns:
            for match in pattern.finditer(text_norm):
                # NOTE: spans found in text_norm are applied to process_comment,
                # which assumes normalize_text is length-preserving (true for
                # the Turkish mappings above, not for every unidecode expansion).
                start, end = match.span()
                matched_text = process_comment[start:end]
                matched_text_norm = normalize_text(matched_text)

                # Strict validation: require both firstname AND surname/birthname;
                # single-word matches are never accepted.
                if len(matched_text_norm.split()) <= 1:
                    is_valid_match = False
                else:
                    has_firstname = firstname_norm and firstname_norm in matched_text_norm
                    has_surname = surname_norm and surname_norm in matched_text_norm
                    has_birthname = birthname_norm and birthname_norm in matched_text_norm
                    is_valid_match = (has_firstname and has_surname) or (has_firstname and has_birthname)

                if is_valid_match:
                    person_matches.append({
                        'matched_text': matched_text,
                        'start': start,
                        'end': end
                    })

        if person_matches:
            # Prefer the longest matches, then keep only non-overlapping spans.
            person_matches.sort(key=lambda x: len(x['matched_text']), reverse=True)

            non_overlapping_matches = []
            for match in person_matches:
                overlaps = any(
                    match['start'] < existing['end'] and match['end'] > existing['start']
                    for existing in non_overlapping_matches
                )
                if not overlaps:
                    non_overlapping_matches.append(match)

            if non_overlapping_matches:
                found_dict["name_match"] = person
                all_matches.extend([(match, person) for match in non_overlapping_matches])

    if all_matches:
        all_matches.sort(key=lambda x: x[0]['start'], reverse=True)

        # Blank out every word of every accepted match so the later
        # extractors don't re-match name fragments.
        for match, person in all_matches:
            matched_words = match['matched_text'].split()

            for word in matched_words:
                word_norm = normalize_text(word).strip()
                if not word_norm:
                    continue

                text_norm = normalize_text(cleaned_text)
                for word_match in re.finditer(rf'\b{re.escape(word_norm)}\b', text_norm, re.IGNORECASE):
                    start, end = word_match.span()
                    cleaned_text = cleaned_text[:start] + ' ' * (end - start) + cleaned_text[end:]

        cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()

    return found_dict, cleaned_text


def extract_build_parts_info(found_dict, process_comment):
    """
    Extract apartment numbers written in forms such as:
        2 nolu daire, 9 NUMARALI DAI, daire 3, 3 nolu dairenin, 11nolu daire,
        Daire No 12, 2NOLU DAIRE, 12 No lu daire, D:10, NO:11, NO :3
    """
    cleaned_text = process_comment

    def clean_text_apartment_number(text, match):
        clean_text = text.replace(match.group(0), '').strip()
        clean_text = re.sub(r'\s+', ' ', clean_text).strip()
        return clean_text

    # Ordered from most to least specific; the first hit wins. The single
    # pattern r'(\d+)\s*nolu\s*daire\w*' collapses the original variants
    # 1/4/5/7, which only differed in spacing, suffix, and case.
    apartment_patterns = [
        r'(\d+)\s*nolu\s*daire\w*',   # "2 nolu daire", "3 nolu dairenin", "11nolu daire", "2NOLU DAIRE"
        r'(\d+)\s*no\s*lu\s*daire',   # "12 No lu daire"
        r'daire\s*no\s*(\d+)',        # "Daire No 12"
        r'(\d+)\s*numarali\s*dai',    # "9 NUMARALI DAI"
        r'daire\s*(\d+)',             # "daire 3"
        r'd\s*:\s*(\d+)',             # "D:10"
        r'no\s*:\s*(\d+)',            # "NO:11", "NO :3"
    ]

    for pattern in apartment_patterns:
        match = re.search(pattern, cleaned_text, re.IGNORECASE)
        if match:
            found_dict['apartment_number'] = match.group(1)
            return found_dict, clean_text_apartment_number(cleaned_text, match)

    return found_dict, cleaned_text


def extract_months(found_dict, process_comment):
    """
    Extract Turkish month names and abbreviations from the process comment
    """
    original_text = process_comment
    # Month lookup with both Turkish and ASCII-folded spellings as keys.
    month_to_number_dict = {
        "ocak": 1, "şubat": 2, "mart": 3, "nisan": 4, "mayıs": 5, "haziran": 6,
        "temmuz": 7, "ağustos": 8, "eylül": 9, "ekim": 10, "kasım": 11, "aralık": 12,
        # ASCII-folded variants for normalized comparisons
        "subat": 2, "mayis": 5, "agustos": 8, "eylul": 9, "kasim": 11, "aralik": 12,
    }

    def clean_text_month(text, match):
        clean_text = text.replace(match.group(0), '').strip()
        clean_text = re.sub(r'\s+', ' ', clean_text).strip()
        return clean_text

    def normalize_turkish(text):
        """Normalize Turkish text for case-insensitive comparison"""
        text = text.lower()
        text = text.replace('i̇', 'i')  # collapse lowered dotted İ (i + combining dot)
        text = text.replace('ı', 'i')  # fold dotless ı to i for matching
        text = unidecode(text)         # strip remaining diacritics
        return text

    if 'months' not in found_dict:
        found_dict['months'] = []

    months_found, working_text = False, original_text

    # Full month names. Note the \b patterns match the Turkish spellings only;
    # fully ASCII spellings such as "SUBAT" or "NISAN" are caught by neither loop.
    for month in turkish_months:
        pattern = re.compile(r'\b' + re.escape(month) + r'\b', re.IGNORECASE)
        for match in pattern.finditer(original_text):
            matched_text = match.group(0)

            normalized_month = normalize_turkish(month)
            month_number = None

            if month.lower() in month_to_number_dict:
                month_number = month_to_number_dict[month.lower()]
            elif normalized_month in month_to_number_dict:
                month_number = month_to_number_dict[normalized_month]

            month_info = {'name': month, 'number': month_number}
            found_dict['months'].append(month_info)
            months_found = True
            working_text = working_text.replace(matched_text, '', 1)

    # Abbreviated month names, e.g. OCA, ŞUB, HZR.
    for abbr, full_month in turkish_months_abbr.items():
        pattern = re.compile(r'\b' + re.escape(abbr) + r'\b', re.IGNORECASE)

        for match in pattern.finditer(working_text):
            matched_text = match.group(0)
            normalized_month = normalize_turkish(full_month)
            month_number = None

            if full_month.lower() in month_to_number_dict:
                month_number = month_to_number_dict[full_month.lower()]
            elif normalized_month in month_to_number_dict:
                month_number = month_to_number_dict[normalized_month]

            month_info = {'name': full_month, 'number': month_number}
            found_dict['months'].append(month_info)
            months_found = True
            working_text = working_text.replace(matched_text, '', 1)

    return found_dict, working_text
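# A hypothetical call (turkish_months/turkish_months_abbr come from __main__ below):
#   found, rest = extract_months({}, "MART HZR 2024 AIDAT")
#   found['months'] -> [{'name': 'MART', 'number': 3}, {'name': 'HAZİRAN', 'number': 6}]
#   rest            -> "2024 AIDAT" (up to leftover whitespace)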


def extract_year(found_dict, process_comment):
    """
    Extract years from the process comment
    """
    original_text = process_comment

    if 'years' not in found_dict:
        found_dict['years'] = []

    working_text = original_text

    # start_year and current_year are module-level values set in __main__.
    for year in range(start_year, current_year + 1):
        pattern = re.compile(r'\b' + str(year) + r'\b')
        for match in pattern.finditer(original_text):
            matched_text = match.group(0)
            if str(matched_text).isdigit():
                found_dict['years'].append(int(matched_text))
            working_text = working_text.replace(matched_text, '', 1)

    return found_dict, working_text


def extract_payment_type(found_dict, process_comment):
    """
    Extract the payment type from the process comment, covering spellings such as:
    aidat, AİD, aidatı, TADİLAT, YAKIT, yakıt, yakit
    """
    original_text = process_comment
    working_text = original_text

    if 'payment_types' not in found_dict:
        found_dict['payment_types'] = []

    payment_keywords = {
        'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'],
        'tadilat': ['tadilat', 'tadİlat', 'tadilatı'],
        'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']
    }

    for payment_type, keywords in payment_keywords.items():
        for keyword in keywords:
            pattern = re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE)
            for match in pattern.finditer(original_text):
                matched_text = match.group(0)
                if payment_type not in found_dict['payment_types']:
                    found_dict['payment_types'].append(payment_type)
                working_text = working_text.replace(matched_text, '', 1)

    return found_dict, working_text


def main(session, account_records, people):

    list_of_regex_patterns = generate_dictionary_of_patterns(people=people)
    dicts_found = dict()
    dicts_not_found = dict()
    count_extracted = 0
    for account_record in account_records:
        account_record_id = str(account_record["id"])
        found_dict = {}
        process_comment_iteration = clean_text(text=account_record["process_comment"])
        found_dict, cleaned_process_comment = extract_person_name_with_regex(
            found_dict=found_dict, process_comment=process_comment_iteration, patterns_dict=list_of_regex_patterns, people=people
        )
        found_dict, cleaned_process_comment = extract_build_parts_info(
            found_dict=found_dict, process_comment=cleaned_process_comment
        )
        found_dict, cleaned_process_comment = extract_months(
            found_dict=found_dict, process_comment=cleaned_process_comment
        )
        found_dict, cleaned_process_comment = extract_year(
            found_dict=found_dict, process_comment=cleaned_process_comment
        )
        found_dict, cleaned_process_comment = extract_payment_type(
            found_dict=found_dict, process_comment=cleaned_process_comment
        )
        # NOTE: extract_months/extract_year/extract_payment_type always seed
        # empty lists, so found_dict is never empty here and dicts_not_found
        # stays unused; the real "nothing found" accounting happens below.
        if found_dict:
            dicts_found[str(account_record_id)] = found_dict
        else:
            dicts_not_found[str(account_record_id)] = account_record_id

    AccountRecordsPredict.set_session(session)
    AccountRecords.set_session(session)

    for id_, item in dicts_found.items():
        account_record_to_save = AccountRecords.query.filter_by(id=int(id_)).first()
        save_dict = dict(
            account_records_id=account_record_to_save.id, account_records_uu_id=str(account_record_to_save.uu_id), prediction_model="regex", treshold=1, is_first_prediction=False
        )
        update_dict = dict(prediction_model="regex", treshold=1, is_first_prediction=False)

        # Collect whatever was extracted; list-valued fields are JSON-wrapped,
        # the apartment number is stored as-is.
        predictions_to_store = {}
        if item.get("months"):
            predictions_to_store["months"] = dumps({"data": item['months']})
        if item.get("years"):
            predictions_to_store["years"] = dumps({"data": item['years']})
        if item.get("payment_types"):
            predictions_to_store["payment_types"] = dumps({"data": item['payment_types']})
        if item.get("apartment_number"):
            predictions_to_store["apartment_number"] = item['apartment_number']
        if item.get("name_match"):
            predictions_to_store["person_name"] = dumps({"data": item['name_match']})

        if predictions_to_store:
            count_extracted += 1

        # Upsert one AccountRecordsPredict row per extracted field.
        for field, data_to_save in predictions_to_store.items():
            print(f"{field}: {data_to_save}")
            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field=field, prediction_model="regex").first()
            if not prediction_result:
                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field=field, prediction_result=data_to_save)
                created_account_prediction.save()
            else:
                prediction_result.update(**update_dict, prediction_result=data_to_save)
                prediction_result.save()

    print("\n===== SUMMARY =====")
    print(f"extracted data total : {count_extracted}")
    print(f"not extracted data total : {len(account_records) - count_extracted}")
    print(f"Total account records processed : {len(account_records)}")


if __name__ == "__main__":

    session_factory = get_session_factory()
    session = session_factory()

    turkish_months = ["OCAK", "ŞUBAT", "MART", "NİSAN", "MAYIS", "HAZİRAN", "TEMMUZ", "AĞUSTOS", "EYLÜL", "EKİM", "KASIM", "ARALIK"]
    turkish_months_abbr = {
        "OCA": "OCAK", "SUB": "ŞUBAT", "ŞUB": "ŞUBAT", "MAR": "MART", "NIS": "NİSAN", "MAY": "MAYIS", "HAZ": "HAZİRAN", "HZR": "HAZİRAN",
        "TEM": "TEMMUZ", "AGU": "AĞUSTOS", "AGT": "AĞUSTOS", "EYL": "EYLÜL", "EKI": "EKİM", "KAS": "KASIM", "ARA": "ARALIK",
    }
    start_year = 1950
    current_year = arrow.now().year

    # NOTE: bl.id (the build_living_space id) is selected as the person 'id',
    # so downstream matches are keyed by living space, not by people.id.
    people_query = sqlalchemy_text("""
        SELECT DISTINCT ON (p.id) p.firstname, p.middle_name, p.surname, p.birthname, bl.id
        FROM public.people as p
        INNER JOIN public.build_living_space as bl ON bl.person_id = p.id
        INNER JOIN public.build_parts as bp ON bp.id = bl.build_parts_id
        INNER JOIN public.build as b ON b.id = bp.build_id
        WHERE b.id = 1
        ORDER BY p.id
    """)

    people_raw = session.execute(people_query).all()

    # Drop rows whose full name (first/middle/last/birth) repeats verbatim.
    remove_duplicate = list()
    clean_people_list = list()
    for person in people_raw:
        merged_name = f"{person[0]} {person[1]} {person[2]} {person[3]}"
        if merged_name not in remove_duplicate:
            clean_people_list.append(person)
            remove_duplicate.append(merged_name)

    people = [{"firstname": p[0], "middle_name": p[1], "surname": p[2], "birthname": p[3], 'id': p[4]} for p in clean_people_list]

    query_account_records = sqlalchemy_text("""
        SELECT a.id, a.iban, a.bank_date, a.process_comment FROM public.account_records as a where currency_value > 0
    """)  # and bank_date::date >= '2020-01-01'
    account_records = session.execute(query_account_records).all()
    account_records = [{"id": ar[0], "iban": ar[1], "bank_date": ar[2], "process_comment": ar[3]} for ar in account_records]

    try:
        main(session=session, account_records=account_records, people=people)
    except Exception as e:
        print(f"{e}")

    session.close()
    session_factory.remove()