From 7a5521648c1a6b740be0e0b0420d1af08f22a77e Mon Sep 17 00:00:00 2001 From: Berkay Date: Mon, 11 Aug 2025 20:22:26 +0300 Subject: [PATCH] Comment service added mail sender service --- .../Finder/Comment/app.py | 26 +- .../Finder/Comment/matchers.py | 518 ++++++++++++++++++ ServicesRunner/Depends/service_handler.py | 8 +- 3 files changed, 539 insertions(+), 13 deletions(-) create mode 100644 ServicesRunner/AccountRecordServices/Finder/Comment/matchers.py diff --git a/ServicesRunner/AccountRecordServices/Finder/Comment/app.py b/ServicesRunner/AccountRecordServices/Finder/Comment/app.py index 78f7c3b..f9e4a63 100644 --- a/ServicesRunner/AccountRecordServices/Finder/Comment/app.py +++ b/ServicesRunner/AccountRecordServices/Finder/Comment/app.py @@ -34,6 +34,9 @@ class BankReceive(BaseModel): def check_task_belong_to_this_service(task: RedisTaskObject): + """ + Check if task belongs to this service + """ if not task.service == ConfigServices.SERVICE_PREFIX_FINDER_IBAN: return False if not task.completed: @@ -46,6 +49,9 @@ def check_task_belong_to_this_service(task: RedisTaskObject): def write_account_records_row_from_finder_comment(finder_comments: list[FinderComment], prisma_service: PrismaService, saved_list_of_account_records: dict): + """ + Write account records row from finder comment + """ finder_comments = list(finder_comments) for finder_comment in finder_comments: bank_date = arrow.get(finder_comment.bank_date).replace(tzinfo='GMT+3').datetime @@ -75,11 +81,12 @@ def enclose_task_and_send_mail_to_build_manager(prisma_service: PrismaService, s """ if not saved_list_of_account_records: return - today = arrow.now().to('GMT+3').datetime + list_of_new_set, today = [], arrow.now().to('GMT+3').datetime for build_id, saved_list_of_account_record in saved_list_of_account_records.items(): build_manager_occupant_type = prisma_service.find_first(table="occupant_types", query={"occupant_code":"BU-MNG", "is_confirmed": True, "active": True}) living_space = prisma_service.find_first( - table="build_living_space", query={"build_id": build_id, "occupant_type_id": build_manager_occupant_type['id'], "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}}) + table="build_living_space", query={"build_id": build_id, "occupant_type_id": build_manager_occupant_type['id'], "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}} + ) build = prisma_service.find_first(table="builds", query={"id": build_id}) person = prisma_service.find_first(table="people", query={"id": living_space['person_id']}) user = prisma_service.find_first(table="users", query={"person_id": person['id']}) @@ -87,13 +94,17 @@ def enclose_task_and_send_mail_to_build_manager(prisma_service: PrismaService, s receivers=[user.email], data=saved_list_of_account_record, template_name=ConfigServices.TEMPLATE_ACCOUNT_RECORDS, subject=f"{build['name']} Cari Durum Bilgilendirme Raporu - {today.strftime('%d/%m/%Y %H:%M')}", ) - set_mail_object = RedisMailSender(task=task, data=send_object, service=ConfigServices.SERVICE_PREFIX_MAIL_SENDER, status=Status.PENDING, completed=False, created_at=today.strftime('%Y-%m-%d %H:%M:%S')) - process_comment_finder_service.service_retriever.redis_client.set(ConfigServices.SERVICE_PREFIX_MAIL_SENDER, dumps(set_mail_object.dict())) - return + set_mail_object = RedisMailSender( + task=task, data=send_object, service=ConfigServices.SERVICE_PREFIX_MAIL_SENDER, status=Status.PENDING, completed=False, created_at=today.strftime('%Y-%m-%d %H:%M:%S') + ) + list_of_new_set.append(set_mail_object) + if 
list_of_new_set: + process_comment_finder_service.service_retriever.redis_client.set(ConfigServices.SERVICE_PREFIX_MAIL_SENDER, dumps( + {"type": "mail_sender", "data": list_of_new_set, "count": len(list_of_new_set), "created_at": today.strftime('%Y-%m-%d %H:%M:%S')} + )) if __name__ == "__main__": - prisma_service = PrismaService() process_comment_finder_service = ProcessCommentFinderService() print("Process Comment service started") @@ -109,6 +120,9 @@ if __name__ == "__main__": write_account_records_row_from_finder_comment(finder_comments=task.data.FinderComment, prisma_service=prisma_service, saved_list_of_account_records=saved_list_of_account_records) process_comment_finder_service.update_task_status(task_uuid=task.task, is_completed=True, status=Status.COMPLETED) process_comment_finder_service.delete_task(task_uuid=task.task) + enclose_task_and_send_mail_to_build_manager( + prisma_service=prisma_service, saved_list_of_account_records=saved_list_of_account_records, process_comment_finder_service=process_comment_finder_service, task=task + ) except Exception as e: raise finally: diff --git a/ServicesRunner/AccountRecordServices/Finder/Comment/matchers.py b/ServicesRunner/AccountRecordServices/Finder/Comment/matchers.py new file mode 100644 index 0000000..638c21e --- /dev/null +++ b/ServicesRunner/AccountRecordServices/Finder/Comment/matchers.py @@ -0,0 +1,518 @@ +import re +import arrow + +from json import loads, dumps +from unidecode import unidecode +from difflib import SequenceMatcher +from itertools import permutations +from time import perf_counter + +turkish_months = ["OCAK", "ŞUBAT", "MART", "NİSAN", "MAYIS", "HAZİRAN", "TEMMUZ", "AĞUSTOS", "EYLÜL", "EKİM", "KASIM", "ARALIK"] +turkish_months_abbr = { + "OCA": "OCAK", "SUB": "ŞUBAT", "ŞUB": "ŞUBAT", "MAR": "MART", "NIS": "NİSAN", "MAY": "MAYIS", "HAZ": "HAZİRAN", "HZR": "HAZİRAN", + "TEM": "TEMMUZ", "AGU": "AĞUSTOS", "AGT": "AĞUSTOS", "EYL": "EYLÜL", "EKI": "EKİM", "KAS": "KASIM", "ARA": "ARALIK", +} +month_to_number_dict = { + "ocak": 1, "şubat": 2, "mart": 3, "nisan": 4, "mayıs": 5, "haziran": 6, "temmuz": 7, "ağustos": 8, "eylül": 9, "ekim": 10, "kasım": 11, "aralık": 12, + "ocak": 1, "subat": 2, "mart": 3, "nisan": 4, "mayis": 5, "haziran": 6, "temmuz": 7, "agustos": 8, "eylul": 9, "ekim": 10, "kasim": 11, "aralik": 12 +} +start_year = 1950 +current_year = arrow.now().year + + +def clean_text(text): + text = str(text) + text = re.sub(r'\d{8,}', ' ', text) + # text = re.sub(r'\b[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*\b|\b[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*\b', ' ', text) + text = text.replace("/", " ") + text = text.replace("_", " ") + text_remove_underscore = text.replace("-", " ").replace("+", " ") + text_remove_asterisk = text_remove_underscore.replace("*", " ") + text_remove_comma = text_remove_asterisk.replace(",", " ") + text_remove_dots = text_remove_comma.replace(".", " ") + text_remove_dots = re.sub(r'\s+', ' ', text_remove_dots) + text_remove_dots = text_remove_dots.strip() + return text_remove_dots + + +def normalize_text(text: str): + text = text.replace('İ', 'i') + text = text.replace('I', 'ı') + text = text.replace('Ş', 'ş') + text = text.replace('Ğ', 'ğ') + text = text.replace('Ü', 'ü') + text = text.replace('Ö', 'ö') + text = text.replace('Ç', 'ç') + return unidecode(text).lower() + + +def get_person_initials(person: dict): + parts = [person.get("firstname", ""), person.get("middle_name", ""), person.get("surname", ""), person.get("birthname", "")] + return 
[unidecode(p.strip())[0].upper() for p in parts if p] + + +def get_text_initials(matched_text: str): + return [unidecode(word.strip())[0].upper() for word in matched_text.split() if word.strip()] + + +def generate_dictonary_of_patterns(people: list[dict]): + """ + completly remove middle_name instead do regex firstName + SomeWord + surname + """ + patterns_dict = {} + for person in people: + person_id = person.get('id') + firstname = person.get('firstname', '').strip() if person.get('firstname') else "" + middle_name = person.get('middle_name', '').strip() if person.get('middle_name') else "" + surname = person.get('surname', '').strip() if person.get('surname') else "" + birthname = person.get('birthname', '').strip() if person.get('birthname') else "" + if not firstname or not surname: + continue + + name_parts = { + 'firstname': { + 'orig': firstname, + 'norm': normalize_text(firstname) if firstname else "", + 'init': normalize_text(firstname)[0] if firstname else "" + }, + 'surname': { + 'orig': surname, + 'norm': normalize_text(surname) if surname else "", + 'init': normalize_text(surname)[0] if surname else "" + } + } + + if middle_name: + name_parts['middle_name'] = { + 'orig': middle_name, + 'norm': normalize_text(middle_name) if middle_name else "", + 'init': normalize_text(middle_name)[0] if middle_name else "" + } + + if birthname and normalize_text(birthname) != normalize_text(surname): + name_parts['birthname'] = { + 'orig': birthname, + 'norm': normalize_text(birthname), + 'init': normalize_text(birthname)[0] if birthname else "" + } + + person_patterns = set() + def create_pattern(parts, formats, separators=None): + if separators is None: + separators = [""] + patterns = [] + for fmt in formats: + for sep in separators: + pattern_parts = [] + for part_type, part_name in fmt: + if part_name in parts and part_type in parts[part_name]: + pattern_parts.append(re.escape(parts[part_name][part_type])) + if pattern_parts: + patterns.append(r"\b" + sep.join(pattern_parts) + r"\b") + return patterns + + name_formats = [ + [('orig', 'firstname'), ('orig', 'surname')], + [('norm', 'firstname'), ('norm', 'surname')], + [('orig', 'surname'), ('orig', 'firstname')], + [('norm', 'surname'), ('norm', 'firstname')], + ] + if 'middle_name' in name_parts: + name_formats = [ + [('orig', 'firstname'), ('orig', 'middle_name'), ('orig', 'surname')], + [('norm', 'firstname'), ('norm', 'middle_name'), ('norm', 'surname')], + ] + person_patterns.update(create_pattern(name_parts, name_formats, [" ", ""])) + + if 'middle_name' in name_parts: + middle_name_formats = [ + [('orig', 'firstname'), ('orig', 'middle_name')], + [('norm', 'firstname'), ('norm', 'middle_name')], + [('orig', 'middle_name'), ('orig', 'surname')], + [('norm', 'middle_name'), ('norm', 'surname')], + ] + person_patterns.update(create_pattern(name_parts, middle_name_formats, [" ", ""])) + + if 'birthname' in name_parts and name_parts['surname']['orig'] != name_parts['birthname']['orig']: + birthname_formats = [ + [('orig', 'firstname'), ('orig', 'birthname')], + [('norm', 'firstname'), ('norm', 'birthname')], + [('orig', 'birthname'), ('orig', 'firstname')], + [('norm', 'birthname'), ('norm', 'firstname')], + ] + person_patterns.update(create_pattern(name_parts, birthname_formats, [" ", ""])) + initial_formats = [[('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')], [('init', 'firstname'), ('init', 'surname')]] + person_patterns.update(create_pattern(name_parts, initial_formats, ["", ".", " ", ". 
"])) + if 'middle_name' in name_parts: + triple_initial_formats = [ + [('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')], + ] + person_patterns.update(create_pattern(name_parts, triple_initial_formats, ["", ".", " ", ". "])) + compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in person_patterns] + patterns_dict[person_id] = compiled_patterns + return patterns_dict + + +def extract_person_name_with_regex(found_dict: dict, process_comment: str, patterns_dict: dict, people: list[dict]): + cleaned_text = process_comment + all_matches = [] + for person_id, patterns in patterns_dict.items(): + person = next((p for p in people if p.get('id') == person_id), None) + if not person: + continue + firstname_norm = normalize_text(person.get("firstname", "").strip()) if person.get("firstname") else "" + middle_name_norm = normalize_text(person.get("middle_name", "").strip()) if person.get("middle_name") else "" + surname_norm = normalize_text(person.get("surname", "").strip()) if person.get("surname") else "" + birthname_norm = normalize_text(person.get("birthname", "").strip()) if person.get("birthname") else "" + text_norm = normalize_text(process_comment) + person_matches = [] + for pattern in patterns: + for match in pattern.finditer(text_norm): + start, end = match.span() + matched_text = process_comment[start:end] + matched_text_norm = normalize_text(matched_text) + is_valid_match = False + if len(matched_text_norm.split()) <= 1: + is_valid_match = False + else: + has_firstname = firstname_norm and firstname_norm in matched_text_norm + has_surname = surname_norm and surname_norm in matched_text_norm + has_birthname = birthname_norm and birthname_norm in matched_text_norm + if (has_firstname and has_surname) or (has_firstname and has_birthname): + is_valid_match = True + if is_valid_match: + person_matches.append({'matched_text': matched_text, 'start': start, 'end': end}) + if person_matches: + person_matches.sort(key=lambda x: len(x['matched_text']), reverse=True) + non_overlapping_matches = [] + for match in person_matches: + overlaps = False + for existing_match in non_overlapping_matches: + if (match['start'] < existing_match['end'] and match['end'] > existing_match['start']): + overlaps = True + break + if not overlaps: + non_overlapping_matches.append(match) + if non_overlapping_matches: + found_dict["name_match"] = person + all_matches.extend([(match, person) for match in non_overlapping_matches]) + if all_matches: + all_matches.sort(key=lambda x: x[0]['start'], reverse=True) + for match, person in all_matches: + matched_text = match['matched_text'] + matched_words = matched_text.split() + for word in matched_words: + word_norm = normalize_text(word).strip() + if not word_norm: + continue + text_norm = normalize_text(cleaned_text) + for word_match in re.finditer(rf'\b{re.escape(word_norm)}\b', text_norm, re.IGNORECASE): + start, end = word_match.span() + cleaned_text = cleaned_text[:start] + ' ' * (end - start) + cleaned_text[end:] + cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip() + return found_dict, cleaned_text + + +def extract_build_parts_info(found_dict, process_comment): + """ + Regex of parts such as : + 2 nolu daire + 9 NUMARALI DAI + daire 3 + 3 nolu dairenin + 11nolu daire + Daire No 12 + 2NOLU DAIRE + 12 No lu daire + D:10 + NO:11 + NO :3 + """ + apartment_number = None + cleaned_text = process_comment + def clean_text_apartment_number(text, match): + clean_text = text.replace(match.group(0), '').strip() + clean_text = re.sub(r'\s+', ' 
', clean_text).strip()
+        return clean_text
+    # Ordered patterns for apartment numbers; the first pattern that matches wins.
+    apartment_number_patterns = [
+        r'(\d+)\s*nolu\s*daire\w*',   # 2 nolu daire, 3 nolu dairenin, 11nolu daire, 2NOLU DAIRE
+        r'(\d+)\s*no\s*lu\s*daire',   # 12 No lu daire
+        r'daire\s*no\s*(\d+)',        # Daire No 12
+        r'(\d+)\s*numarali\s*dai',    # 9 NUMARALI DAI
+        r'daire\s*(\d+)',             # daire 3
+        r'd\s*:\s*(\d+)',             # D:10
+        r'no\s*:\s*(\d+)',            # NO:11, NO :3
+    ]
+    for apartment_pattern in apartment_number_patterns:
+        match = re.search(apartment_pattern, cleaned_text, re.IGNORECASE)
+        if match:
+            apartment_number = match.group(1)
+            found_dict['apartment_number'] = apartment_number
+            return found_dict, clean_text_apartment_number(cleaned_text, match)
+    return found_dict, cleaned_text
+
+
+def extract_months(found_dict, process_comment):
+    """
+    Extract Turkish month names and abbreviations from the process comment
+    """
+    original_text = process_comment
+
+    def normalize_turkish(text: str) -> str:
+        """Properly normalize Turkish text for case-insensitive comparison"""
+        text = text.lower()
+        text = text.replace('i̇', 'i')  # Handle dotted i properly
+        text = text.replace('ı', 'i')  # Convert dotless i to regular i for matching
+        text = unidecode(text)  # Remove other diacritics
+        return text
+
+    if 'months' not in found_dict:
+        found_dict['months'] = []
+
+    months_found, working_text = False, original_text
+    for month in turkish_months:
+        pattern = re.compile(r'\b' + re.escape(month) + r'\b', re.IGNORECASE)
+        for match in pattern.finditer(original_text):
+            matched_text = match.group(0)
+            normalized_month = normalize_turkish(month)
+            month_number = None
+            if month.lower() in month_to_number_dict:
+                month_number = month_to_number_dict[month.lower()]
+            elif normalized_month in month_to_number_dict:
+                month_number = month_to_number_dict[normalized_month]
+            month_info = {'name': month, 'number': month_number}
+            found_dict['months'].append(month_info)
+            months_found = True
+            working_text = working_text.replace(matched_text, '', 1)
+
+    for abbr, full_month in turkish_months_abbr.items():
+        pattern = re.compile(r'\b' + re.escape(abbr) + r'\b', re.IGNORECASE)
+        for match in pattern.finditer(working_text):
+            matched_text = match.group(0)
+            normalized_month = normalize_turkish(full_month)
+            month_number = None
+            if full_month.lower() in month_to_number_dict:
+                month_number = month_to_number_dict[full_month.lower()]
+            elif normalized_month in month_to_number_dict:
+                month_number = month_to_number_dict[normalized_month]
+            month_info = {'name': full_month, 'number': month_number}
+            found_dict['months'].append(month_info)
+            months_found = True
+            working_text = working_text.replace(matched_text, '', 1)
+    return found_dict, working_text
+
+
+def extract_year(found_dict, process_comment):
+    """
+    Extract years from the process comment
+    """
+    original_text = process_comment
+    if 'years' not in found_dict:
+        found_dict['years'] = []
+    working_text = original_text
+    for year in range(start_year, current_year + 1):
+        pattern = re.compile(r'\b' + str(year) + r'\b', re.IGNORECASE)
+        for match in pattern.finditer(original_text):
+            matched_text = match.group(0)
+            if str(matched_text).isdigit():
+                found_dict['years'].append(int(matched_text))
+                working_text = working_text.replace(matched_text, '', 1)
+    return found_dict, working_text
+
+
+def extract_payment_type(found_dict, process_comment):
+    """
+    Extract payment type from the process comment
+    aidat
+    AİD
+    aidatı
+    TADİLAT
+    YAKIT
+    yakıt
+    yakit
+    """
+    original_text = process_comment
+    working_text = original_text
+    if 'payment_types' not in found_dict:
+        found_dict['payment_types'] = []
+    payment_keywords = {'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'], 'tadilat': ['tadilat', 'tadİlat', 'tadilatı'], 'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']}
+    for payment_type, keywords in payment_keywords.items():
+        for keyword in keywords:
+            pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE)
+            for match in pattern.finditer(original_text):
+                matched_text = match.group(0)
+                if payment_type not in found_dict['payment_types']:
+                    found_dict['payment_types'].append(payment_type)
+                working_text = working_text.replace(matched_text, '', 1)
+    return found_dict, working_text
+
+
+def main(account_records, people):
+    list_of_regex_patterns = generate_dictonary_of_patterns(people=people)
+    dicts_found = dict()
+    dicts_not_found = dict()
+    count_extracted = 0
+    for account_record in account_records:
+        account_record_id = str(account_record["id"])
+        found_dict = {}
+        process_comment_iteration = clean_text(text=account_record["process_comment"])
+        found_dict, cleaned_process_comment = extract_person_name_with_regex(found_dict=found_dict, process_comment=process_comment_iteration, patterns_dict=list_of_regex_patterns, people=people)
+        found_dict, cleaned_process_comment = extract_build_parts_info(found_dict=found_dict, process_comment=cleaned_process_comment)
+        found_dict, cleaned_process_comment = extract_months(found_dict=found_dict, process_comment=cleaned_process_comment)
+        found_dict, cleaned_process_comment = extract_year(found_dict=found_dict, process_comment=cleaned_process_comment)
+        found_dict, cleaned_process_comment = extract_payment_type(found_dict=found_dict, process_comment=cleaned_process_comment)
+        if found_dict:
+            dicts_found[str(account_record_id)] = found_dict
+        else:
+            dicts_not_found[str(account_record_id)] = account_record_id
+
+    for id_, item in dicts_found.items():
+        months_are_valid = bool(item.get("months", []))
+        years_are_valid = bool(item.get("years", []))
+        payment_types_are_valid = bool(item.get("payment_types", []))
+        apartment_number_are_valid = bool(item.get("apartment_number", []))
+        person_name_are_valid = bool(item.get("name_match", []))
+        account_record_to_save = AccountRecords.query.filter_by(id=int(id_)).first()
+        save_dict = dict(
+            account_records_id=account_record_to_save.id, account_records_uu_id=str(account_record_to_save.uu_id), prediction_model="regex", treshold=1, is_first_prediction=False
+        )
+        update_dict = dict(prediction_model="regex", treshold=1, is_first_prediction=False)
+        if not any([months_are_valid, years_are_valid, payment_types_are_valid, apartment_number_are_valid, person_name_are_valid]):
+            continue
+        count_extracted += 1
+        # Collect every extracted field once, then create or update its prediction row.
+        predictions_to_save = []
+        if months_are_valid:
+            predictions_to_save.append(("months", dumps({"data": item['months']})))
+        if years_are_valid:
+            predictions_to_save.append(("years", dumps({"data": item['years']})))
+        if payment_types_are_valid:
+            predictions_to_save.append(("payment_types", dumps({"data": item['payment_types']})))
+        if apartment_number_are_valid:
+            predictions_to_save.append(("apartment_number", item['apartment_number']))
+        if person_name_are_valid:
+            predictions_to_save.append(("person_name", dumps({"data": item['name_match']})))
+        for prediction_field, data_to_save in predictions_to_save:
+            print(f"{prediction_field}: {data_to_save}")
+            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field=prediction_field, prediction_model="regex").first()
+            if not prediction_result:
+                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field=prediction_field, prediction_result=data_to_save)
+                created_account_prediction.save()
+            else:
+                prediction_result.update(**update_dict, prediction_result=data_to_save)
+                prediction_result.save()
+
+    print("\n===== SUMMARY =====")
+    print(f"extracted data total : {count_extracted}")
+    print(f"not extracted data total : {len(account_records) - count_extracted}")
+    print(f"Total account records processed : {len(account_records)}")
+
+
+if __name__ == "__main__":
+    # session, session_factory, sqlalchemy_text and the AccountRecords / AccountRecordsPredict
+    # models are expected to come from the project's ORM setup; they are not defined in this file.
+    people_query = sqlalchemy_text("""
+        SELECT DISTINCT ON (p.id) p.firstname, p.middle_name, p.surname, p.birthname, bl.id
+        FROM public.people as p
+        INNER JOIN public.build_living_space as bl ON bl.person_id = p.id
+        INNER JOIN public.build_parts as bp ON bp.id = bl.build_parts_id
+        INNER JOIN public.build as b ON b.id = bp.build_id
+        WHERE b.id = 1
+        ORDER BY p.id
+    """)
+
+    people_raw = session.execute(people_query).all()
+    remove_duplicate = list()
+    clean_people_list = list()
+    for person in people_raw:
+        merged_name = f"{person[0]} {person[1]} {person[2]} {person[3]}"
+        if merged_name not in remove_duplicate:
+            clean_people_list.append(person)
+            remove_duplicate.append(merged_name)
+
+    people = [{"firstname": p[0], "middle_name": p[1], "surname": p[2], "birthname": p[3], 'id': p[4]} for p in clean_people_list]
+    query_account_records = sqlalchemy_text("""
+        SELECT a.id, a.iban, a.bank_date, a.process_comment FROM public.account_records as a where currency_value > 0
+    """)  # and bank_date::date >= '2020-01-01'
+    account_records = session.execute(query_account_records).all()
+    account_records = [{"id": ar[0], "iban": ar[1], "bank_date": ar[2], "process_comment": ar[3]} for ar in account_records]
+
+    try:
+        main(account_records=account_records, people=people)
+    except Exception as e:
+        print(f"{e}")
+
+    session.close()
+    session_factory.remove()
diff --git a/ServicesRunner/Depends/service_handler.py b/ServicesRunner/Depends/service_handler.py
index e27394a..7290aeb 100644
--- a/ServicesRunner/Depends/service_handler.py
+++ b/ServicesRunner/Depends/service_handler.py
@@ -321,13 +321,7 @@ class ServiceTaskRetriever:
 
         # Create new task object
         write_object = RedisTaskObject(
-            task=task_uuid,
-            data=redis_data,
-            completed=False,
-            service=service_name,
-            status=Status.COMPLETED,
-            created_at=datetime.now().isoformat(),
-            is_completed=False
+            task=task_uuid, data=redis_data, completed=False, service=service_name, status=Status.COMPLETED, created_at=datetime.now().isoformat(), is_completed=False
         )
 
         # Convert to dict for serialization
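
Note: the extraction helpers added in matchers.py can be smoke-tested in isolation, without the database. The sketch below is illustrative only — it assumes matchers.py is importable from the Comment directory, uses a made-up comment string, and skips the person-name step because that needs the people/pattern dictionaries built from the database.

    from matchers import clean_text, extract_build_parts_info, extract_months, extract_year, extract_payment_type

    sample_comment = "12 NOLU DAIRE OCAK 2024 AIDAT ODEMESI"  # hypothetical bank process_comment
    found, text = {}, clean_text(sample_comment)
    found, text = extract_build_parts_info(found, text)   # expected: apartment_number '12'
    found, text = extract_months(found, text)             # expected: [{'name': 'OCAK', 'number': 1}]
    found, text = extract_year(found, text)               # expected: [2024]
    found, text = extract_payment_type(found, text)       # expected: ['aidat']
    print(found)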