Comment service: added mail sender service
parent ca98adc338
commit 7a5521648c
@@ -34,6 +34,9 @@ class BankReceive(BaseModel):
 def check_task_belong_to_this_service(task: RedisTaskObject):
+    """
+    Check if task belongs to this service
+    """
     if not task.service == ConfigServices.SERVICE_PREFIX_FINDER_IBAN:
         return False
     if not task.completed:
@@ -46,6 +49,9 @@ def check_task_belong_to_this_service(task: RedisTaskObject):
 def write_account_records_row_from_finder_comment(finder_comments: list[FinderComment], prisma_service: PrismaService, saved_list_of_account_records: dict):
+    """
+    Write account records row from finder comment
+    """
     finder_comments = list(finder_comments)
     for finder_comment in finder_comments:
         bank_date = arrow.get(finder_comment.bank_date).replace(tzinfo='GMT+3').datetime
@@ -75,11 +81,12 @@ def enclose_task_and_send_mail_to_build_manager(prisma_service: PrismaService, s
     """
     if not saved_list_of_account_records:
         return
-    today = arrow.now().to('GMT+3').datetime
+    list_of_new_set, today = [], arrow.now().to('GMT+3').datetime
     for build_id, saved_list_of_account_record in saved_list_of_account_records.items():
         build_manager_occupant_type = prisma_service.find_first(table="occupant_types", query={"occupant_code":"BU-MNG", "is_confirmed": True, "active": True})
         living_space = prisma_service.find_first(
-            table="build_living_space", query={"build_id": build_id, "occupant_type_id": build_manager_occupant_type['id'], "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}})
+            table="build_living_space", query={"build_id": build_id, "occupant_type_id": build_manager_occupant_type['id'], "expiry_starts": {"lte": today}, "expiry_ends": {"gte": today}}
+        )
         build = prisma_service.find_first(table="builds", query={"id": build_id})
         person = prisma_service.find_first(table="people", query={"id": living_space['person_id']})
         user = prisma_service.find_first(table="users", query={"person_id": person['id']})
@@ -87,13 +94,17 @@ def enclose_task_and_send_mail_to_build_manager(prisma_service: PrismaService, s
             receivers=[user.email], data=saved_list_of_account_record, template_name=ConfigServices.TEMPLATE_ACCOUNT_RECORDS,
             subject=f"{build['name']} Cari Durum Bilgilendirme Raporu - {today.strftime('%d/%m/%Y %H:%M')}",
         )
-        set_mail_object = RedisMailSender(task=task, data=send_object, service=ConfigServices.SERVICE_PREFIX_MAIL_SENDER, status=Status.PENDING, completed=False, created_at=today.strftime('%Y-%m-%d %H:%M:%S'))
-        process_comment_finder_service.service_retriever.redis_client.set(ConfigServices.SERVICE_PREFIX_MAIL_SENDER, dumps(set_mail_object.dict()))
-    return
+        set_mail_object = RedisMailSender(
+            task=task, data=send_object, service=ConfigServices.SERVICE_PREFIX_MAIL_SENDER, status=Status.PENDING, completed=False, created_at=today.strftime('%Y-%m-%d %H:%M:%S')
+        )
+        list_of_new_set.append(set_mail_object)
+    if list_of_new_set:
+        process_comment_finder_service.service_retriever.redis_client.set(ConfigServices.SERVICE_PREFIX_MAIL_SENDER, dumps(
+            {"type": "mail_sender", "data": list_of_new_set, "count": len(list_of_new_set), "created_at": today.strftime('%Y-%m-%d %H:%M:%S')}
+        ))


 if __name__ == "__main__":

     prisma_service = PrismaService()
     process_comment_finder_service = ProcessCommentFinderService()
     print("Process Comment service started")
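With this change the per-build mail tasks are collected in list_of_new_set and flushed to Redis once, as a single batched payload, instead of one redis_client.set call per build. A sketch of the batched shape (field values illustrative; each entry in "data" is one RedisMailSender object):

{
    "type": "mail_sender",
    "data": [set_mail_object_for_build_1, set_mail_object_for_build_2],
    "count": 2,
    "created_at": "2025-01-31 14:05:00",
}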
@@ -109,6 +120,9 @@ if __name__ == "__main__":
             write_account_records_row_from_finder_comment(finder_comments=task.data.FinderComment, prisma_service=prisma_service, saved_list_of_account_records=saved_list_of_account_records)
             process_comment_finder_service.update_task_status(task_uuid=task.task, is_completed=True, status=Status.COMPLETED)
             process_comment_finder_service.delete_task(task_uuid=task.task)
+            enclose_task_and_send_mail_to_build_manager(
+                prisma_service=prisma_service, saved_list_of_account_records=saved_list_of_account_records, process_comment_finder_service=process_comment_finder_service, task=task
+            )
         except Exception as e:
             raise
         finally:
@@ -0,0 +1,518 @@
import re
import arrow

from json import loads, dumps
from unidecode import unidecode
from difflib import SequenceMatcher
from itertools import permutations
from time import perf_counter

turkish_months = ["OCAK", "ŞUBAT", "MART", "NİSAN", "MAYIS", "HAZİRAN", "TEMMUZ", "AĞUSTOS", "EYLÜL", "EKİM", "KASIM", "ARALIK"]
turkish_months_abbr = {
    "OCA": "OCAK", "SUB": "ŞUBAT", "ŞUB": "ŞUBAT", "MAR": "MART", "NIS": "NİSAN", "MAY": "MAYIS", "HAZ": "HAZİRAN", "HZR": "HAZİRAN",
    "TEM": "TEMMUZ", "AGU": "AĞUSTOS", "AGT": "AĞUSTOS", "EYL": "EYLÜL", "EKI": "EKİM", "KAS": "KASIM", "ARA": "ARALIK",
}
# Accept both the Turkish spellings and their ASCII-folded forms as keys.
month_to_number_dict = {
    "ocak": 1, "şubat": 2, "mart": 3, "nisan": 4, "mayıs": 5, "haziran": 6, "temmuz": 7, "ağustos": 8, "eylül": 9, "ekim": 10, "kasım": 11, "aralık": 12,
    "subat": 2, "mayis": 5, "agustos": 8, "eylul": 9, "kasim": 11, "aralik": 12,
}
start_year = 1950
current_year = arrow.now().year

def clean_text(text):
    text = str(text)
    text = re.sub(r'\d{8,}', ' ', text)
    # text = re.sub(r'\b[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*\b|\b[A-Za-z0-9]*?[A-Za-z]+[A-Za-z0-9]*?[0-9]+[A-Za-z0-9]*\b', ' ', text)
    text = text.replace("/", " ")
    text = text.replace("_", " ")
    text_remove_underscore = text.replace("-", " ").replace("+", " ")
    text_remove_asterisk = text_remove_underscore.replace("*", " ")
    text_remove_comma = text_remove_asterisk.replace(",", " ")
    text_remove_dots = text_remove_comma.replace(".", " ")
    text_remove_dots = re.sub(r'\s+', ' ', text_remove_dots)
    text_remove_dots = text_remove_dots.strip()
    return text_remove_dots

def normalize_text(text: str):
    text = text.replace('İ', 'i')
    text = text.replace('I', 'ı')
    text = text.replace('Ş', 'ş')
    text = text.replace('Ğ', 'ğ')
    text = text.replace('Ü', 'ü')
    text = text.replace('Ö', 'ö')
    text = text.replace('Ç', 'ç')
    return unidecode(text).lower()

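A quick sanity check for normalize_text, assuming the definition above. The Turkish capitals are folded by hand first because str.lower() turns 'İ' into 'i' plus a combining dot, while unidecode alone maps 'İ' to ASCII 'I':

assert normalize_text("ŞAHİN") == "sahin"
assert normalize_text("IŞIK") == "isik"
assert normalize_text("Ağustos Böceği") == "agustos bocegi"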
def get_person_initials(person: dict):
    parts = [person.get("firstname", ""), person.get("middle_name", ""), person.get("surname", ""), person.get("birthname", "")]
    return [unidecode(p.strip())[0].upper() for p in parts if p]


def get_text_initials(matched_text: str):
    return [unidecode(word.strip())[0].upper() for word in matched_text.split() if word.strip()]

def generate_dictonary_of_patterns(people: list[dict]):
    """
    TODO: completely remove middle_name; instead match firstname + SomeWord + surname with one regex
    """
    patterns_dict = {}
    for person in people:
        person_id = person.get('id')
        firstname = person.get('firstname', '').strip() if person.get('firstname') else ""
        middle_name = person.get('middle_name', '').strip() if person.get('middle_name') else ""
        surname = person.get('surname', '').strip() if person.get('surname') else ""
        birthname = person.get('birthname', '').strip() if person.get('birthname') else ""
        if not firstname or not surname:
            continue

        name_parts = {
            'firstname': {
                'orig': firstname,
                'norm': normalize_text(firstname) if firstname else "",
                'init': normalize_text(firstname)[0] if firstname else ""
            },
            'surname': {
                'orig': surname,
                'norm': normalize_text(surname) if surname else "",
                'init': normalize_text(surname)[0] if surname else ""
            }
        }

        if middle_name:
            name_parts['middle_name'] = {
                'orig': middle_name,
                'norm': normalize_text(middle_name) if middle_name else "",
                'init': normalize_text(middle_name)[0] if middle_name else ""
            }

        if birthname and normalize_text(birthname) != normalize_text(surname):
            name_parts['birthname'] = {
                'orig': birthname,
                'norm': normalize_text(birthname),
                'init': normalize_text(birthname)[0] if birthname else ""
            }

        person_patterns = set()

        def create_pattern(parts, formats, separators=None):
            if separators is None:
                separators = [""]
            patterns = []
            for fmt in formats:
                for sep in separators:
                    pattern_parts = []
                    for part_type, part_name in fmt:
                        if part_name in parts and part_type in parts[part_name]:
                            pattern_parts.append(re.escape(parts[part_name][part_type]))
                    if pattern_parts:
                        patterns.append(r"\b" + sep.join(pattern_parts) + r"\b")
            return patterns

        name_formats = [
            [('orig', 'firstname'), ('orig', 'surname')],
            [('norm', 'firstname'), ('norm', 'surname')],
            [('orig', 'surname'), ('orig', 'firstname')],
            [('norm', 'surname'), ('norm', 'firstname')],
        ]
        if 'middle_name' in name_parts:
            name_formats = [
                [('orig', 'firstname'), ('orig', 'middle_name'), ('orig', 'surname')],
                [('norm', 'firstname'), ('norm', 'middle_name'), ('norm', 'surname')],
            ]
        person_patterns.update(create_pattern(name_parts, name_formats, [" ", ""]))

        if 'middle_name' in name_parts:
            middle_name_formats = [
                [('orig', 'firstname'), ('orig', 'middle_name')],
                [('norm', 'firstname'), ('norm', 'middle_name')],
                [('orig', 'middle_name'), ('orig', 'surname')],
                [('norm', 'middle_name'), ('norm', 'surname')],
            ]
            person_patterns.update(create_pattern(name_parts, middle_name_formats, [" ", ""]))

        if 'birthname' in name_parts and name_parts['surname']['orig'] != name_parts['birthname']['orig']:
            birthname_formats = [
                [('orig', 'firstname'), ('orig', 'birthname')],
                [('norm', 'firstname'), ('norm', 'birthname')],
                [('orig', 'birthname'), ('orig', 'firstname')],
                [('norm', 'birthname'), ('norm', 'firstname')],
            ]
            person_patterns.update(create_pattern(name_parts, birthname_formats, [" ", ""]))

        initial_formats = [[('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')], [('init', 'firstname'), ('init', 'surname')]]
        person_patterns.update(create_pattern(name_parts, initial_formats, ["", ".", " ", ". "]))
        if 'middle_name' in name_parts:
            triple_initial_formats = [
                [('init', 'firstname'), ('init', 'middle_name'), ('init', 'surname')],
            ]
            person_patterns.update(create_pattern(name_parts, triple_initial_formats, ["", ".", " ", ". "]))
        compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in person_patterns]
        patterns_dict[person_id] = compiled_patterns
    return patterns_dict

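An illustrative call with a hypothetical person record (the name is invented), assuming the function above:

people = [{"id": 7, "firstname": "Ali", "middle_name": None, "surname": "Kaya", "birthname": None}]
patterns_dict = generate_dictonary_of_patterns(people=people)
# patterns_dict[7] holds compiled case-insensitive regexes such as
# r"\bAli Kaya\b", r"\bali kaya\b", r"\bKaya Ali\b", r"\bAliKaya\b",
# plus initial-based forms like r"\ba k\b", r"\ba.k\b" and r"\bak\b".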
def extract_person_name_with_regex(found_dict: dict, process_comment: str, patterns_dict: dict, people: list[dict]):
    cleaned_text = process_comment
    all_matches = []
    for person_id, patterns in patterns_dict.items():
        person = next((p for p in people if p.get('id') == person_id), None)
        if not person:
            continue
        firstname_norm = normalize_text(person.get("firstname", "").strip()) if person.get("firstname") else ""
        middle_name_norm = normalize_text(person.get("middle_name", "").strip()) if person.get("middle_name") else ""
        surname_norm = normalize_text(person.get("surname", "").strip()) if person.get("surname") else ""
        birthname_norm = normalize_text(person.get("birthname", "").strip()) if person.get("birthname") else ""
        text_norm = normalize_text(process_comment)
        person_matches = []
        for pattern in patterns:
            for match in pattern.finditer(text_norm):
                start, end = match.span()
                matched_text = process_comment[start:end]
                matched_text_norm = normalize_text(matched_text)
                is_valid_match = False
                if len(matched_text_norm.split()) <= 1:
                    is_valid_match = False
                else:
                    has_firstname = firstname_norm and firstname_norm in matched_text_norm
                    has_surname = surname_norm and surname_norm in matched_text_norm
                    has_birthname = birthname_norm and birthname_norm in matched_text_norm
                    if (has_firstname and has_surname) or (has_firstname and has_birthname):
                        is_valid_match = True
                if is_valid_match:
                    person_matches.append({'matched_text': matched_text, 'start': start, 'end': end})
        if person_matches:
            person_matches.sort(key=lambda x: len(x['matched_text']), reverse=True)
            non_overlapping_matches = []
            for match in person_matches:
                overlaps = False
                for existing_match in non_overlapping_matches:
                    if (match['start'] < existing_match['end'] and match['end'] > existing_match['start']):
                        overlaps = True
                        break
                if not overlaps:
                    non_overlapping_matches.append(match)
            if non_overlapping_matches:
                found_dict["name_match"] = person
                all_matches.extend([(match, person) for match in non_overlapping_matches])
    if all_matches:
        all_matches.sort(key=lambda x: x[0]['start'], reverse=True)
        for match, person in all_matches:
            matched_text = match['matched_text']
            matched_words = matched_text.split()
            for word in matched_words:
                word_norm = normalize_text(word).strip()
                if not word_norm:
                    continue
                text_norm = normalize_text(cleaned_text)
                for word_match in re.finditer(rf'\b{re.escape(word_norm)}\b', text_norm, re.IGNORECASE):
                    start, end = word_match.span()
                    cleaned_text = cleaned_text[:start] + ' ' * (end - start) + cleaned_text[end:]
        cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    return found_dict, cleaned_text

def extract_build_parts_info(found_dict, process_comment):
    """
    Regex of parts such as:
        2 nolu daire
        9 NUMARALI DAI
        daire 3
        3 nolu dairenin
        11nolu daire
        Daire No 12
        2NOLU DAIRE
        12 No lu daire
        D:10
        NO:11
        NO :3
    """
    apartment_number = None
    cleaned_text = process_comment

    def clean_text_apartment_number(text, match):
        clean_text = text.replace(match.group(0), '').strip()
        clean_text = re.sub(r'\s+', ' ', clean_text).strip()
        return clean_text

    pattern1 = re.compile(r'(\d+)\s*nolu\s*daire', re.IGNORECASE)
    match = pattern1.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern4 = re.compile(r'(\d+)\s*nolu\s*daire\w*', re.IGNORECASE)
    match = pattern4.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern5 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
    match = pattern5.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern7 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
    match = pattern7.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern8 = re.compile(r'(\d+)\s*no\s*lu\s*daire', re.IGNORECASE)
    match = pattern8.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern6 = re.compile(r'daire\s*no\s*(\d+)', re.IGNORECASE)
    match = pattern6.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern2 = re.compile(r'(\d+)\s*numarali\s*dai', re.IGNORECASE)
    match = pattern2.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern3 = re.compile(r'daire\s*(\d+)', re.IGNORECASE)
    match = pattern3.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern9 = re.compile(r'd\s*:\s*(\d+)', re.IGNORECASE)
    match = pattern9.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    pattern10 = re.compile(r'no\s*:\s*(\d+)', re.IGNORECASE)
    match = pattern10.search(cleaned_text)
    if match:
        apartment_number = match.group(1)
        found_dict['apartment_number'] = apartment_number
        return found_dict, clean_text_apartment_number(cleaned_text, match)
    return found_dict, cleaned_text

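For instance, assuming the function above (the comment text is invented):

found, rest = extract_build_parts_info({}, "MART AYI AIDATI 2 NOLU DAIRE")
# found == {'apartment_number': '2'}
# rest == "MART AYI AIDATI"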
def extract_months(found_dict, process_comment):
    """
    Extract Turkish month names and abbreviations from the process comment
    """
    original_text = process_comment

    def normalize_turkish(text: str) -> str:
        """Properly normalize Turkish text for case-insensitive comparison"""
        text = text.lower()
        text = text.replace('i̇', 'i')  # Handle dotted i properly
        text = text.replace('ı', 'i')  # Convert dotless i to regular i for matching
        text = unidecode(text)  # Remove other diacritics
        return text

    if 'months' not in found_dict:
        found_dict['months'] = []

    months_found, working_text = False, original_text
    for month in turkish_months:
        pattern = re.compile(r'\b' + re.escape(month) + r'\b', re.IGNORECASE)
        for match in pattern.finditer(original_text):
            matched_text = match.group(0)
            normalized_month = normalize_turkish(month)
            month_number = None
            if month.lower() in month_to_number_dict:
                month_number = month_to_number_dict[month.lower()]
            elif normalized_month in month_to_number_dict:
                month_number = month_to_number_dict[normalized_month]
            month_info = {'name': month, 'number': month_number}
            found_dict['months'].append(month_info)
            months_found = True
            working_text = working_text.replace(matched_text, '', 1)

    for abbr, full_month in turkish_months_abbr.items():
        pattern = re.compile(r'\b' + re.escape(abbr) + r'\b', re.IGNORECASE)
        for match in pattern.finditer(working_text):
            matched_text = match.group(0)
            normalized_month = normalize_turkish(full_month)
            month_number = None
            if full_month.lower() in month_to_number_dict:
                month_number = month_to_number_dict[full_month.lower()]
            elif normalized_month in month_to_number_dict:
                month_number = month_to_number_dict[normalized_month]
            month_info = {'name': full_month, 'number': month_number}
            found_dict['months'].append(month_info)
            months_found = True
            working_text = working_text.replace(matched_text, '', 1)
    return found_dict, working_text

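Both full names and abbreviations resolve to month numbers, and matched spans are consumed from the working text (assuming the function and the month tables above):

found, rest = extract_months({}, "OCA ve MART aidati")
# found['months'] == [{'name': 'MART', 'number': 3}, {'name': 'OCAK', 'number': 1}]
# (full names are scanned first, then the abbreviations on what remains)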
def extract_year(found_dict, process_comment):
    """
    Extract years from the process comment
    """
    original_text = process_comment
    if 'years' not in found_dict:
        found_dict['years'] = []
    working_text = original_text
    for year in range(start_year, current_year + 1):
        pattern = re.compile(r'\b' + str(year) + r'\b', re.IGNORECASE)
        for match in pattern.finditer(original_text):
            matched_text = match.group(0)
            if str(matched_text).isdigit():
                found_dict['years'].append(int(matched_text))
                working_text = working_text.replace(matched_text, '', 1)
    return found_dict, working_text

def extract_payment_type(found_dict, process_comment):
    """
    Extract payment type from the process comment
        aidat
        AİD
        aidatı
        TADİLAT
        YAKIT
        yakıt
        yakit
    """
    original_text = process_comment
    working_text = original_text
    if 'payment_types' not in found_dict:
        found_dict['payment_types'] = []
    payment_keywords = {'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'], 'tadilat': ['tadilat', 'tadİlat', 'tadilatı'], 'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']}
    for payment_type, keywords in payment_keywords.items():
        for keyword in keywords:
            pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE)
            for match in pattern.finditer(original_text):
                matched_text = match.group(0)
                if payment_type not in found_dict['payment_types']:
                    found_dict['payment_types'].append(payment_type)
                working_text = working_text.replace(matched_text, '', 1)
    return found_dict, working_text

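Chaining the extractors on one cleaned comment gives a picture of the whole pass that main() below performs; name matching is skipped here because it needs the per-person patterns (text invented, and the year check assumes a current year of 2024 or later):

found, text = {}, clean_text("ALI KAYA 2 NOLU DAIRE OCAK 2024 AIDAT")
found, text = extract_build_parts_info(found, text)
found, text = extract_months(found, text)
found, text = extract_year(found, text)
found, text = extract_payment_type(found, text)
# found == {'apartment_number': '2', 'months': [{'name': 'OCAK', 'number': 1}],
#           'years': [2024], 'payment_types': ['aidat']}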
def main(account_records, people):
    list_of_regex_patterns = generate_dictonary_of_patterns(people=people)
    dicts_found = dict()
    dicts_not_found = dict()
    count_extracted = 0
    for account_record in account_records:
        account_record_id = str(account_record["id"])
        found_dict = {}
        process_comment_iteration = clean_text(text=account_record["process_comment"])
        found_dict, cleaned_process_comment = extract_person_name_with_regex(found_dict=found_dict, process_comment=process_comment_iteration, patterns_dict=list_of_regex_patterns, people=people)
        found_dict, cleaned_process_comment = extract_build_parts_info(found_dict=found_dict, process_comment=cleaned_process_comment)
        found_dict, cleaned_process_comment = extract_months(found_dict=found_dict, process_comment=cleaned_process_comment)
        found_dict, cleaned_process_comment = extract_year(found_dict=found_dict, process_comment=cleaned_process_comment)
        found_dict, cleaned_process_comment = extract_payment_type(found_dict=found_dict, process_comment=cleaned_process_comment)
        if found_dict:
            dicts_found[str(account_record_id)] = found_dict
        else:
            dicts_not_found[str(account_record_id)] = account_record_id

    for id_, item in dicts_found.items():
        months_are_valid = bool(item.get("months", []))
        years_are_valid = bool(item.get("years", []))
        payment_types_are_valid = bool(item.get("payment_types", []))
        apartment_number_are_valid = bool(item.get("apartment_number", []))
        person_name_are_valid = bool(item.get("name_match", []))
        account_record_to_save = AccountRecords.query.filter_by(id=int(id_)).first()
        save_dict = dict(
            account_records_id=account_record_to_save.id, account_records_uu_id=str(account_record_to_save.uu_id), prediction_model="regex", treshold=1, is_first_prediction=False
        )
        update_dict = dict(prediction_model="regex", treshold=1, is_first_prediction=False)
        if any([months_are_valid, years_are_valid, payment_types_are_valid, apartment_number_are_valid, person_name_are_valid]):
            count_extracted += 1
        if months_are_valid:
            print(f"months: {item['months']}")
            data_to_save = dumps({"data": item['months']})
            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="months", prediction_model="regex").first()
            if not prediction_result:
                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="months", prediction_result=data_to_save)
                created_account_prediction.save()
            else:
                prediction_result.update(**update_dict, prediction_result=data_to_save)
                prediction_result.save()
        if years_are_valid:
            print(f"years: {item['years']}")
            data_to_save = dumps({"data": item['years']})
            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="years", prediction_model="regex").first()
            if not prediction_result:
                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="years", prediction_result=data_to_save)
                created_account_prediction.save()
            else:
                prediction_result.update(**update_dict, prediction_result=data_to_save)
                prediction_result.save()
        if payment_types_are_valid:
            print(f"payment_types: {item['payment_types']}")
            data_to_save = dumps({"data": item['payment_types']})
            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="payment_types", prediction_model="regex").first()
            if not prediction_result:
                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="payment_types", prediction_result=data_to_save)
                created_account_prediction.save()
            else:
                prediction_result.update(**update_dict, prediction_result=data_to_save)
                prediction_result.save()
        if apartment_number_are_valid:
            print(f"apartment_number: {item['apartment_number']}")
            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="apartment_number", prediction_model="regex").first()
            if not prediction_result:
                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="apartment_number", prediction_result=item['apartment_number'])
                created_account_prediction.save()
            else:
                prediction_result.update(**update_dict, prediction_result=item['apartment_number'])
                prediction_result.save()
        if person_name_are_valid:
            print(f"person_name: {item['name_match']}")
            data_to_save = dumps({"data": item['name_match']})
            prediction_result = AccountRecordsPredict.query.filter_by(account_records_id=account_record_to_save.id, prediction_field="person_name", prediction_model="regex").first()
            if not prediction_result:
                created_account_prediction = AccountRecordsPredict.create(**save_dict, prediction_field="person_name", prediction_result=data_to_save)
                created_account_prediction.save()
            else:
                prediction_result.update(**update_dict, prediction_result=data_to_save)
                prediction_result.save()

    print("\n===== SUMMARY =====")
    print(f"extracted data total : {count_extracted}")
    print(f"not extracted data total : {len(account_records) - count_extracted}")
    print(f"Total account records processed : {len(account_records)}")

if __name__ == "__main__":

    people_query = sqlalchemy_text("""
        SELECT DISTINCT ON (p.id) p.firstname, p.middle_name, p.surname, p.birthname, bl.id
        FROM public.people as p
        INNER JOIN public.build_living_space as bl ON bl.person_id = p.id
        INNER JOIN public.build_parts as bp ON bp.id = bl.build_parts_id
        INNER JOIN public.build as b ON b.id = bp.build_id
        WHERE b.id = 1
        ORDER BY p.id
    """)

    people_raw = session.execute(people_query).all()
    remove_duplicate = list()
    clean_people_list = list()
    for person in people_raw:
        merged_name = f"{person[0]} {person[1]} {person[2]} {person[3]}"
        if merged_name not in remove_duplicate:
            clean_people_list.append(person)
            remove_duplicate.append(merged_name)

    people = [{"firstname": p[0], "middle_name": p[1], "surname": p[2], "birthname": p[3], 'id': p[4]} for p in clean_people_list]
    query_account_records = sqlalchemy_text("""
        SELECT a.id, a.iban, a.bank_date, a.process_comment FROM public.account_records as a where currency_value > 0
    """)  # and bank_date::date >= '2020-01-01'
    account_records = session.execute(query_account_records).all()
    account_records = [{"id": ar[0], "iban": ar[1], "bank_date": ar[2], "process_comment": ar[3]} for ar in account_records]

    try:
        main(account_records=account_records, people=people)
    except Exception as e:
        print(f"{e}")

    session.close()
    session_factory.remove()
@@ -321,13 +321,7 @@ class ServiceTaskRetriever:
         # Create new task object
         write_object = RedisTaskObject(
-            task=task_uuid,
-            data=redis_data,
-            completed=False,
-            service=service_name,
-            status=Status.COMPLETED,
-            created_at=datetime.now().isoformat(),
-            is_completed=False
+            task=task_uuid, data=redis_data, completed=False, service=service_name, status=Status.COMPLETED, created_at=datetime.now().isoformat(), is_completed=False
         )

         # Convert to dict for serialization
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue