Comment Parser Regex service completed

This commit is contained in:
Berkay 2025-08-14 21:20:40 +03:00
parent 82b1d4825b
commit 456203f5cf
7 changed files with 245 additions and 101 deletions

View File

@ -403,6 +403,7 @@ model account_records {
accounting_receipt_number Int @default(0) accounting_receipt_number Int @default(0)
status_id Int @default(0) @db.SmallInt status_id Int @default(0) @db.SmallInt
approved_record Boolean @default(false) approved_record Boolean @default(false)
is_predicted Boolean @default(false)
import_file_name String? @db.VarChar import_file_name String? @db.VarChar
receive_debit Int? receive_debit Int?
receive_debit_uu_id String? @db.VarChar receive_debit_uu_id String? @db.VarChar

View File

@ -3,7 +3,7 @@ import arrow
from typing import Optional from typing import Optional
from pydantic import BaseModel from pydantic import BaseModel
from matchers import Parser from matchers import ParsedComment, Parser
from models import BuildingCluster, BuildPart, BuildLivingSpace, Person, User, OccupantType from models import BuildingCluster, BuildPart, BuildLivingSpace, Person, User, OccupantType
from Depends.prisma_client import PrismaService from Depends.prisma_client import PrismaService
@ -126,7 +126,7 @@ def get_all_person_data_due_to_build(prisma_service: PrismaService):
occupant_type_uu_id=living_space.occupant_type_uu_id, occupant_type_uu_id=living_space.occupant_type_uu_id,
is_confirmed=living_space.is_confirmed, is_confirmed=living_space.is_confirmed,
active=living_space.active, active=living_space.active,
person=person_obj, person=person_obj,
occupant_types=occupant_type_obj occupant_types=occupant_type_obj
) )
part_obj.living_spaces.append(living_space_obj) part_obj.living_spaces.append(living_space_obj)
@ -138,22 +138,104 @@ def get_all_companies_data(prisma_service: PrismaService):
return prisma_service.find_many(table="companies", query={"active": True, "is_confirmed": True}) return prisma_service.find_many(table="companies", query={"active": True, "is_confirmed": True})
def get_all_person_data_due_to_account_record(prisma_service: PrismaService):
    """Load the account records that still await prediction, split by amount sign.

    Both queries select rows of ``account_records`` that are active, confirmed,
    not approved and not yet predicted; they differ only in the
    ``currency_value`` condition.

    Args:
        prisma_service: Service used to run ``find_many`` against the database.

    Returns:
        A ``(arriving, debt)`` tuple: the records with ``currency_value > 0``
        followed by the records with ``currency_value < 0``.
    """
    pending_filter = {"is_predicted": False, "active": True, "is_confirmed": True, "approved_record": False}
    positive_records = prisma_service.find_many(
        table="account_records",
        query={**pending_filter, "currency_value": {"gt": 0}},
    )
    negative_records = prisma_service.find_many(
        table="account_records",
        query={**pending_filter, "currency_value": {"lt": 0}},
    )
    return positive_records, negative_records
def check_if_any_account_record_added(prisma_service: PrismaService):
    """Tell whether at least one account record is still waiting for prediction.

    Args:
        prisma_service: Service used to run ``find_first`` against the database.

    Returns:
        ``True`` when an active, confirmed, unapproved and unpredicted row
        exists in ``account_records``; ``False`` otherwise.
    """
    pending_filter = {
        "is_predicted": False,
        "active": True,
        "is_confirmed": True,
        "approved_record": False,
    }
    first_pending = prisma_service.find_first(table="account_records", query=pending_filter)
    if first_pending is None:
        return False
    return True
def check_if_any_building_added(prisma_service: PrismaService, build_id_list: list[str | int]):
    """Tell whether the ``build`` table holds a building outside the given id list.

    Args:
        prisma_service: Service used to run ``find_first`` against the database.
        build_id_list: Ids of the buildings that are already known; every item
            is converted with ``int`` before it is used in the query.

    Returns:
        ``True`` when an active, confirmed building exists whose id is not in
        ``build_id_list``; ``False`` otherwise.
    """
    known_ids = list(map(int, build_id_list))
    unseen_filter = {"active": True, "is_confirmed": True, "id": {"not": {"in": known_ids}}}
    unseen_building = prisma_service.find_first(table="build", query=unseen_filter)
    return unseen_building is not None
def update_account_record_set_is_predict_true(prisma_service: PrismaService, account_record_id: int):
    """Flag one account record as predicted.

    Args:
        prisma_service: Service used to run ``update`` against the database.
        account_record_id: Id of the ``account_records`` row to flag.

    Returns:
        Whatever ``prisma_service.update`` returns.
    """
    record_filter = {"id": account_record_id}
    changes = {"is_predicted": True}
    # NOTE(review): the filter is passed as ``query=`` here, while
    # update_account_records passes it as ``where=`` -- confirm which keyword
    # PrismaService.update actually expects.
    return prisma_service.update(table="account_records", query=record_filter, data=changes)
def update_account_records(prisma_service: PrismaService, parsed_record: ParsedComment, collect_possible_parts_dict: dict[str, list[dict]]):
    """Link a parsed account record to its matched person and build part.

    The record identified by ``parsed_record.account_record_id`` is connected
    to the first candidate build part of the matched person, to that person as
    sender, and is marked ``is_predicted``. When the first parsed payment type
    is ``"aidat"`` or ``"tadilat"`` the corresponding ``api_enum_dropdown`` row
    (key ``BDT-D`` / ``BDT-R``) is connected as well.

    Args:
        prisma_service: Service used for the ``find_first`` and ``update`` calls.
        parsed_record: Parsed comment; ``people``, ``payment_types`` and
            ``account_record_id`` are read from it.
        collect_possible_parts_dict: Candidate build parts keyed by
            ``str(person.id)``; each candidate must provide ``"id"`` and
            ``"uu_id"``.

    Returns:
        The result of ``prisma_service.update``, or ``None`` when no parsed
        person has a candidate build part (nothing is written in that case).
    """
    # The caller only guarantees that exactly one parsed person has candidate
    # parts, not that it is the first one; always taking people[0] raised
    # KeyError whenever the first person had no living space.
    person = next(
        (candidate for candidate in parsed_record.people if collect_possible_parts_dict.get(str(candidate.id))),
        None,
    )
    if person is None:
        return None

    dropdown_key_by_payment_type = {"aidat": "BDT-D", "tadilat": "BDT-R"}
    payment_type_result = None
    if parsed_record.payment_types:
        dropdown_key = dropdown_key_by_payment_type.get(parsed_record.payment_types[0])
        if dropdown_key is not None:
            payment_type_result = prisma_service.find_first(table="api_enum_dropdown", query={"key": dropdown_key})

    build_part = collect_possible_parts_dict[str(person.id)][0]
    add_dict = {
        "build_parts": {"connect": {"id": int(build_part["id"])}},
        "build_parts_uu_id": str(build_part["uu_id"]),
        "people_account_records_send_person_idTopeople": {"connect": {"id": int(person.id)}},
        "send_person_uu_id": str(person.uu_id),
        "is_predicted": True,
    }
    if payment_type_result:
        add_dict["api_enum_dropdown_account_records_payment_result_typeToapi_enum_dropdown"] = {"connect": {"id": int(payment_type_result.id)}}
        add_dict["payment_result_type_uu_id"] = str(payment_type_result.uu_id)
    return prisma_service.update(table="account_records", where={"id": int(parsed_record.account_record_id)}, data=add_dict)
def set_prediction_to_redis(process_comment_parser_service: ProcessCommentParserService, parsed_record: ParsedComment, possible: list[dict]):
    """Store an ambiguous prediction in Redis and flag the record as predicted.

    The prediction map kept by ``process_comment_parser_service`` is read,
    extended with an entry keyed by ``str(parsed_record.account_record_id)``
    and written back; afterwards the account record is marked ``is_predicted``
    in the database.

    Args:
        process_comment_parser_service: Service exposing
            ``get_predict_account_record`` / ``set_predict_account_record``.
        parsed_record: Parsed comment whose fields are copied into the entry.
        possible: Candidate build parts stored under ``"predictions"``.

    Returns:
        None.
    """
    # get_predict_account_record() returns None until something has been
    # stored, so start from an empty map instead of failing on item assignment.
    predict_account_records = process_comment_parser_service.get_predict_account_record() or {}
    predict_account_records[str(parsed_record.account_record_id)] = {
        "account_record_id": parsed_record.account_record_id,
        "build_id": parsed_record.build_id,
        "payment_type": parsed_record.payment_types,
        "months": parsed_record.months,
        "years": parsed_record.years,
        "parts": parsed_record.parts,
        "predictions": possible,
    }
    process_comment_parser_service.set_predict_account_record(predict_account_record=predict_account_records)
    # NOTE(review): ``prisma_service`` is not a parameter of this function; it
    # resolves to the module-level name created under ``__main__``. Consider
    # passing it in explicitly so the function also works when imported.
    update_account_record_set_is_predict_true(prisma_service=prisma_service, account_record_id=parsed_record.account_record_id)
    return
if __name__ == "__main__": if __name__ == "__main__":
print("Process Comment Parser service started") print("Process Comment Parser service started")
renew = False
prisma_service = PrismaService() prisma_service = PrismaService()
process_comment_parser_service = ProcessCommentParserService() process_comment_parser_service = ProcessCommentParserService()
search_people = get_all_person_data_due_to_build(prisma_service) search_people = get_all_person_data_due_to_build(prisma_service)
process_comment_parser_service.set_task_requirements(search_people) process_comment_parser_service.set_task_requirements(search_people)
arriving_account_records = prisma_service.find_many(table="account_records", query={"active": True, "is_confirmed": True, "approved_record": False, "currency_value": {"gt": 0}}) arriving_account_records, debt_account_records = get_all_person_data_due_to_account_record(prisma_service)
debt_account_records = prisma_service.find_many(table="account_records", query={"active": True, "is_confirmed": True, "approved_record": False, "currency_value": {"lt": 0}})
try: try:
while True: while True:
time.sleep(5) if not check_if_any_account_record_added(prisma_service) or renew:
arriving_account_records, debt_account_records = get_all_person_data_due_to_account_record(prisma_service)
renew = False
print("Process Comment Parser service started sleeping for 5 seconds") print("Process Comment Parser service started sleeping for 5 seconds")
tasks_dict = process_comment_parser_service.get_task_requirements() tasks_dict: dict[str, BuildingCluster] = process_comment_parser_service.get_task_requirements()
task_requirements: dict[str, BuildingCluster] = {idx: BuildingCluster(**value) for idx, value in tasks_dict.items()} task_requirements: dict[str, BuildingCluster] = {idx: BuildingCluster(**value) for idx, value in tasks_dict.items()}
if not check_if_any_building_added(prisma_service, list(task_requirements.keys())):
search_people = get_all_person_data_due_to_build(prisma_service)
process_comment_parser_service.set_task_requirements(search_people)
parser = Parser(account_records=arriving_account_records, task_requirements=task_requirements) parser = Parser(account_records=arriving_account_records, task_requirements=task_requirements)
parsed_records = parser.parse() parsed_records = parser.parse()
for parsed_record in parsed_records:
collect_possible_parts_dict = {}
if not parsed_record.people:
continue
for person in parsed_record.people:
build_id = parsed_record.build_id
person_id = person.id
building = task_requirements[str(build_id)]
for build_part in building.build_parts:
for living_space in build_part.living_spaces:
if str(living_space.person_id) == str(person_id):
if str(person_id) in collect_possible_parts_dict:
collect_possible_parts_dict[str(person_id)] = [*collect_possible_parts_dict[str(person_id)], build_part.dict()]
else:
collect_possible_parts_dict[str(person_id)] = [build_part.dict()]
if len(collect_possible_parts_dict.keys()) == 1:
for key, possible in collect_possible_parts_dict.items():
if len(possible) == 1:
update_account_records(prisma_service=prisma_service, parsed_record=parsed_record, collect_possible_parts_dict=collect_possible_parts_dict)
else:
set_prediction_to_redis(process_comment_parser_service=process_comment_parser_service, parsed_record=parsed_record, possible=possible)
renew = True
time.sleep(5)
except Exception as e: except Exception as e:
print(f"Process Comment Parser service error: {str(e)}") print(f"Process Comment Parser service error: {str(e)}")
raise e raise e

View File

@ -22,9 +22,10 @@ current_year = arrow.now().year
class ParsedComment: class ParsedComment:
def __init__(self, account_record_id: int, org_comment: str) -> None: def __init__(self, account_record_id: int, org_comment: str, build_id: int) -> None:
self.account_record_id: int = account_record_id self.account_record_id: int = account_record_id
self.org_comment: str = org_comment self.org_comment: str = org_comment
self.build_id: int = build_id
self.comment: str = None self.comment: str = None
self.people: list[dict] = [] self.people: list[dict] = []
self.parts: list[dict] = [] self.parts: list[dict] = []
@ -131,7 +132,7 @@ class CommentParser(ParserHelpers):
self.people: dict = people_dict self.people: dict = people_dict
self.account_record_id: str = str(account_record.id) self.account_record_id: str = str(account_record.id)
self.build_id: str = str(account_record.build_id) self.build_id: str = str(account_record.build_id)
self.parsed_comment: ParsedComment = ParsedComment(account_record_id=self.account_record_id, org_comment=self.original_comment) self.parsed_comment: ParsedComment = ParsedComment(account_record_id=self.account_record_id, org_comment=self.original_comment, build_id=self.build_id)
@staticmethod @staticmethod
def clean_text_apartment_number(text: str, match): def clean_text_apartment_number(text: str, match):
@ -241,91 +242,39 @@ class CommentParser(ParserHelpers):
def extract_build_parts_info(self): def extract_build_parts_info(self):
""" """
Regex of parts such as : Daire numarasını çeşitli Türkçe yazım biçimlerinden tek regex ile ayıklar.
2 nolu daire Eşleşme bulunursa:
9 NUMARALI DAI - numarayı self.parsed_comment.parts'a ekler
daire 3 - metni temizler (senin clean_text_apartment_number metodunla)
3 nolu dairenin
11nolu daire
Daire No 12
2NOLU DAIRE
12 No lu daire
D:10
NO:11
NO :3
""" """
apartment_number = None COMBINED_APT_PATTERN = re.compile(
pattern1 = re.compile(r'(\d+)\s*nolu\s*daire', re.IGNORECASE) r"""
match = pattern1.search(self.comment) \b(?:
if match: (?P<n1>\d+)\s*nolu\s*dair\w* # 2 nolu daire / 3 nolu dairenin
apartment_number = match.group(1) | (?P<n2>\d+)\s*no\s*lu\s*dair\w* # 12 No lu daire
self.parsed_comment.parts.append(apartment_number) | (?P<n3>\d+)nolu\s*dair\w* # 11nolu daire / 2NOLU DAIRE
self.comment = self.clean_text_apartment_number(self.comment, match) | (?P<n4>\d+)\s*numaral[ıi]\s*dai\w* # 9 numaralı dai/daire
return | dair[]?\s*no\.?\s*(?P<n5>\d+) # Daire No 12 / Daire No. 12
pattern4 = re.compile(r'(\d+)\s*nolu\s*daire\w*', re.IGNORECASE) | \bd\s*[:\-]?\s*(?P<n6>\d+) # D:10 / D-10
match = pattern4.search(self.comment) | \bno\b(?!\s*lu)\s*[:\-]?\s*(?P<n7>\d+) # NO:11 / NO :3 (nolu hariç)
if match: | dair[]?\s*(?P<n8>\d+) # daire 3
apartment_number = match.group(1) | (?P<n9>\d+)\s*numara # 9 NUMARA
self.parsed_comment.parts.append(apartment_number) | \bno\s*/\s*(?P<n10>\d+) # NO/11
self.comment = self.clean_text_apartment_number(self.comment, match) | /(?P<n11>\d+) # /11
return )\b
pattern5 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE) """,
match = pattern5.search(self.comment) re.IGNORECASE | re.VERBOSE
if match: )
apartment_number = match.group(1) m = COMBINED_APT_PATTERN.search(self.comment)
self.parsed_comment.parts.append(apartment_number) if not m:
self.comment = self.clean_text_apartment_number(self.comment, match) return
return for g in m.groups():
pattern7 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE) if g:
match = pattern7.search(self.comment) apartment_number = g
if match: break
apartment_number = match.group(1) self.parsed_comment.parts.append(apartment_number)
self.parsed_comment.parts.append(apartment_number) self.comment = self.clean_text_apartment_number(self.comment, m)
self.comment = self.clean_text_apartment_number(self.comment, match) return
return
pattern8 = re.compile(r'(\d+)\s*no\s*lu\s*daire', re.IGNORECASE)
match = pattern8.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern6 = re.compile(r'daire\s*no\s*(\d+)', re.IGNORECASE)
match = pattern6.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern2 = re.compile(r'(\d+)\s*numarali\s*dai', re.IGNORECASE)
match = pattern2.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern3 = re.compile(r'daire\s*(\d+)', re.IGNORECASE)
match = pattern3.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern9 = re.compile(r'd\s*:\s*(\d+)', re.IGNORECASE)
match = pattern9.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
pattern10 = re.compile(r'no\s*:\s*(\d+)', re.IGNORECASE)
match = pattern10.search(self.comment)
if match:
apartment_number = match.group(1)
self.parsed_comment.parts.append(apartment_number)
self.comment = self.clean_text_apartment_number(self.comment, match)
return
# return found_dict, self.comment
def extract_months(self): def extract_months(self):
""" """
@ -383,7 +332,11 @@ class CommentParser(ParserHelpers):
""" """
original_text = self.comment original_text = self.comment
working_text = original_text working_text = original_text
payment_keywords = {'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'], 'tadilat': ['tadilat', 'tadİlat', 'tadilatı'], 'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']} payment_keywords = {
'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'],
'tadilat': ['tadilat', 'tadİlat', 'tadilatı'],
'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']
}
for payment_type, keywords in payment_keywords.items(): for payment_type, keywords in payment_keywords.items():
for keyword in keywords: for keyword in keywords:
pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE) pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE)
@ -430,16 +383,11 @@ class Parser:
def parse(self): def parse(self):
"""Parse account records based on task requirements""" """Parse account records based on task requirements"""
for account_record in self.account_records: for account_record in self.account_records:
if not account_record.build_id:
continue
comment_parser = CommentParser(account_record=account_record, people_regex_dict=self.people_regex_dict, people_dict=self.people_dict) comment_parser = CommentParser(account_record=account_record, people_regex_dict=self.people_regex_dict, people_dict=self.people_dict)
parsed_comment = comment_parser.parse_comment() parsed_comment = comment_parser.parse_comment()
self.parsed_records.append(parsed_comment) self.parsed_records.append(parsed_comment)
for parsed_record in self.parsed_records:
print("*" * 150)
pprint.pprint({
"original_comment": parsed_record.org_comment, "comment": parsed_record.comment, "people": parsed_record.people,
"parts": parsed_record.parts, "months": parsed_record.months, "years": parsed_record.years, "payment_types": parsed_record.payment_types
}, indent=2)
return self.parsed_records return self.parsed_records
@ -529,6 +477,96 @@ def commented_code():
print(f"Total account records processed : {len(account_records)}") print(f"Total account records processed : {len(account_records)}")
# def extract_build_parts_info(self):
# """
# Regex of parts such as :
# 2 nolu daire
# 9 NUMARALI DAI
# daire 3
# 3 nolu dairenin
# 11nolu daire
# Daire No 12
# 2NOLU DAIRE
# 12 No lu daire
# D:10
# NO:11
# NO :3
# """
# apartment_number = None
# pattern1 = re.compile(r'(\d+)\s*nolu\s*daire', re.IGNORECASE)
# match = pattern1.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern4 = re.compile(r'(\d+)\s*nolu\s*daire\w*', re.IGNORECASE)
# match = pattern4.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern5 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
# match = pattern5.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern7 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
# match = pattern7.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern8 = re.compile(r'(\d+)\s*no\s*lu\s*daire', re.IGNORECASE)
# match = pattern8.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern6 = re.compile(r'daire\s*no\s*(\d+)', re.IGNORECASE)
# match = pattern6.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern2 = re.compile(r'(\d+)\s*numarali\s*dai', re.IGNORECASE)
# match = pattern2.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern3 = re.compile(r'daire\s*(\d+)', re.IGNORECASE)
# match = pattern3.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern9 = re.compile(r'd\s*:\s*(\d+)', re.IGNORECASE)
# match = pattern9.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern10 = re.compile(r'no\s*:\s*(\d+)', re.IGNORECASE)
# match = pattern10.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# # return found_dict, self.comment
# if __name__ == "__main__": # if __name__ == "__main__":
# people_query = sqlalchemy_text(""" # people_query = sqlalchemy_text("""

View File

@ -139,6 +139,7 @@ class ConfigServices:
TASK_SEEN_PREFIX: str = "BANK:SERVICES:TASK:SEEN" TASK_SEEN_PREFIX: str = "BANK:SERVICES:TASK:SEEN"
TASK_DELETED_PREFIX: str = "BANK:SERVICES:TASK:DELETED" TASK_DELETED_PREFIX: str = "BANK:SERVICES:TASK:DELETED"
TASK_COMMENT_PARSER: str = "BANK:SERVICES:TASK:COMMENT:PARSER" TASK_COMMENT_PARSER: str = "BANK:SERVICES:TASK:COMMENT:PARSER"
TASK_PREDICT_RESULT: str = "BANK:SERVICES:TASK:COMMENT:RESULT"
SERVICE_PREFIX_MAIL_READER: str = "MailReader" SERVICE_PREFIX_MAIL_READER: str = "MailReader"
SERVICE_PREFIX_MAIL_PARSER: str = "MailParser" SERVICE_PREFIX_MAIL_PARSER: str = "MailParser"

View File

@ -0,0 +1,12 @@
// Client generator: emits the Python client (prisma-client-py) using the
// asyncio interface, with experimental Decimal support switched on.
generator client {
provider = "prisma-client-py"
// binaryTargets = ["native", "linux-musl-openssl-3.0.x"]
enable_experimental_decimal = true
// NOTE(review): -1 presumably requests unbounded recursive type generation --
// confirm against the prisma-client-py configuration docs.
recursive_type_depth = -1
interface = "asyncio"
}
// PostgreSQL datasource used by the generated client.
// NOTE(review): the connection URL below embeds the user, password and host
// directly in the schema. Prisma supports `url = env("DATABASE_URL")`;
// consider moving the credentials out of version control.
datasource db {
provider = "postgresql"
url = "postgresql://postgres:password@10.10.2.14:5432/postgres?schema=public"
}

View File

@ -1,6 +1,5 @@
generator client { generator client {
provider = "prisma-client-py" provider = "prisma-client-py"
// binaryTargets = ["native", "linux-musl-openssl-3.0.x"]
enable_experimental_decimal = true enable_experimental_decimal = true
recursive_type_depth = -1 recursive_type_depth = -1
interface = "asyncio" interface = "asyncio"
@ -406,6 +405,7 @@ model account_records {
accounting_receipt_number Int @default(0) accounting_receipt_number Int @default(0)
status_id Int @default(0) @db.SmallInt status_id Int @default(0) @db.SmallInt
approved_record Boolean @default(false) approved_record Boolean @default(false)
is_predicted Boolean @default(false)
import_file_name String? @db.VarChar import_file_name String? @db.VarChar
receive_debit Int? receive_debit Int?
receive_debit_uu_id String? @db.VarChar receive_debit_uu_id String? @db.VarChar

View File

@ -1125,3 +1125,13 @@ class ProcessCommentParserService:
Set task requirements in Redis Set task requirements in Redis
""" """
return self.service_retriever.redis_handler.set(f'{ConfigServices.TASK_COMMENT_PARSER}', dumps(task_object)) return self.service_retriever.redis_handler.set(f'{ConfigServices.TASK_COMMENT_PARSER}', dumps(task_object))
def set_predict_account_record(self, predict_account_record: dict):
    """
    Set the prediction results in Redis

    The mapping is serialized with ``dumps`` and written under the
    ``ConfigServices.TASK_PREDICT_RESULT`` key; the Redis handler's result is
    returned unchanged.
    """
    redis_handler = self.service_retriever.redis_handler
    redis_key = f'{ConfigServices.TASK_PREDICT_RESULT}'
    serialized = dumps(predict_account_record)
    return redis_handler.set(redis_key, serialized)
def get_predict_account_record(self):
    """
    Get the prediction results from Redis

    Reads the ``ConfigServices.TASK_PREDICT_RESULT`` key and decodes it with
    ``loads``. Returns ``None`` when the handler yields a falsy value (for
    example when nothing has been stored yet).
    """
    redis_handler = self.service_retriever.redis_handler
    stored_value = redis_handler.get(f'{ConfigServices.TASK_PREDICT_RESULT}')
    if not stored_value:
        return None
    return loads(stored_value)