Comment Parser Regex service completed

This commit is contained in:
Berkay 2025-08-14 21:20:40 +03:00
parent 82b1d4825b
commit 456203f5cf
7 changed files with 245 additions and 101 deletions

View File

@ -403,6 +403,7 @@ model account_records {
accounting_receipt_number Int @default(0)
status_id Int @default(0) @db.SmallInt
approved_record Boolean @default(false)
is_predicted Boolean @default(false)
import_file_name String? @db.VarChar
receive_debit Int?
receive_debit_uu_id String? @db.VarChar

View File

@ -3,7 +3,7 @@ import arrow
from typing import Optional
from pydantic import BaseModel
from matchers import Parser
from matchers import ParsedComment, Parser
from models import BuildingCluster, BuildPart, BuildLivingSpace, Person, User, OccupantType
from Depends.prisma_client import PrismaService
@ -126,7 +126,7 @@ def get_all_person_data_due_to_build(prisma_service: PrismaService):
occupant_type_uu_id=living_space.occupant_type_uu_id,
is_confirmed=living_space.is_confirmed,
active=living_space.active,
person=person_obj,
person=person_obj,
occupant_types=occupant_type_obj
)
part_obj.living_spaces.append(living_space_obj)
@ -138,22 +138,104 @@ def get_all_companies_data(prisma_service: PrismaService):
return prisma_service.find_many(table="companies", query={"active": True, "is_confirmed": True})
def get_all_person_data_due_to_account_record(prisma_service: PrismaService):
    """
    Fetch the account records that have not been predicted yet.

    Two ``find_many`` queries are issued against ``account_records``. Both
    require ``is_predicted=False``, ``active=True``, ``is_confirmed=True`` and
    ``approved_record=False``; they differ only in the sign of
    ``currency_value``.

    Returns:
        A ``(arriving, debt)`` tuple: the records with ``currency_value > 0``
        followed by the records with ``currency_value < 0``.
    """
    pending_filter = {"is_predicted": False, "active": True, "is_confirmed": True, "approved_record": False}
    incoming = prisma_service.find_many(table="account_records", query={**pending_filter, "currency_value": {"gt": 0}})
    outgoing = prisma_service.find_many(table="account_records", query={**pending_filter, "currency_value": {"lt": 0}})
    return incoming, outgoing
def check_if_any_account_record_added(prisma_service: PrismaService):
    """
    Tell whether at least one account record is still waiting for a prediction.

    Returns:
        True when ``find_first`` yields an active, confirmed, unapproved record
        with ``is_predicted=False``; False otherwise.
    """
    pending_filter = {"is_predicted": False, "active": True, "is_confirmed": True, "approved_record": False}
    first_pending = prisma_service.find_first(table="account_records", query=pending_filter)
    if first_pending is None:
        return False
    return True
def check_if_any_building_added(prisma_service: PrismaService, build_id_list: list[str | int]):
    """
    Tell whether an active, confirmed building exists outside the known ids.

    Args:
        prisma_service: Service used to query the ``build`` table.
        build_id_list: Ids of the buildings that are already known; every
            entry is converted with ``int``.

    Returns:
        True when ``find_first`` returns a building whose id is not in
        ``build_id_list``; False otherwise.
    """
    known_ids = list(map(int, build_id_list))
    unknown_building_filter = {"active": True, "is_confirmed": True, "id": {"not": {"in": known_ids}}}
    return prisma_service.find_first(table="build", query=unknown_building_filter) is not None
def update_account_record_set_is_predict_true(prisma_service: PrismaService, account_record_id: int):
    """
    Flag one account record as predicted.

    Returns:
        Whatever ``prisma_service.update`` returns for the updated record.
    """
    # NOTE(review): update_account_records passes the row selector as `where=`
    # while this call uses `query=` — confirm which keyword PrismaService.update expects.
    record_selector = {"id": account_record_id}
    predicted_flag = {"is_predicted": True}
    return prisma_service.update(table="account_records", query=record_selector, data=predicted_flag)
def update_account_records(prisma_service: PrismaService, parsed_record: ParsedComment, collect_possible_parts_dict: dict[str, list[dict]]):
    """
    Link an account record to the matched person, build part and payment type.

    The first entry of ``parsed_record.people`` is treated as the sender. Its
    first candidate build part is connected to the account record and the
    record is flagged with ``is_predicted=True``. When the first parsed payment
    type is ``"aidat"`` or ``"tadilat"`` the matching ``api_enum_dropdown`` row
    (keys ``BDT-D`` / ``BDT-R``) is connected as well.

    Args:
        prisma_service: Service used for the dropdown lookup and the update.
        parsed_record: Parsed comment carrying ``people``, ``payment_types``
            and ``account_record_id``.
        collect_possible_parts_dict: Candidate build parts keyed by
            ``str(person.id)``.

    Returns:
        The result of ``prisma_service.update``, or ``None`` when the record
        has no people or no candidate build part exists for the first person.
    """
    if not parsed_record.people:
        return None
    person = parsed_record.people[0]

    # The dict may only hold candidates for a different person of the record;
    # indexing it blindly would raise KeyError/IndexError.
    candidate_parts = collect_possible_parts_dict.get(str(person.id))
    if not candidate_parts:
        return None
    build_part = candidate_parts[0]

    payment_enum_keys = {"aidat": "BDT-D", "tadilat": "BDT-R"}
    payment_type_result = None
    if parsed_record.payment_types:
        enum_key = payment_enum_keys.get(parsed_record.payment_types[0])
        if enum_key is not None:
            payment_type_result = prisma_service.find_first(table="api_enum_dropdown", query={"key": enum_key})

    add_dict = {
        "build_parts": {"connect": {"id": int(build_part["id"])}},
        "build_parts_uu_id": str(build_part["uu_id"]),
        "people_account_records_send_person_idTopeople": {"connect": {"id": int(person.id)}},
        "send_person_uu_id": str(person.uu_id),
        "is_predicted": True,
    }
    if payment_type_result:
        add_dict["api_enum_dropdown_account_records_payment_result_typeToapi_enum_dropdown"] = {"connect": {"id": int(payment_type_result.id)}}
        add_dict["payment_result_type_uu_id"] = str(payment_type_result.uu_id)
    return prisma_service.update(table="account_records", where={"id": int(parsed_record.account_record_id)}, data=add_dict)
def set_prediction_to_redis(process_comment_parser_service: ProcessCommentParserService, parsed_record: ParsedComment, possible: list[dict]):
    """
    Store an ambiguous prediction in Redis and flag the record as predicted.

    The prediction map held by ``process_comment_parser_service`` is loaded,
    the entry for ``parsed_record.account_record_id`` is added or replaced, and
    the whole map is written back. Afterwards the account record is marked
    with ``is_predicted=True`` in the database.

    Args:
        process_comment_parser_service: Service exposing
            ``get_predict_account_record`` / ``set_predict_account_record``.
        parsed_record: Parsed comment the prediction belongs to.
        possible: Candidate build parts to store under ``"predictions"``.
    """
    # get_predict_account_record() returns None while nothing has been stored
    # yet; start from an empty map instead of failing on item assignment.
    predict_account_records = process_comment_parser_service.get_predict_account_record() or {}
    predict_account_records[str(parsed_record.account_record_id)] = {
        "account_record_id": parsed_record.account_record_id, "build_id": parsed_record.build_id, "payment_type": parsed_record.payment_types,
        "months": parsed_record.months, "years": parsed_record.years, "parts": parsed_record.parts, "predictions": possible,
    }
    process_comment_parser_service.set_predict_account_record(predict_account_record=predict_account_records)
    # NOTE(review): `prisma_service` is not a parameter; this relies on the
    # module-level instance created in the `__main__` block.
    # The parser stores the record id as a string, so cast it the same way
    # update_account_records does before using it as a row selector.
    update_account_record_set_is_predict_true(prisma_service=prisma_service, account_record_id=int(parsed_record.account_record_id))
    return
if __name__ == "__main__":
    # Service entry point: polls the database for unpredicted account records,
    # parses their comments and either links the record to a build part
    # directly or stores the ambiguous candidates in Redis.
    print("Process Comment Parser service started")
    renew = False
    prisma_service = PrismaService()
    process_comment_parser_service = ProcessCommentParserService()
    search_people = get_all_person_data_due_to_build(prisma_service)
    process_comment_parser_service.set_task_requirements(search_people)
    # Single initial fetch; the earlier inline find_many calls were
    # immediately overwritten by this helper and have been dropped.
    arriving_account_records, debt_account_records = get_all_person_data_due_to_account_record(prisma_service)
    try:
        while True:
            time.sleep(5)
            # Reload when unpredicted records exist or the previous pass
            # changed some (the original condition was negated and therefore
            # never picked up newly added records).
            if check_if_any_account_record_added(prisma_service) or renew:
                arriving_account_records, debt_account_records = get_all_person_data_due_to_account_record(prisma_service)
                renew = False
            print("Process Comment Parser service started sleeping for 5 seconds")
            tasks_dict: dict[str, BuildingCluster] = process_comment_parser_service.get_task_requirements()
            task_requirements: dict[str, BuildingCluster] = {idx: BuildingCluster(**value) for idx, value in tasks_dict.items()}
            # Rebuild the cached building data only when a building outside
            # the cached ids exists (the original check was negated too).
            if check_if_any_building_added(prisma_service, list(task_requirements.keys())):
                search_people = get_all_person_data_due_to_build(prisma_service)
                process_comment_parser_service.set_task_requirements(search_people)
                tasks_dict = process_comment_parser_service.get_task_requirements()
                task_requirements = {idx: BuildingCluster(**value) for idx, value in tasks_dict.items()}
            parser = Parser(account_records=arriving_account_records, task_requirements=task_requirements)
            parsed_records = parser.parse()
            for parsed_record in parsed_records:
                if not parsed_record.people:
                    continue
                building = task_requirements.get(str(parsed_record.build_id))
                if building is None:
                    # Building is not part of the cached requirements; skip the
                    # record instead of crashing the service with a KeyError.
                    continue
                collect_possible_parts_dict = {}
                for person in parsed_record.people:
                    person_key = str(person.id)
                    for build_part in building.build_parts:
                        for living_space in build_part.living_spaces:
                            if str(living_space.person_id) == person_key:
                                collect_possible_parts_dict.setdefault(person_key, []).append(build_part.dict())
                # Only act when exactly one person of the record could be
                # matched to living spaces.
                if len(collect_possible_parts_dict) == 1:
                    possible = next(iter(collect_possible_parts_dict.values()))
                    if len(possible) == 1:
                        update_account_records(prisma_service=prisma_service, parsed_record=parsed_record, collect_possible_parts_dict=collect_possible_parts_dict)
                    else:
                        set_prediction_to_redis(process_comment_parser_service=process_comment_parser_service, parsed_record=parsed_record, possible=possible)
                    renew = True
            # Second pause at the end of the pass, kept from the original loop.
            time.sleep(5)
    except Exception as e:
        print(f"Process Comment Parser service error: {str(e)}")
        raise

View File

@ -22,9 +22,10 @@ current_year = arrow.now().year
class ParsedComment:
def __init__(self, account_record_id: int, org_comment: str) -> None:
def __init__(self, account_record_id: int, org_comment: str, build_id: int) -> None:
self.account_record_id: int = account_record_id
self.org_comment: str = org_comment
self.build_id: int = build_id
self.comment: str = None
self.people: list[dict] = []
self.parts: list[dict] = []
@ -131,7 +132,7 @@ class CommentParser(ParserHelpers):
self.people: dict = people_dict
self.account_record_id: str = str(account_record.id)
self.build_id: str = str(account_record.build_id)
self.parsed_comment: ParsedComment = ParsedComment(account_record_id=self.account_record_id, org_comment=self.original_comment)
self.parsed_comment: ParsedComment = ParsedComment(account_record_id=self.account_record_id, org_comment=self.original_comment, build_id=self.build_id)
@staticmethod
def clean_text_apartment_number(text: str, match):
@ -241,91 +242,39 @@ class CommentParser(ParserHelpers):
def extract_build_parts_info(self):
    """
    Extract the apartment number from the comment with one combined regex.

    Recognised spellings include:
        2 nolu daire
        9 NUMARALI DAI
        daire 3
        3 nolu dairenin
        11nolu daire
        Daire No 12
        2NOLU DAIRE
        12 No lu daire
        D:10
        NO:11
        NO :3

    When a match is found the number is appended to
    ``self.parsed_comment.parts`` and the matched text is removed from
    ``self.comment`` through ``clean_text_apartment_number``. Without a match
    nothing is changed.
    """
    # The optional trailing "e" of "daire" is written as `daire?`; the former
    # `dair[]?` opened a character class that was never closed as intended and
    # made the whole pattern fail to compile.
    combined_apt_pattern = re.compile(
        r"""
        \b(?:
            (?P<n1>\d+)\s*nolu\s*dair\w*              # 2 nolu daire / 3 nolu dairenin
          | (?P<n2>\d+)\s*no\s*lu\s*dair\w*           # 12 No lu daire
          | (?P<n3>\d+)nolu\s*dair\w*                 # 11nolu daire / 2NOLU DAIRE
          | (?P<n4>\d+)\s*numaral[ıi]\s*dai\w*        # 9 numaralı dai/daire
          | daire?\s*no\.?\s*(?P<n5>\d+)              # Daire No 12 / Daire No. 12
          | \bd\s*[:\-]?\s*(?P<n6>\d+)                # D:10 / D-10
          | \bno\b(?!\s*lu)\s*[:\-]?\s*(?P<n7>\d+)    # NO:11 / NO :3 (nolu hariç)
          | daire?\s*(?P<n8>\d+)                      # daire 3
          | (?P<n9>\d+)\s*numara                      # 9 NUMARA
          | \bno\s*/\s*(?P<n10>\d+)                   # NO/11
          | /(?P<n11>\d+)                             # /11
        )\b
        """,
        re.IGNORECASE | re.VERBOSE
    )
    m = combined_apt_pattern.search(self.comment)
    if not m:
        return
    # Exactly one named group takes part in a match (one per alternative), so
    # the last matched group is the captured apartment number.
    apartment_number = m.group(m.lastgroup)
    self.parsed_comment.parts.append(apartment_number)
    self.comment = self.clean_text_apartment_number(self.comment, m)
    return
def extract_months(self):
"""
@ -383,7 +332,11 @@ class CommentParser(ParserHelpers):
"""
original_text = self.comment
working_text = original_text
payment_keywords = {'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'], 'tadilat': ['tadilat', 'tadİlat', 'tadilatı'], 'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']}
payment_keywords = {
'aidat': ['aidat', 'aİd', 'aid', 'aidatı', 'aidati'],
'tadilat': ['tadilat', 'tadİlat', 'tadilatı'],
'yakit': ['yakit', 'yakıt', 'yakıtı', 'yakiti']
}
for payment_type, keywords in payment_keywords.items():
for keyword in keywords:
pattern = re.compile(r'\b' + keyword + r'\b', re.IGNORECASE)
@ -430,16 +383,11 @@ class Parser:
def parse(self):
    """
    Parse every account record that belongs to a building.

    Records without a ``build_id`` are skipped. Each remaining record is run
    through ``CommentParser.parse_comment`` and the result is appended to
    ``self.parsed_records``. All collected results are then pretty-printed.

    Returns:
        ``self.parsed_records`` after the newly parsed comments were added.
    """
    for account_record in self.account_records:
        if account_record.build_id:
            parser_for_record = CommentParser(account_record=account_record, people_regex_dict=self.people_regex_dict, people_dict=self.people_dict)
            self.parsed_records.append(parser_for_record.parse_comment())

    separator = "*" * 150
    for parsed_record in self.parsed_records:
        print(separator)
        summary = {
            "original_comment": parsed_record.org_comment, "comment": parsed_record.comment, "people": parsed_record.people,
            "parts": parsed_record.parts, "months": parsed_record.months, "years": parsed_record.years, "payment_types": parsed_record.payment_types
        }
        pprint.pprint(summary, indent=2)
    return self.parsed_records
@ -529,6 +477,96 @@ def commented_code():
print(f"Total account records processed : {len(account_records)}")
# def extract_build_parts_info(self):
# """
# Regex of parts such as :
# 2 nolu daire
# 9 NUMARALI DAI
# daire 3
# 3 nolu dairenin
# 11nolu daire
# Daire No 12
# 2NOLU DAIRE
# 12 No lu daire
# D:10
# NO:11
# NO :3
# """
# apartment_number = None
# pattern1 = re.compile(r'(\d+)\s*nolu\s*daire', re.IGNORECASE)
# match = pattern1.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern4 = re.compile(r'(\d+)\s*nolu\s*daire\w*', re.IGNORECASE)
# match = pattern4.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern5 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
# match = pattern5.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern7 = re.compile(r'(\d+)nolu\s*daire', re.IGNORECASE)
# match = pattern7.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern8 = re.compile(r'(\d+)\s*no\s*lu\s*daire', re.IGNORECASE)
# match = pattern8.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern6 = re.compile(r'daire\s*no\s*(\d+)', re.IGNORECASE)
# match = pattern6.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern2 = re.compile(r'(\d+)\s*numarali\s*dai', re.IGNORECASE)
# match = pattern2.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern3 = re.compile(r'daire\s*(\d+)', re.IGNORECASE)
# match = pattern3.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern9 = re.compile(r'd\s*:\s*(\d+)', re.IGNORECASE)
# match = pattern9.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# pattern10 = re.compile(r'no\s*:\s*(\d+)', re.IGNORECASE)
# match = pattern10.search(self.comment)
# if match:
# apartment_number = match.group(1)
# self.parsed_comment.parts.append(apartment_number)
# self.comment = self.clean_text_apartment_number(self.comment, match)
# return
# # return found_dict, self.comment
# if __name__ == "__main__":
# people_query = sqlalchemy_text("""

View File

@ -139,6 +139,7 @@ class ConfigServices:
TASK_SEEN_PREFIX: str = "BANK:SERVICES:TASK:SEEN"
TASK_DELETED_PREFIX: str = "BANK:SERVICES:TASK:DELETED"
TASK_COMMENT_PARSER: str = "BANK:SERVICES:TASK:COMMENT:PARSER"
TASK_PREDICT_RESULT: str = "BANK:SERVICES:TASK:COMMENT:RESULT"
SERVICE_PREFIX_MAIL_READER: str = "MailReader"
SERVICE_PREFIX_MAIL_PARSER: str = "MailParser"

View File

@ -0,0 +1,12 @@
// Code generator configuration: produces the Python Prisma client.
generator client {
provider = "prisma-client-py"
// binaryTargets = ["native", "linux-musl-openssl-3.0.x"]
// Opts in to the generator's experimental Decimal support.
enable_experimental_decimal = true
// NOTE(review): -1 presumably requests fully recursive generated types — confirm against the Prisma Client Python config docs.
recursive_type_depth = -1
// Selects the asyncio flavour of the generated client.
interface = "asyncio"
}
// PostgreSQL datasource for the generated client.
// The connection string is read from the DATABASE_URL environment variable so
// that credentials and host addresses are not committed to the repository.
// Expected format:
//   DATABASE_URL="postgresql://USER:PASSWORD@HOST:5432/postgres?schema=public"
datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

View File

@ -1,6 +1,5 @@
generator client {
provider = "prisma-client-py"
// binaryTargets = ["native", "linux-musl-openssl-3.0.x"]
enable_experimental_decimal = true
recursive_type_depth = -1
interface = "asyncio"
@ -406,6 +405,7 @@ model account_records {
accounting_receipt_number Int @default(0)
status_id Int @default(0) @db.SmallInt
approved_record Boolean @default(false)
is_predicted Boolean @default(false)
import_file_name String? @db.VarChar
receive_debit Int?
receive_debit_uu_id String? @db.VarChar

View File

@ -1125,3 +1125,13 @@ class ProcessCommentParserService:
Set task requirements in Redis
"""
return self.service_retriever.redis_handler.set(f'{ConfigServices.TASK_COMMENT_PARSER}', dumps(task_object))
def set_predict_account_record(self, predict_account_record: dict):
    """
    Set the prediction results in Redis

    The dict is serialised with ``dumps`` and stored under
    ``ConfigServices.TASK_PREDICT_RESULT``; the result of the Redis ``set``
    call is returned.
    """
    redis_key = f'{ConfigServices.TASK_PREDICT_RESULT}'
    payload = dumps(predict_account_record)
    return self.service_retriever.redis_handler.set(redis_key, payload)
def get_predict_account_record(self):
    """
    Get the prediction results from Redis

    Returns:
        The value stored under ``ConfigServices.TASK_PREDICT_RESULT`` decoded
        with ``loads``, or ``None`` when Redis returns nothing (or an empty
        value) for that key.
    """
    stored = self.service_retriever.redis_handler.get(f'{ConfigServices.TASK_PREDICT_RESULT}')
    if not stored:
        return None
    return loads(stored)