Updated the services task database

This commit is contained in:
2025-08-19 20:17:14 +03:00
parent e4f6afbc93
commit 4e6774a15b
25 changed files with 824 additions and 124 deletions

View File

@@ -10,6 +10,7 @@ COPY app/services/parser/excel/README.md ./
# Copy only the sources this service needs, then install the project package.
COPY app/core ./app/core
COPY app/services/common/ ./app/services/common/
COPY app/services/types/ ./app/services/types/
COPY app/services/parser/excel/ ./app/services/parser/excel/
RUN pip install --upgrade pip && pip install --no-cache-dir .

View File

@@ -0,0 +1,118 @@
import sys
import logging
from time import sleep
from typing import List
import pandas as pd
from datetime import datetime
from io import BytesIO
from base64 import b64decode
from unidecode import unidecode
from app.services.types.mail import ProcessMailObject, MailParser
# Log to stdout and to a service-local file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout), logging.FileHandler('isbank_parser_service.log')]
)
logger = logging.getLogger('IsBank_Parser_Service')
class IsbankMailParserService:
def try_dataframe_extract_with_xlsx(self, binary_data: BytesIO):
try:
df = pd.read_excel(binary_data, engine='openpyxl')
return df
except Exception as e:
return None
def try_dataframe_extract_with_xls(self, binary_data: BytesIO):
try:
df = pd.read_excel(binary_data, engine='xlrd')
return df
except Exception as e:
return None
def try_dataframe_extract_else(self, binary_data: BytesIO):
try:
df = pd.read_excel(binary_data, engine='openpyxl')
except Exception as e1:
try:
binary_data.seek(0)
df = pd.read_excel(binary_data, engine='xlrd')
except Exception as e2:
return None
return df
def parse_record(self, excel_frame: pd.DataFrame, file_name: str) -> list[dict]:
"""Parse Excel file data.
Args:
excel_frame: DataFrame containing Excel data
Returns:
list[dict]: List of parsed data dictionaries
"""
iban, data_list = "", []
try:
for row in excel_frame.itertuples():
if "IBAN" in str(row[3]).upper():
iban = str(row[5]).replace(" ", "")
if not str(row[1]) == "nan" and not str(row[2]) == "nan":
if len(str(row[1]).split("/")) > 2:
data_list.append(dict(
filename=file_name, iban=str(iban), bank_date=datetime.strptime(str(row[1]), "%d/%m/%Y-%H:%M:%S").strftime("%Y-%m-%d %H:%M:%S"),
channel_branch=unidecode(str(row[3])), currency_value=(float(str(row[4]).replace(",", "")) if row[4] else 0),
balance=(float(str(row[5]).replace(",", "")) if row[5] else 0), additional_balance=(float(str(row[6]).replace(",", "")) if row[6] else 0),
process_name=str(row[7]), process_type=unidecode(str(row[8])), process_comment=unidecode(str(row[9])), bank_reference_code=str(row[15]),
))
except Exception as e:
print(f"[PARSER_SERVICE] Error parsing Excel file: {str(e)}")
return data_list
def parse_dataframes(self, dataframe: pd.DataFrame, task: ProcessMailObject, attachment_data: MailParser):
data_list = self.parse_record(dataframe, attachment_data.filename)
print(f"[PARSER_SERVICE] Successfully parsed {len(data_list)} records from Excel file")
if data_list:
print(f"Updated service data for task {task.uuid} with {len(data_list)} records")
return data_list
return None
def process_task(self, active_task: ProcessMailObject):
"""Process a task object using the MailParserService
Args: task: RedisTaskObject or task dictionary to process
"""
try:
for attachment in active_task.data.attachments:
task_id = active_task.data.id
if not attachment or not attachment.data:
print(f"[PARSER_SERVICE] No data found for task {task_id}")
continue
binary_data: bytes = b64decode(attachment.data)
excel_data = BytesIO(binary_data)
df = self.try_dataframe_extract_with_xlsx(excel_data)
if df is None:
excel_data.seek(0)
df = self.try_dataframe_extract_with_xls(excel_data)
if df is None:
excel_data.seek(0)
df = self.try_dataframe_extract_else(excel_data)
if df is not None:
return self.parse_dataframes(df, active_task, attachment)
except Exception as e:
print(f"[PARSER_SERVICE] Error processing task: {str(e)}")
# Module-level singleton: the service object keeps no per-call state.
parser = IsbankMailParserService()


def parse_isbank_mail(mailObject: ProcessMailObject):
    """Entry point: parse one mail task's attachments.

    Returns the parsed record list, or None when processing raised.
    """
    try:
        result = parser.process_task(mailObject)
    except Exception as e:
        print(f"[PARSER_SERVICE] Error parsing mail: {str(e)}")
        return None
    return result

View File

@@ -1,40 +1,79 @@
import os
import asyncio
import fnmatch
from typing import Awaitable, Callable
from app.services.common.service_base_async import ServiceBaseAsync, Job
from app.services.parser.excel.isbank.parser import parse_isbank_mail
from app.services.types.mail import MailParsedResult, PlainMailReader, ProcessMailObject
from app.services.common.service_base_async import ServiceBaseAsync
from app.services.types.queue import Enqueue
from app.services.types.task import Job
# Producer tuning, overridable via environment variables.
PRODUCE_BURST = int(os.getenv("PRODUCE_BURST", "10"))
PRODUCE_ONCE = os.getenv("PRODUCE_ONCE", "true").lower() == "true"
EVENT_TYPE = os.getenv("EVENT_TYPE", "db-mongo")
# Fixed delay (seconds) the handlers below sleep to pace processing.
PROCESS_SEC = 10
# Maps sender mail domains (fnmatch patterns) to parser callables.
bank_mail_dict = {
    "ileti.isbank.com.tr": parse_isbank_mail
}
def grab_fn_callable(domain: str) -> "Callable | None":
    """Resolve the parser callable registered for *domain*.

    Keys of bank_mail_dict are fnmatch-style patterns, so wildcard
    entries would match whole families of domains.

    Returns:
        The first matching callable, or None when no pattern matches.
        NOTE(review): the previous annotation promised an async
        ``(svc, job)`` handler, but registered values are plain parsers
        taking a ProcessMailObject; annotation corrected accordingly.
    """
    for pattern, parse_fn in bank_mail_dict.items():
        if fnmatch.fnmatch(domain, pattern):
            return parse_fn
    return None
async def produce(svc: ServiceBaseAsync):
    """Producer hook: idles for PROCESS_SEC each cycle (nothing to emit)."""
    await asyncio.sleep(PROCESS_SEC)
async def handle_from_parser(svc: ServiceBaseAsync, job: dict):
    """Handle jobs from the parser queue: validate, ack, and pause.

    The source span held both the pre- and post-change signatures of this
    handler (diff residue); the updated typed version is kept here.
    """
    # Result unused — presumably Job(**job) validates the payload shape;
    # TODO confirm against the Job model.
    job_model = Job(**job)
    await svc.ack_current()
    await asyncio.sleep(PROCESS_SEC)
async def handle_from_mail(svc: ServiceBaseAsync, job):
    """Forward a mail job to the excel-publish queue, then ack it.

    Superseded in this commit by handle_from_mail_parser (see the
    changed __main__ registration); kept here as the pre-change handler.
    """
    job = Job(**job)
    await svc.enqueue(task_id=job.task_id, payload=job.payload, type_="parser.excel.publish")
    print("Parser Excel Consumer from mail handle_from_mail :", job.task_id)
    await svc.ack_current()
async def handle_from_mail_parser(svc: ServiceBaseAsync, job: dict):
    """Route an incoming mail job to its bank-specific Excel parser.

    Looks up a parser by the sender's domain, runs it, and republishes
    the result: jobs that produced no rows go to "mail.service.publish",
    jobs with rows continue to "parser.comment.publish". Jobs from
    unknown domains are dead-lettered.
    """
    job_model = Job(**job)
    process_mail_object = ProcessMailObject(**job_model.payload)
    mail_id = process_mail_object.data.id
    task_id = f"IsBankServiceExcelParser_{mail_id}"
    await asyncio.sleep(PROCESS_SEC)
    function_handler = grab_fn_callable(process_mail_object.data.from_.domain)
    if not function_handler:
        await svc.dlq_current(job, error="Invalid domain")
        return
    parsed_data = function_handler(process_mail_object)
    # Both outcomes publish the identical result payload; only the target
    # action and the debug print differ, so build the result once.
    plain_mail_data = PlainMailReader(**process_mail_object.data.model_dump())
    parsed_result = MailParsedResult(task_id=task_id, mail_data=plain_mail_data.model_dump(), send_to="Completed", data=parsed_data)
    if not parsed_data:
        # NOTE(review): empty parses are still sent with send_to="Completed";
        # confirm that is the intended routing for failed/empty results.
        print("Parser Excel Consumer from mail handle_from_mail :", parsed_result)
        enqueue = Enqueue(task_id=task_id, payload=parsed_result.model_dump(), action="mail.service.publish")
        await svc.enqueue(enqueue)
        await svc.ack_current()
    else:
        enqueue = Enqueue(task_id=task_id, payload=parsed_result.model_dump(), action="parser.comment.publish")
        await svc.enqueue(enqueue)
        await svc.ack_current()
        print("Parser Excel Consumer from mail handle_from_mail :", task_id)
async def consume_default(svc: ServiceBaseAsync, job: dict):
    """Fallback consumer: validate the job, ack it, and pause.

    The source span held both the pre- and post-change signatures of this
    handler (diff residue); the updated typed version is kept here.
    """
    # Result unused — presumably Job(**job) validates the payload shape;
    # TODO confirm against the Job model.
    job_model = Job(**job)
    await svc.ack_current()
    await asyncio.sleep(PROCESS_SEC)
if __name__ == "__main__":
    # Register the new mail handler; the span also showed the superseded
    # handle_from_mail registration (diff residue), dropped here.
    svc = ServiceBaseAsync(produce_fn=produce, consume_fn=consume_default, handlers={"parser.mail.publish": handle_from_mail_parser})
    asyncio.run(svc.run())

View File

@@ -16,7 +16,12 @@ dependencies = [
"aio-pika>=9.4.1",
"prometheus-client>=0.20.0",
"uvloop>=0.19.0",
"pydantic"
"pydantic",
"arrow>=1.3.0",
"pandas>=2.2.3",
"unidecode>=1.3.8",
"xlrd>=2.0.1",
"openpyxl>=3.1.2",
]
[project.optional-dependencies]