updated Services Task database
This commit is contained in:
@@ -11,6 +11,7 @@ COPY app/services/parser/comment/README.md ./
|
||||
COPY app/core ./app/core
|
||||
COPY app/services/common/ ./app/services/common/
|
||||
COPY app/services/parser/comment/ ./app/services/parser/comment/
|
||||
COPY app/services/types/ ./app/services/types/
|
||||
|
||||
RUN pip install --upgrade pip && pip install --no-cache-dir .
|
||||
RUN mkdir -p /app/data
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import asyncio
|
||||
|
||||
from app.services.common.service_base_async import ServiceBaseAsync, Job
|
||||
from app.services.common.service_base_async import ServiceBaseAsync
|
||||
from app.services.types.queue import Enqueue
|
||||
from app.services.types.task import Job
|
||||
from app.services.types.mail import MailParsedResult
|
||||
from app.services.types.mail import PlainMailReader
|
||||
from app.services.types.mail import ProcessMailObject
|
||||
|
||||
|
||||
# Seconds to sleep after handling a job (simple throttle between messages).
PROCESS_SEC = 10
|
||||
@@ -12,10 +17,11 @@ async def produce(_svc: ServiceBaseAsync):
|
||||
|
||||
|
||||
async def handle_excel_publish(svc: ServiceBaseAsync, job: dict):
    """Consume a parsed-excel job and re-publish it to the comment-parser queue.

    Args:
        svc: service runtime used for enqueueing and acking.
        job: raw job dict; must be a valid Job whose payload contains 'mail_id'.
    """
    print("Parser Comment Consumer from excel handle_excel_publish :", job)
    job_model = Job(**job)
    # A KeyError here means the upstream producer sent a malformed payload.
    mail_id = job_model.payload['mail_id']
    task_id = f"IsBankServiceCommentParser_{mail_id}"
    await svc.enqueue(task_id=task_id, payload=job_model.payload, action="parser.comment.publish")
    print("Parser Comment Consumer from excel handle_excel_publish :", job_model.task_id)
    await svc.ack_current()
    # Pace consumption; PROCESS_SEC is a module-level constant.
    await asyncio.sleep(PROCESS_SEC)
|
||||
|
||||
@@ -10,6 +10,7 @@ COPY app/services/parser/excel/README.md ./
|
||||
|
||||
COPY app/core ./app/core
|
||||
COPY app/services/common/ ./app/services/common/
|
||||
COPY app/services/types/ ./app/services/types/
|
||||
COPY app/services/parser/excel/ ./app/services/parser/excel/
|
||||
|
||||
RUN pip install --upgrade pip && pip install --no-cache-dir .
|
||||
|
||||
118
ServicesTask/app/services/parser/excel/isbank/parser.py
Normal file
118
ServicesTask/app/services/parser/excel/isbank/parser.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import sys
|
||||
import logging
|
||||
from time import sleep
|
||||
from typing import List
|
||||
import pandas as pd
|
||||
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from base64 import b64decode
|
||||
from unidecode import unidecode
|
||||
|
||||
from app.services.types.mail import ProcessMailObject, MailParser
|
||||
|
||||
|
||||
# Configure root logging once at import time: INFO level, timestamped format,
# mirrored to stdout and to a log file in the working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout), logging.FileHandler('isbank_parser_service.log')]
)
# Named logger for this parser service module.
logger = logging.getLogger('IsBank_Parser_Service')
|
||||
|
||||
|
||||
class IsbankMailParserService:
|
||||
|
||||
|
||||
def try_dataframe_extract_with_xlsx(self, binary_data: BytesIO):
|
||||
try:
|
||||
df = pd.read_excel(binary_data, engine='openpyxl')
|
||||
return df
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
def try_dataframe_extract_with_xls(self, binary_data: BytesIO):
|
||||
try:
|
||||
df = pd.read_excel(binary_data, engine='xlrd')
|
||||
return df
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
def try_dataframe_extract_else(self, binary_data: BytesIO):
|
||||
try:
|
||||
df = pd.read_excel(binary_data, engine='openpyxl')
|
||||
except Exception as e1:
|
||||
try:
|
||||
binary_data.seek(0)
|
||||
df = pd.read_excel(binary_data, engine='xlrd')
|
||||
except Exception as e2:
|
||||
return None
|
||||
return df
|
||||
|
||||
def parse_record(self, excel_frame: pd.DataFrame, file_name: str) -> list[dict]:
|
||||
"""Parse Excel file data.
|
||||
|
||||
Args:
|
||||
excel_frame: DataFrame containing Excel data
|
||||
|
||||
Returns:
|
||||
list[dict]: List of parsed data dictionaries
|
||||
"""
|
||||
iban, data_list = "", []
|
||||
try:
|
||||
for row in excel_frame.itertuples():
|
||||
if "IBAN" in str(row[3]).upper():
|
||||
iban = str(row[5]).replace(" ", "")
|
||||
if not str(row[1]) == "nan" and not str(row[2]) == "nan":
|
||||
if len(str(row[1]).split("/")) > 2:
|
||||
data_list.append(dict(
|
||||
filename=file_name, iban=str(iban), bank_date=datetime.strptime(str(row[1]), "%d/%m/%Y-%H:%M:%S").strftime("%Y-%m-%d %H:%M:%S"),
|
||||
channel_branch=unidecode(str(row[3])), currency_value=(float(str(row[4]).replace(",", "")) if row[4] else 0),
|
||||
balance=(float(str(row[5]).replace(",", "")) if row[5] else 0), additional_balance=(float(str(row[6]).replace(",", "")) if row[6] else 0),
|
||||
process_name=str(row[7]), process_type=unidecode(str(row[8])), process_comment=unidecode(str(row[9])), bank_reference_code=str(row[15]),
|
||||
))
|
||||
except Exception as e:
|
||||
print(f"[PARSER_SERVICE] Error parsing Excel file: {str(e)}")
|
||||
return data_list
|
||||
|
||||
def parse_dataframes(self, dataframe: pd.DataFrame, task: ProcessMailObject, attachment_data: MailParser):
|
||||
data_list = self.parse_record(dataframe, attachment_data.filename)
|
||||
print(f"[PARSER_SERVICE] Successfully parsed {len(data_list)} records from Excel file")
|
||||
if data_list:
|
||||
print(f"Updated service data for task {task.uuid} with {len(data_list)} records")
|
||||
return data_list
|
||||
return None
|
||||
|
||||
def process_task(self, active_task: ProcessMailObject):
|
||||
"""Process a task object using the MailParserService
|
||||
Args: task: RedisTaskObject or task dictionary to process
|
||||
"""
|
||||
try:
|
||||
for attachment in active_task.data.attachments:
|
||||
task_id = active_task.data.id
|
||||
if not attachment or not attachment.data:
|
||||
print(f"[PARSER_SERVICE] No data found for task {task_id}")
|
||||
continue
|
||||
binary_data: bytes = b64decode(attachment.data)
|
||||
excel_data = BytesIO(binary_data)
|
||||
df = self.try_dataframe_extract_with_xlsx(excel_data)
|
||||
if df is None:
|
||||
excel_data.seek(0)
|
||||
df = self.try_dataframe_extract_with_xls(excel_data)
|
||||
if df is None:
|
||||
excel_data.seek(0)
|
||||
df = self.try_dataframe_extract_else(excel_data)
|
||||
if df is not None:
|
||||
return self.parse_dataframes(df, active_task, attachment)
|
||||
except Exception as e:
|
||||
print(f"[PARSER_SERVICE] Error processing task: {str(e)}")
|
||||
|
||||
|
||||
# Module-level singleton; the service is stateless so one instance suffices.
parser = IsbankMailParserService()
|
||||
|
||||
|
||||
def parse_isbank_mail(mailObject: ProcessMailObject):
    """Entry point for Isbank mails: parse attachments via the shared parser.

    Returns the parsed record list, or None when processing fails or raises.
    """
    result = None
    try:
        result = parser.process_task(mailObject)
    except Exception as exc:
        print(f"[PARSER_SERVICE] Error parsing mail: {str(exc)}")
    return result
|
||||
@@ -1,40 +1,79 @@
|
||||
import os
|
||||
import asyncio
|
||||
import fnmatch
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from app.services.common.service_base_async import ServiceBaseAsync, Job
|
||||
from app.services.parser.excel.isbank.parser import parse_isbank_mail
|
||||
from app.services.types.mail import MailParsedResult, PlainMailReader, ProcessMailObject
|
||||
from app.services.common.service_base_async import ServiceBaseAsync
|
||||
from app.services.types.queue import Enqueue
|
||||
from app.services.types.task import Job
|
||||
|
||||
|
||||
# Producer tuning knobs, overridable via environment variables.
PRODUCE_BURST = int(os.getenv("PRODUCE_BURST", "10"))
PRODUCE_ONCE = os.getenv("PRODUCE_ONCE", "true").lower() == "true"
EVENT_TYPE = os.getenv("EVENT_TYPE", "db-mongo")

# Seconds to sleep between iterations (simple throttle).
PROCESS_SEC = 10


# Maps sender mail domains (fnmatch patterns) to the bank-specific
# parser function to run for mails from that domain.
bank_mail_dict = {
    "ileti.isbank.com.tr": parse_isbank_mail
}
|
||||
|
||||
|
||||
def grab_fn_callable(domain: str) -> Callable[[ServiceBaseAsync, dict], Awaitable[None]]:
    """Look up the bank parser registered for *domain*.

    Keys of bank_mail_dict are matched with fnmatch, so wildcard patterns
    are supported. Returns None when no pattern matches.

    NOTE(review): the registered values are sync functions taking a
    ProcessMailObject (e.g. parse_isbank_mail), not async (svc, job)
    handlers — this annotation looks stale; confirm and update.
    """
    for pat, fn in bank_mail_dict.items():
        if fnmatch.fnmatch(domain, pat):
            return fn
    return None
|
||||
|
||||
|
||||
async def produce(svc: ServiceBaseAsync):
    # Placeholder producer: this service is consumer-driven, so just idle.
    await asyncio.sleep(PROCESS_SEC)
|
||||
|
||||
|
||||
async def handle_from_parser(svc: ServiceBaseAsync, job: dict):
    """Validate the incoming job, acknowledge it, then throttle briefly."""
    # Construction validates the payload shape; the model itself is unused.
    _ = Job(**job)
    await svc.ack_current()
    await asyncio.sleep(PROCESS_SEC)
|
||||
|
||||
|
||||
async def handle_from_mail(svc: ServiceBaseAsync, job):
    """Forward a mail job to the excel-parser queue, then ack it."""
    job_model = Job(**job)
    await svc.enqueue(task_id=job_model.task_id, payload=job_model.payload, type_="parser.excel.publish")
    print("Parser Excel Consumer from mail handle_from_mail :", job_model.task_id)
    await svc.ack_current()
|
||||
async def handle_from_mail_parser(svc: ServiceBaseAsync, job: dict):
    """Parse the Excel attachments of a mail job and route the result.

    Parsed data is forwarded to the comment parser; an empty parse result is
    published back to the mail service instead. Jobs from unregistered
    sender domains are dead-lettered.

    Args:
        svc: service runtime (enqueue / ack / dlq).
        job: raw job dict convertible to Job; its payload must be a
            ProcessMailObject dump.
    """
    job_model = Job(**job)
    process_mail_object = ProcessMailObject(**job_model.payload)
    mail_id = process_mail_object.data.id
    task_id = f"IsBankServiceExcelParser_{mail_id}"
    await asyncio.sleep(PROCESS_SEC)
    function_handler = grab_fn_callable(process_mail_object.data.from_.domain)
    if not function_handler:
        # No registered parser for this sender domain: dead-letter the job.
        await svc.dlq_current(job, error="Invalid domain")
        return
    parsed_data = function_handler(process_mail_object)
    # Both outcomes publish the same MailParsedResult; only the target queue
    # differs (back to the mail service when parsing produced nothing).
    plain_mail_data = PlainMailReader(**process_mail_object.data.model_dump())
    parsed_result = MailParsedResult(task_id=task_id, mail_data=plain_mail_data.model_dump(), send_to="Completed", data=parsed_data)
    action = "mail.service.publish" if not parsed_data else "parser.comment.publish"
    enqueue = Enqueue(task_id=task_id, payload=parsed_result.model_dump(), action=action)
    if not parsed_data:
        print("Parser Excel Consumer from mail handle_from_mail :", parsed_result)
    await svc.enqueue(enqueue)
    await svc.ack_current()
    if parsed_data:
        print("Parser Excel Consumer from mail handle_from_mail :", task_id)
|
||||
|
||||
|
||||
async def consume_default(svc: ServiceBaseAsync, job: dict):
    """Fallback consumer: validate the job, ack it, then throttle."""
    _ = Job(**job)
    await svc.ack_current()
    await asyncio.sleep(PROCESS_SEC)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Wire the service: idle producer, fallback consumer, and the
    # mail-publish handler that drives the Excel parsing pipeline.
    svc = ServiceBaseAsync(
        produce_fn=produce,
        consume_fn=consume_default,
        handlers={"parser.mail.publish": handle_from_mail_parser},
    )
    asyncio.run(svc.run())
|
||||
|
||||
@@ -16,7 +16,12 @@ dependencies = [
|
||||
"aio-pika>=9.4.1",
|
||||
"prometheus-client>=0.20.0",
|
||||
"uvloop>=0.19.0",
|
||||
"pydantic"
|
||||
"pydantic",
|
||||
"arrow>=1.3.0",
|
||||
"pandas>=2.2.3",
|
||||
"unidecode>=1.3.8",
|
||||
"xlrd>=2.0.1",
|
||||
"openpyxl>=3.1.2",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -10,6 +10,7 @@ COPY app/services/parser/mail/README.md ./
|
||||
|
||||
COPY app/core ./app/core
|
||||
COPY app/services/common/ ./app/services/common/
|
||||
COPY app/services/types/ ./app/services/types/
|
||||
COPY app/services/parser/mail/ ./app/services/parser/mail/
|
||||
|
||||
RUN pip install --upgrade pip && pip install --no-cache-dir .
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import os
|
||||
import asyncio
|
||||
import fnmatch
|
||||
|
||||
from app.services.common.service_base_async import ServiceBaseAsync, Job
|
||||
from app.services.types.queue import Enqueue
|
||||
from app.services.common.service_base_async import ServiceBaseAsync
|
||||
from app.services.types.task import Job
|
||||
from app.services.types.mail import ProcessMailObject
|
||||
|
||||
|
||||
# Number of jobs to produce per burst, overridable via environment.
PRODUCE_BURST = int(os.getenv("PRODUCE_BURST", "10"))
|
||||
@@ -15,16 +19,20 @@ async def produce(svc: ServiceBaseAsync):
|
||||
|
||||
|
||||
async def handle_mail_publish(svc: ServiceBaseAsync, job: dict):
    """Route an incoming bank mail to the excel-parser queue.

    Args:
        svc: service runtime used for enqueueing and acking.
        job: raw job dict convertible to Job; its payload must be a
            ProcessMailObject dump.
    """
    # Check which bank the mail is from and which building it targets.
    job_model = Job(**job)
    process_mail_object = ProcessMailObject(**job_model.payload)
    mail_id = process_mail_object.data.id
    task_id = f"IsBankServiceMailParser_{mail_id}"
    enqueue = Enqueue(task_id=task_id, payload=process_mail_object.model_dump(), action="parser.excel.publish")
    await svc.enqueue(enqueue)
    print(f"Parser Mail Consumer parsed handle_mail_publish : {enqueue.task_id}")
    await svc.ack_current()
    await asyncio.sleep(PROCESS_SEC)
|
||||
|
||||
|
||||
async def handle_mongo_publish(svc: ServiceBaseAsync, job: dict):
    """Default mongo-event handler: log the raw job, validate it, and ack."""
    print('job', job)
    parsed_job = Job(**job)
    await svc.ack_current()
    print("Parser Mail Consumer default handle_mongo_publish :", parsed_job.task_id)
|
||||
|
||||
Reference in New Issue
Block a user