# production-evyos-systems-an.../ServicesTask/app/services/parser/excel/isbank/parser.py
#
# File-viewer metadata: 119 lines, 4.6 KiB, Python

import sys
import logging
from time import sleep
from typing import List
import pandas as pd
from datetime import datetime
from io import BytesIO
from base64 import b64decode
from unidecode import unidecode
from app.services.types.mail import ProcessMailObject, MailParser
# Configure the root logger at import time (basicConfig does nothing if the
# root logger already has handlers): INFO and above go both to stdout and to
# a log file in the current working directory.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler("isbank_parser_service.log"),
    ],
)

# Named logger for this parser service.
logger = logging.getLogger("IsBank_Parser_Service")
class IsbankMailParserService:
    """Turn the Excel attachments of a mail task into lists of record dicts.

    Workbooks are read with pandas (``openpyxl`` first, then ``xlrd``) and each
    data row is mapped by column position onto the record keys built in
    :meth:`parse_record`.
    """

    # Same named logger the module configures; looked up here so the class
    # does not rely on a module-level variable being present.
    _log = logging.getLogger("IsBank_Parser_Service")

    # Layout of the timestamp read from the first column, and the layout emitted.
    _BANK_DATE_FORMAT = "%d/%m/%Y-%H:%M:%S"
    _OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    def _read_excel(self, binary_data, engine: str) -> "pd.DataFrame | None":
        """Read *binary_data* with the given pandas engine; return None on any failure."""
        try:
            # Rewind first so a previous failed attempt cannot leave the
            # stream positioned mid-file.
            rewind = getattr(binary_data, "seek", None)
            if rewind is not None:
                rewind(0)
            return pd.read_excel(binary_data, engine=engine)
        except Exception as exc:
            # Best effort by design: callers fall back to another engine.
            self._log.debug(
                "[PARSER_SERVICE] Could not read workbook with engine %s: %s", engine, exc
            )
            return None

    def try_dataframe_extract_with_xlsx(self, binary_data: BytesIO) -> "pd.DataFrame | None":
        """Read the workbook with the ``openpyxl`` engine.

        Returns:
            The parsed DataFrame, or None if reading failed.
        """
        return self._read_excel(binary_data, "openpyxl")

    def try_dataframe_extract_with_xls(self, binary_data: BytesIO) -> "pd.DataFrame | None":
        """Read the workbook with the ``xlrd`` engine.

        Returns:
            The parsed DataFrame, or None if reading failed.
        """
        return self._read_excel(binary_data, "xlrd")

    def try_dataframe_extract_else(self, binary_data: BytesIO) -> "pd.DataFrame | None":
        """Read the workbook with ``openpyxl``, falling back to ``xlrd``.

        Returns:
            The first DataFrame that could be read, or None if both engines failed.
        """
        for engine in ("openpyxl", "xlrd"):
            frame = self._read_excel(binary_data, engine)
            if frame is not None:
                return frame
        return None

    @staticmethod
    def _to_float(value) -> float:
        """Convert a cell to float, treating None, NaN and blank cells as 0.0.

        Commas are removed before conversion (they are treated as thousands
        separators). Empty cells arrive from pandas as NaN, which is truthy,
        so a plain truthiness check would let NaN through.
        """
        if value is None or pd.isna(value):
            return 0.0
        text = str(value).replace(",", "").strip()
        return float(text) if text else 0.0

    def _build_record(self, row, iban: str, file_name: str) -> "dict | None":
        """Build one record dict from an ``itertuples`` row, or None if it is not a data row.

        ``row[0]`` is the frame index, so ``row[1]`` is the first sheet column.
        A row counts as data when its first two columns are filled and the
        first one contains at least two ``/`` separators (a date-like value).
        """
        if str(row[1]) == "nan" or str(row[2]) == "nan":
            return None
        if len(str(row[1]).split("/")) <= 2:
            return None
        bank_date = datetime.strptime(str(row[1]), self._BANK_DATE_FORMAT)
        return dict(
            filename=file_name,
            iban=str(iban),
            bank_date=bank_date.strftime(self._OUTPUT_DATE_FORMAT),
            channel_branch=unidecode(str(row[3])),
            currency_value=self._to_float(row[4]),
            balance=self._to_float(row[5]),
            additional_balance=self._to_float(row[6]),
            process_name=str(row[7]),
            process_type=unidecode(str(row[8])),
            process_comment=unidecode(str(row[9])),
            bank_reference_code=str(row[15]),
        )

    def parse_record(self, excel_frame: pd.DataFrame, file_name: str) -> list[dict]:
        """Parse Excel file data.

        A row whose third column contains "IBAN" sets the IBAN (taken from the
        fifth column, spaces removed) that is attached to every later record.

        Args:
            excel_frame: DataFrame containing Excel data
            file_name: Name stored under the ``filename`` key of each record
        Returns:
            list[dict]: List of parsed data dictionaries; rows that cannot be
            parsed are logged and skipped. Never raises for a bad row.
        """
        iban, data_list = "", []
        if excel_frame is None:
            return data_list
        for row in excel_frame.itertuples():
            try:
                if "IBAN" in str(row[3]).upper():
                    iban = str(row[5]).replace(" ", "")
                record = self._build_record(row, iban, file_name)
            except Exception as exc:
                # Broad on purpose: one malformed row must not discard the
                # rows that follow it.
                self._log.warning(
                    "[PARSER_SERVICE] Error parsing Excel file: %s (row %s skipped)",
                    exc,
                    row[0],
                )
                continue
            if record is not None:
                data_list.append(record)
        return data_list

    def parse_dataframes(self, dataframe: pd.DataFrame, task: "ProcessMailObject", attachment_data: "MailParser"):
        """Parse *dataframe* into records labelled with the attachment's filename.

        Args:
            dataframe: Workbook contents.
            task: Task being processed; only ``task.uuid`` is read, for logging.
            attachment_data: Attachment whose ``filename`` is stored on each record.
        Returns:
            The non-empty list of records, or None when nothing was parsed.
        """
        data_list = self.parse_record(dataframe, attachment_data.filename)
        self._log.info(
            "[PARSER_SERVICE] Successfully parsed %s records from Excel file", len(data_list)
        )
        if data_list:
            self._log.info(
                "Updated service data for task %s with %s records", task.uuid, len(data_list)
            )
            return data_list
        return None

    def _parse_attachment(self, active_task, attachment):
        """Decode one base64 attachment and parse it; return its records or None."""
        task_id = active_task.data.id
        if not attachment or not attachment.data:
            self._log.warning("[PARSER_SERVICE] No data found for task %s", task_id)
            return None
        excel_data = BytesIO(b64decode(attachment.data))
        # One call covers both engines (openpyxl, then xlrd).
        df = self.try_dataframe_extract_else(excel_data)
        if df is None:
            self._log.warning(
                "[PARSER_SERVICE] Could not read Excel attachment for task %s", task_id
            )
            return None
        return self.parse_dataframes(df, active_task, attachment)

    def process_task(self, active_task: "ProcessMailObject"):
        """Process a task object using the MailParserService

        Attachments are tried in order; the records of the first attachment
        that yields any are returned. An attachment that is empty, unreadable
        or yields no records is skipped instead of ending the search.

        Args:
            active_task: Task whose ``data.attachments`` hold base64-encoded
                Excel payloads.
        Returns:
            A non-empty list of record dicts, or None if no attachment
            produced records. Never raises; failures are logged.
        """
        try:
            for attachment in active_task.data.attachments:
                try:
                    records = self._parse_attachment(active_task, attachment)
                except Exception as exc:
                    # e.g. invalid base64: skip this attachment, keep going.
                    self._log.warning("[PARSER_SERVICE] Error processing task: %s", exc)
                    continue
                if records:
                    return records
        except Exception as exc:
            self._log.error("[PARSER_SERVICE] Error processing task: %s", exc)
        return None
# Shared parser instance used by parse_isbank_mail.
parser = IsbankMailParserService()


def parse_isbank_mail(mailObject: ProcessMailObject):
    """Run the module-level parser on *mailObject*.

    Returns whatever ``parser.process_task`` returns, or None if that call
    raises (the error is printed instead of propagated).
    """
    outcome = None
    try:
        outcome = parser.process_task(mailObject)
    except Exception as error:
        print(f"[PARSER_SERVICE] Error parsing mail: {str(error)}")
    return outcome