119 lines
4.6 KiB
Python
119 lines
4.6 KiB
Python
import sys
|
|
import logging
|
|
from time import sleep
|
|
from typing import List
|
|
import pandas as pd
|
|
|
|
from datetime import datetime
|
|
from io import BytesIO
|
|
from base64 import b64decode
|
|
from unidecode import unidecode
|
|
|
|
from app.services.types.mail import ProcessMailObject, MailParser
|
|
|
|
|
|
# Root logging setup: records at INFO and above are written both to stdout
# and to a log file in the current working directory.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_LOG_HANDLERS = [
    logging.StreamHandler(sys.stdout),
    logging.FileHandler('isbank_parser_service.log'),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_LOG_HANDLERS)

# Named logger for this service.
logger = logging.getLogger('IsBank_Parser_Service')
|
|
|
|
|
|
class IsbankMailParserService:
    """Extract statement records from Excel attachments of a mail task.

    Attachments are base64-decoded, loaded with pandas (trying the
    ``openpyxl`` and ``xlrd`` engines) and converted row by row into plain
    dictionaries. Failures are logged and reported as ``None`` or an empty
    list instead of being raised to the caller.
    """

    # Same logger name as the module-level ``logger``.
    _log = logging.getLogger('IsBank_Parser_Service')

    # Timestamp layout expected in the first column, and the layout emitted.
    _BANK_DATE_FORMAT = "%d/%m/%Y-%H:%M:%S"
    _OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    def _read_excel(self, binary_data: BytesIO, engine: str, *, rewind: bool = False):
        """Read *binary_data* with one pandas engine; return None on any failure."""
        try:
            if rewind:
                binary_data.seek(0)
            return pd.read_excel(binary_data, engine=engine)
        except Exception as exc:
            # Deliberately broad: this is a format probe, and a failure here
            # (wrong format, missing engine, corrupt file) only means that
            # the next engine should be tried.
            self._log.debug(
                "[PARSER_SERVICE] Engine %s could not read attachment: %s", engine, exc
            )
            return None

    @staticmethod
    def _parse_amount(value):
        """Convert an amount cell to float; empty, zero or NaN cells become 0."""
        # pandas represents empty cells as NaN, which is truthy, so a plain
        # truthiness test would let NaN through instead of defaulting to 0.
        if value is None or pd.isna(value) or not value:
            return 0
        return float(str(value).replace(",", ""))

    def try_dataframe_extract_with_xlsx(self, binary_data: BytesIO):
        """Load the stream with the ``openpyxl`` engine.

        Args:
            binary_data: Stream positioned at the start of the workbook.

        Returns:
            The loaded DataFrame, or None when the stream cannot be read.
        """
        return self._read_excel(binary_data, 'openpyxl')

    def try_dataframe_extract_with_xls(self, binary_data: BytesIO):
        """Load the stream with the ``xlrd`` engine.

        Args:
            binary_data: Stream positioned at the start of the workbook.

        Returns:
            The loaded DataFrame, or None when the stream cannot be read.
        """
        return self._read_excel(binary_data, 'xlrd')

    def try_dataframe_extract_else(self, binary_data: BytesIO):
        """Last-resort load: ``openpyxl`` first, then ``xlrd`` after rewinding.

        Args:
            binary_data: Stream positioned at the start of the workbook.

        Returns:
            The loaded DataFrame, or None when both engines fail.
        """
        frame = self._read_excel(binary_data, 'openpyxl')
        if frame is None:
            frame = self._read_excel(binary_data, 'xlrd', rewind=True)
        return frame

    def _row_to_record(self, row, iban: str, file_name: str) -> dict:
        """Build one output dictionary from a transaction row tuple."""
        bank_date = datetime.strptime(str(row[1]), self._BANK_DATE_FORMAT)
        return dict(
            filename=file_name,
            iban=str(iban),
            bank_date=bank_date.strftime(self._OUTPUT_DATE_FORMAT),
            channel_branch=unidecode(str(row[3])),
            currency_value=self._parse_amount(row[4]),
            balance=self._parse_amount(row[5]),
            additional_balance=self._parse_amount(row[6]),
            process_name=str(row[7]),
            process_type=unidecode(str(row[8])),
            process_comment=unidecode(str(row[9])),
            bank_reference_code=str(row[15]),
        )

    def parse_record(self, excel_frame: pd.DataFrame, file_name: str) -> list[dict]:
        """Convert the rows of a loaded sheet into record dictionaries.

        A row whose third column contains "IBAN" sets the IBAN (taken from the
        fifth column, spaces removed) that is attached to the records that
        follow. A row becomes a record when its first two columns are not NaN
        and the first column contains at least two "/" separators.

        Args:
            excel_frame: DataFrame holding the sheet contents.
            file_name: Attachment name stored in every record as ``filename``.

        Returns:
            list[dict]: Parsed records; empty when nothing could be parsed.
        """
        records: list[dict] = []
        iban = ""
        try:
            # itertuples() puts the index at position 0, so sheet columns
            # are addressed from position 1 onwards.
            for row in excel_frame.itertuples():
                try:
                    if "IBAN" in str(row[3]).upper():
                        iban = str(row[5]).replace(" ", "")
                    if str(row[1]) == "nan" or str(row[2]) == "nan":
                        continue
                    if len(str(row[1]).split("/")) > 2:
                        records.append(self._row_to_record(row, iban, file_name))
                except (IndexError, ValueError) as exc:
                    # One malformed row must not discard the rows after it.
                    self._log.warning(
                        "[PARSER_SERVICE] Skipping row %s: %s", row[0], exc
                    )
        except Exception as e:
            self._log.exception("[PARSER_SERVICE] Error parsing Excel file: %s", e)
        return records

    # Project types are quoted so they are not resolved when the class is defined.
    def parse_dataframes(self, dataframe: pd.DataFrame, task: "ProcessMailObject", attachment_data: "MailParser"):
        """Parse one loaded attachment and report the outcome.

        Args:
            dataframe: Sheet contents of the attachment.
            task: Task being processed; its ``uuid`` is used for logging.
            attachment_data: Attachment whose ``filename`` is stored per record.

        Returns:
            The list of parsed records, or None when no record was found.
        """
        data_list = self.parse_record(dataframe, attachment_data.filename)
        self._log.info(
            "[PARSER_SERVICE] Successfully parsed %s records from Excel file", len(data_list)
        )
        if not data_list:
            return None
        self._log.info(
            "Updated service data for task %s with %s records", task.uuid, len(data_list)
        )
        return data_list

    def process_task(self, active_task: "ProcessMailObject"):
        """Parse the attachments of a mail task until one yields records.

        Args:
            active_task: Task whose ``data.attachments`` carry base64 content.

        Returns:
            The records of the first attachment that produces any, or None
            when no attachment could be read or parsed.
        """
        extractors = (
            self.try_dataframe_extract_with_xlsx,
            self.try_dataframe_extract_with_xls,
            self.try_dataframe_extract_else,
        )
        try:
            for attachment in active_task.data.attachments:
                if not attachment or not attachment.data:
                    self._log.warning(
                        "[PARSER_SERVICE] No data found for task %s", active_task.data.id
                    )
                    continue
                excel_data = BytesIO(b64decode(attachment.data))
                df = None
                for extract in extractors:
                    # Every attempt starts from the beginning of the stream.
                    excel_data.seek(0)
                    df = extract(excel_data)
                    if df is not None:
                        break
                if df is None:
                    continue
                records = self.parse_dataframes(df, active_task, attachment)
                # Keep looking when this attachment produced nothing, rather
                # than giving up on the remaining attachments.
                if records:
                    return records
        except Exception as e:
            self._log.exception("[PARSER_SERVICE] Error processing task: %s", e)
        return None
|
|
|
|
|
|
# Module-level service instance; parse_isbank_mail calls its process_task method.
parser = IsbankMailParserService()
|
|
|
|
|
|
def parse_isbank_mail(mailObject: ProcessMailObject):
    """Run the module-level parser on a mail task.

    Args:
        mailObject: Mail task handed to ``parser.process_task``.

    Returns:
        Whatever ``parser.process_task`` returns, or None when it raises.
    """
    outcome = None
    try:
        outcome = parser.process_task(mailObject)
    except Exception as err:
        print(f"[PARSER_SERVICE] Error parsing mail: {str(err)}")
    return outcome
|