import logging
import sys
from base64 import b64decode
from datetime import datetime
from io import BytesIO
from time import sleep
from typing import List

import pandas as pd
from unidecode import unidecode

from app.services.types.mail import ProcessMailObject, MailParser

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout), logging.FileHandler('isbank_parser_service.log')]
)
logger = logging.getLogger('IsBank_Parser_Service')


class IsbankMailParserService:
    """Turn Excel attachments of a mail task into lists of record dictionaries.

    Cells are addressed by position only (``row[n]`` of ``itertuples()``,
    where ``row[0]`` is the frame index and ``row[1]`` the first column).
    """

    # The last cell read for a record is row[15], so narrower sheets can
    # never produce a record.
    _MIN_COLUMNS = 15
    _BANK_DATE_FORMAT = "%d/%m/%Y-%H:%M:%S"
    _OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    @staticmethod
    def _read_excel(binary_data: BytesIO, engine: str):
        """Read the workbook with ``engine``; return the DataFrame or None on failure."""
        try:
            # A previous failed attempt may have left the cursor anywhere.
            binary_data.seek(0)
            return pd.read_excel(binary_data, engine=engine)
        except Exception as exc:
            # Best effort by design: callers fall back to another engine.
            logger.debug("[PARSER_SERVICE] Could not read workbook with engine %s: %s", engine, exc)
            return None

    @staticmethod
    def _to_amount(value):
        """Convert an amount cell to float, mapping empty/NaN/zero cells to 0.

        pandas represents empty cells as NaN, which is truthy, so a plain
        truthiness check lets NaN leak into the output.
        """
        if pd.isna(value) or not value:
            return 0
        # Commas are stripped before conversion (treated as grouping separators).
        return float(str(value).replace(",", ""))

    def _build_record(self, row, iban: str, file_name: str) -> dict:
        """Build one record dictionary from a data row.

        Raises:
            ValueError: if the date cell or an amount cell cannot be converted.
        """
        bank_date = datetime.strptime(str(row[1]), self._BANK_DATE_FORMAT)
        return {
            "filename": file_name,
            "iban": iban,
            "bank_date": bank_date.strftime(self._OUTPUT_DATE_FORMAT),
            "channel_branch": unidecode(str(row[3])),
            "currency_value": self._to_amount(row[4]),
            "balance": self._to_amount(row[5]),
            "additional_balance": self._to_amount(row[6]),
            "process_name": str(row[7]),
            "process_type": unidecode(str(row[8])),
            "process_comment": unidecode(str(row[9])),
            "bank_reference_code": str(row[15]),
        }

    def try_dataframe_extract_with_xlsx(self, binary_data: BytesIO):
        """Read the workbook with the ``openpyxl`` engine; None if that fails."""
        return self._read_excel(binary_data, 'openpyxl')

    def try_dataframe_extract_with_xls(self, binary_data: BytesIO):
        """Read the workbook with the ``xlrd`` engine; None if that fails."""
        return self._read_excel(binary_data, 'xlrd')

    def try_dataframe_extract_else(self, binary_data: BytesIO):
        """Try ``openpyxl`` and then ``xlrd``; None if both fail."""
        for engine in ('openpyxl', 'xlrd'):
            df = self._read_excel(binary_data, engine)
            if df is not None:
                return df
        return None

    def parse_record(self, excel_frame: pd.DataFrame, file_name: str) -> list[dict]:
        """Parse the rows of an Excel sheet into record dictionaries.

        A row whose third column contains "IBAN" sets the IBAN (taken from
        its fifth column, spaces removed) used for the records that follow.
        A row becomes a record when its first two columns are not "nan" and
        the first column contains at least two "/" (a ``dd/mm/YYYY-HH:MM:SS``
        timestamp is expected there).

        Args:
            excel_frame: DataFrame containing the Excel data.
            file_name: Stored in every record under ``filename``.

        Returns:
            list[dict]: Parsed records. Rows that cannot be converted are
            logged and skipped; the list is empty when nothing could be parsed.
        """
        iban = ""
        data_list = []
        try:
            if excel_frame.shape[1] < self._MIN_COLUMNS:
                logger.warning(
                    "[PARSER_SERVICE] Excel file %s has %s columns, at least %s are required",
                    file_name, excel_frame.shape[1], self._MIN_COLUMNS,
                )
                return data_list
            for row in excel_frame.itertuples():
                if "IBAN" in str(row[3]).upper():
                    iban = str(row[5]).replace(" ", "")
                date_cell = str(row[1])
                if date_cell == "nan" or str(row[2]) == "nan":
                    continue
                if date_cell.count("/") < 2:
                    continue
                try:
                    data_list.append(self._build_record(row, iban, file_name))
                except (ValueError, TypeError) as exc:
                    # One malformed row must not discard the rows after it.
                    logger.warning("[PARSER_SERVICE] Skipping malformed row %s: %s", row[0], exc)
        except Exception as exc:
            logger.exception("[PARSER_SERVICE] Error parsing Excel file: %s", exc)
        return data_list

    def parse_dataframes(self, dataframe: pd.DataFrame, task: ProcessMailObject, attachment_data: MailParser):
        """Parse one attachment's DataFrame.

        Args:
            dataframe: Sheet contents of the attachment.
            task: Task the attachment belongs to; only ``task.uuid`` is read,
                for logging.
            attachment_data: Attachment; its ``filename`` is stored in each record.

        Returns:
            The list of parsed records, or None when no record was found.
        """
        data_list = self.parse_record(dataframe, attachment_data.filename)
        logger.info("[PARSER_SERVICE] Successfully parsed %s records from Excel file", len(data_list))
        if data_list:
            logger.info("Updated service data for task %s with %s records", task.uuid, len(data_list))
            return data_list
        return None

    def process_task(self, active_task: ProcessMailObject):
        """Extract records from the attachments of a mail task.

        Attachments are examined in order; the records of the first
        attachment that yields any are returned. Attachments without data,
        unreadable attachments and attachments without records are skipped.

        Args:
            active_task: Task whose ``data.attachments`` hold base64-encoded
                Excel files.

        Returns:
            The list of parsed records, or None when no attachment produced
            records or an unexpected error occurred.
        """
        readers = (
            self.try_dataframe_extract_with_xlsx,
            self.try_dataframe_extract_with_xls,
            self.try_dataframe_extract_else,
        )
        try:
            for attachment in active_task.data.attachments:
                task_id = active_task.data.id
                if not attachment or not attachment.data:
                    logger.warning("[PARSER_SERVICE] No data found for task %s", task_id)
                    continue
                excel_data = BytesIO(b64decode(attachment.data))
                df = None
                for reader in readers:
                    excel_data.seek(0)
                    df = reader(excel_data)
                    if df is not None:
                        break
                if df is None:
                    logger.info("[PARSER_SERVICE] Attachment of task %s is not a readable Excel file", task_id)
                    continue
                records = self.parse_dataframes(df, active_task, attachment)
                if records:
                    return records
                # No records here: keep looking instead of giving up on the task.
        except Exception as exc:
            logger.exception("[PARSER_SERVICE] Error processing task: %s", exc)
        return None


parser = IsbankMailParserService()


def parse_isbank_mail(mailObject: ProcessMailObject):
    """Parse the Excel attachments of ``mailObject`` with the shared parser.

    Returns:
        The list of parsed records, or None when nothing could be parsed.
    """
    try:
        return parser.process_task(mailObject)
    except Exception as exc:
        logger.exception("[PARSER_SERVICE] Error parsing mail: %s", exc)
        return None