"""Parser service for Excel bank statements delivered over Redis pub/sub.

The service subscribes to ``REDIS_CHANNEL_IN``, reads the Excel payload carried
by every message whose ``stage`` is ``"red"``, extracts the transaction rows and
publishes the result on ``REDIS_CHANNEL_OUT``.
"""
import base64
import datetime
import io
import json
import os
import time
from typing import Optional

import arrow
from pandas import DataFrame, read_excel
from unidecode import unidecode

from BankServices.ServiceDepends.config import Config

# Import Redis pub/sub handler
from Controllers.Redis.Broadcast.actions import redis_pubsub

# Define Redis channels
REDIS_CHANNEL_IN = "reader"  # Subscribe to Email Service channel
REDIS_CHANNEL_OUT = "parser"  # Publish to Parser Service channel
delimiter = "|"

# Directory that receives a copy of each payload for debugging.
_DEBUG_DIR = "/tmp"


def publish_parsed_data_to_redis(data, collected_data_dict: list[dict], filename: str) -> bool:
    """Publish parsed data to Redis.

    The outgoing message is a shallow copy of ``data`` (or an empty dict when
    ``data`` is not a dict) without the ``payload`` and ``is_base64`` keys, so
    the raw file content is not forwarded.

    Args:
        data: Original message data from Redis.
        collected_data_dict: Parsed data from the Excel file.
        filename: Name of the processed file.

    Returns:
        bool: True when the publisher reports success, False otherwise.
    """
    # Copy so the caller's message is never mutated.
    message = data.copy() if isinstance(data, dict) else {}
    message.pop("payload", None)
    message.pop("is_base64", None)

    # An empty result is reported as 'not found' rather than 'parsed'.
    if collected_data_dict:
        message["parsed"] = collected_data_dict
        message["stage"] = "parsed"
    else:
        message["parsed"] = None
        message["stage"] = "not found"

    message["parsed_at"] = str(arrow.now())
    message["filename"] = filename

    result = redis_pubsub.publisher.publish(REDIS_CHANNEL_OUT, message)
    if result.status:
        print(f"[PARSER_SERVICE] Published parsed data for {filename} with stage: {message['stage']}")
        return True

    print(f"[PARSER_SERVICE] Publish error: {result.error}")
    return False


def _to_float(value):
    """Convert a cell holding a comma-grouped number to float; falsy cells give 0."""
    return float(str(value).replace(",", "")) if value else 0


def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
    """Parse Excel file data.

    Rows are read positionally (index 0 of each tuple is the frame index).
    A row whose third cell contains "IBAN" sets the IBAN used for all following
    records. A row becomes a record when its first two cells are not NaN and
    the first cell contains at least two "/" separators.

    Parsing stops at the first row that raises; the records collected up to
    that point are still returned.

    Args:
        excel_frame: DataFrame containing Excel data.

    Returns:
        list[dict]: List of parsed data dictionaries.
    """
    iban, data_list = "", []
    try:
        for row in excel_frame.itertuples():
            if "IBAN" in str(row[3]).upper():
                iban = str(row[5]).replace(" ", "")

            # Skip rows without both leading cells populated.
            if str(row[1]) == "nan" or str(row[2]) == "nan":
                continue
            # Only cells shaped like a dd/mm/yyyy date mark a transaction row.
            if len(str(row[1]).split("/")) <= 2:
                continue

            bank_date = datetime.datetime.strptime(str(row[1]), "%d/%m/%Y-%H:%M:%S")
            data_list.append(
                dict(
                    iban=str(iban),
                    bank_date=str(arrow.get(bank_date)),
                    channel_branch=unidecode(str(row[3])),
                    currency_value=_to_float(row[4]),
                    balance=_to_float(row[5]),
                    additional_balance=_to_float(row[6]),
                    process_name=str(row[7]),
                    process_type=unidecode(str(row[8])),
                    process_comment=unidecode(str(row[9])),
                    bank_reference_code=str(row[15]),
                )
            )
        print(f"[PARSER_SERVICE] Successfully parsed {len(data_list)} records from Excel file")
    except Exception as e:
        print(f"[PARSER_SERVICE] Error parsing Excel file: {str(e)}")
    return data_list


def _decode_payload(payload, is_base64) -> Optional[bytes]:
    """Return the message payload as bytes, or None when that is not possible."""
    if isinstance(payload, str):
        if not is_base64:
            # Convert regular string payload to bytes.
            return payload.encode('utf-8')
        try:
            decoded = base64.b64decode(payload)
        except (ValueError, TypeError) as e:
            print(f"[PARSER_SERVICE] Error decoding base64 payload: {str(e)}")
            return None
        print(f"[PARSER_SERVICE] Successfully decoded base64 payload, size: {len(decoded)} bytes")
        return decoded
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)
    return None


def _save_debug_copy(payload: bytes, filename) -> Optional[str]:
    """Write the payload under the debug directory; return its path or None on failure."""
    # The filename comes from the message, so strip any directory component to
    # keep the write inside the debug directory.
    temp_file_path = os.path.join(_DEBUG_DIR, os.path.basename(str(filename)))
    try:
        with open(temp_file_path, 'wb') as f:
            f.write(payload)
    except (OSError, ValueError) as e:
        print(f"[PARSER_SERVICE] Could not save debug file: {str(e)}")
        return None
    print(f"[PARSER_SERVICE] Saved payload to {temp_file_path} for debugging")
    return temp_file_path


def _read_excel_frame(payload: bytes, filename, temp_file_path: Optional[str]) -> DataFrame:
    """Read the payload as an Excel sheet, trying several engines in order.

    Raises:
        ValueError: When every approach fails; the message lists each error.
    """
    lowered_name = str(filename or "").lower()

    # An approach returns None when it does not apply to this file.
    approaches = [
        # Approach 1: Try xlrd for .xls files
        lambda: read_excel(io.BytesIO(payload), engine='xlrd') if lowered_name.endswith('.xls') else None,
        # Approach 2: Try openpyxl for .xlsx files
        lambda: read_excel(io.BytesIO(payload), engine='openpyxl') if lowered_name.endswith('.xlsx') else None,
        # Approach 3: Try xlrd with explicit sheet name
        lambda: read_excel(io.BytesIO(payload), engine='xlrd', sheet_name=0),
        # Approach 4: Try with the temporary file, but only when this call
        # actually wrote it; otherwise a stale file could be read instead.
        lambda: read_excel(temp_file_path) if temp_file_path else None,
    ]

    errors = []
    for number, approach in enumerate(approaches, start=1):
        try:
            frame = approach()
        except Exception as e:
            errors.append(f"Approach {number}: {str(e)}")
            continue
        if frame is not None:
            print(f"[PARSER_SERVICE] Successfully read Excel file using approach {number}")
            return frame

    error_details = "\n".join(errors)
    raise ValueError(f"Failed to read Excel file using all approaches:\n{error_details}")


def process_message(message):
    """Process a message from Redis.

    Messages whose ``stage`` is not ``"red"`` are skipped. For the others the
    payload is decoded, read as an Excel sheet, parsed and published. Any
    failure while handling the file is logged and nothing is published.

    Args:
        message: Message data from Redis subscriber; its ``"data"`` entry is a
            dict or a JSON document encoding one.
    """
    data = message["data"]

    # If data is a JSON document, decode it first.
    if isinstance(data, (str, bytes, bytearray)):
        try:
            data = json.loads(data)
        except ValueError as e:
            print(f"[PARSER_SERVICE] Error parsing message data: {e}")
            return

    if not isinstance(data, dict):
        print(f"[PARSER_SERVICE] Skipped message with unsupported data type: {type(data).__name__}")
        return

    # Check if stage is 'red' before processing
    if data.get("stage") != "red":
        print(f"[PARSER_SERVICE] Skipped message with UUID: {data.get('uuid')} (stage is not 'red')")
        return

    try:
        filename = data.get("filename")
        print(f"[PARSER_SERVICE] Processing file: {filename}")

        payload = _decode_payload(data.get("payload"), data.get("is_base64", False))
        if payload is None:
            # Without bytes there is nothing to parse; stop before any reader runs.
            raise ValueError("Payload is missing or could not be decoded")

        temp_file_path = _save_debug_copy(payload, filename)
        excel_frame = _read_excel_frame(payload, filename, temp_file_path)

        # Extract data from the Excel file
        collected_data_dict = parse_excel_file(excel_frame)

        # Publish parsed data to Redis
        publish_parsed_data_to_redis(
            data=data, collected_data_dict=collected_data_dict, filename=filename
        )
    except Exception as e:
        print(f"[PARSER_SERVICE] Error processing message: {str(e)}")


def app():
    """Main application function.

    Subscribes ``process_message`` to the input channel and starts the
    subscriber's listener in a thread. Returns early when either step reports
    an error.
    """
    print("[PARSER_SERVICE] Starting Parser Service")

    # Subscribe to the input channel
    result = redis_pubsub.subscriber.subscribe(REDIS_CHANNEL_IN, process_message)
    if not result.status:
        print(f"[PARSER_SERVICE] Subscribe error: {result.error}")
        return
    print(f"[PARSER_SERVICE] Subscribed to channel: {REDIS_CHANNEL_IN}")

    # Start listening for messages
    listen_result = redis_pubsub.subscriber.start_listening(in_thread=True)
    if not listen_result.status:
        print(f"[PARSER_SERVICE] Error starting listener: {listen_result.error}")
        return
    print("[PARSER_SERVICE] Listening for messages")


if __name__ == "__main__":
    # Initialize the app once
    app()

    # Keep the main thread alive
    try:
        while True:
            time.sleep(Config.EMAIL_SLEEP)
    except KeyboardInterrupt:
        print("\n[PARSER_SERVICE] Stopping service...")
        redis_pubsub.subscriber.stop_listening()