Bank Services tested and completed
This commit is contained in:
29
BankServices/ParserService/Dockerfile
Normal file
29
BankServices/ParserService/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
||||
# Parser Service image.
# NOTE(review): the COPY sources below (/BankServices/..., /Controllers) look
# like they expect the repository root as the build context - confirm against
# the docker-compose file.
FROM python:3.12-slim

WORKDIR /

# Install system dependencies and Poetry
RUN apt-get update && apt-get install -y --no-install-recommends gcc \
    && rm -rf /var/lib/apt/lists/* && pip install --no-cache-dir poetry

# Copy Poetry configuration first so the dependency layer below is only
# rebuilt when pyproject.toml changes.
COPY /BankServices/ParserService/pyproject.toml ./pyproject.toml

# Install runtime dependencies into the system interpreter (no virtualenv),
# then drop the pip and Poetry caches to keep the layer small.
RUN poetry config virtualenvs.create false \
    && poetry install --no-interaction --no-ansi --no-root --only main \
    && pip cache purge && rm -rf ~/.cache/pypoetry

# Copy application code
COPY /BankServices/ParserService /BankServices/ParserService
# NOTE(review): this second copy also places the service files in / - confirm
# whether anything still relies on it.
COPY /BankServices/ParserService /
# ServiceDepends includes config.py, so no separate COPY of that file is needed.
COPY /BankServices/ServiceDepends /BankServices/ServiceDepends

COPY /Controllers /Controllers

# Make the packages copied under / importable, and keep stdout unbuffered so
# the service's print-based logging shows up immediately in `docker logs`.
ENV PYTHONPATH=/ PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1

# Run the application
CMD ["python", "/BankServices/ParserService/app.py"]
|
||||
76
BankServices/ParserService/README.md
Normal file
76
BankServices/ParserService/README.md
Normal file
@@ -0,0 +1,76 @@
|
||||
# Parser Service
|
||||
|
||||
## Overview
|
||||
The Parser Service is the second component in the Redis pub/sub processing chain for bank-related email automation. It subscribes to messages with stage="red" from the Email Service, parses Excel attachments, and publishes the processed data back to Redis with stage="parsed".
|
||||
|
||||
## Features
|
||||
|
||||
### Redis Integration
|
||||
- Subscribes to the "reader" Redis channel (the Email Service's output channel) and processes messages with stage="red"
|
||||
- Processes Excel attachments contained in the messages
|
||||
- Publishes the result to the "parser" Redis channel with stage="parsed", or stage="not found" when no records could be extracted
|
||||
- Maintains message metadata and adds processing timestamps
|
||||
|
||||
### Excel Processing
|
||||
- Parses bank statement Excel files
|
||||
- Extracts transaction data including:
|
||||
- IBAN numbers
|
||||
- Transaction dates and times
|
||||
- Currency values and balances
|
||||
- Transaction types and references
|
||||
- Branch information
|
||||
|
||||
### Error Handling
|
||||
- Robust error management for Excel parsing
|
||||
- Detailed logging of processing steps and errors
|
||||
- Graceful handling of malformed messages
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
The service uses the same Redis configuration as the Email Service:
|
||||
```
|
||||
REDIS_HOST=10.10.2.15
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=your_strong_password_here
|
||||
```
|
||||
|
||||
## Deployment
|
||||
|
||||
### Docker
|
||||
The service is containerized using Docker and can be deployed using the provided Dockerfile and docker-compose configuration.
|
||||
|
||||
```bash
|
||||
# Build and start the service
|
||||
docker compose -f bank-services-docker-compose.yml up -d --build
|
||||
|
||||
# View logs
|
||||
docker compose -f bank-services-docker-compose.yml logs -f parser_service
|
||||
|
||||
# Stop the service
|
||||
docker compose -f bank-services-docker-compose.yml down
|
||||
```
|
||||
|
||||
### Service Management
|
||||
The `check_bank_services.sh` script provides a simple way to restart the service:
|
||||
|
||||
```bash
|
||||
./check_bank_services.sh
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### Redis Pub/Sub Chain
|
||||
This service is the second in a multi-stage processing chain:
|
||||
1. **Email Service**: Reads emails, extracts attachments, publishes to Redis with stage="red"
|
||||
2. **Parser Service** (this service): Subscribes to stage="red" messages, parses Excel data, republishes with stage="parsed"
|
||||
3. **Writer Service**: Subscribes to stage="parsed" messages, writes data to final destination, marks as stage="completed"
|
||||
|
||||
## Development
|
||||
|
||||
### Dependencies
|
||||
- Python 3.12
|
||||
- Pandas with xlrd (.xls) and openpyxl (.xlsx) for Excel processing
|
||||
- Redis for pub/sub messaging
|
||||
- Arrow for date handling
|
||||
- Unidecode for text normalization
|
||||
227
BankServices/ParserService/app.py
Normal file
227
BankServices/ParserService/app.py
Normal file
@@ -0,0 +1,227 @@
|
||||
import base64
import datetime
import io
import json
import os
import time

import arrow
from pandas import DataFrame, read_excel
from unidecode import unidecode

from BankServices.ServiceDepends.config import Config

# Import Redis pub/sub handler
from Controllers.Redis.Broadcast.actions import redis_pubsub
|
||||
|
||||
# Redis channel names used by this service.
# Input: the channel the Email Service publishes on.
REDIS_CHANNEL_IN = "reader"
# Output: the channel this service publishes its parsing results on.
REDIS_CHANNEL_OUT = "parser"
# NOTE(review): not referenced anywhere in the visible part of this module.
delimiter = "|"
|
||||
|
||||
|
||||
def publish_parsed_data_to_redis(data, collected_data_dict: list[dict], filename: str) -> bool:
    """Send the parsing outcome for one file to the output Redis channel.

    Args:
        data: Message received from Redis. When it is a dict its keys are
            carried over as metadata; any other value is ignored.
        collected_data_dict: Records extracted from the Excel file.
        filename: Name of the file the records came from.

    Returns:
        bool: True when the publisher reports success, False otherwise.
    """
    # Keep the incoming metadata but drop the raw attachment fields, so the
    # outgoing message does not carry the file content again.
    outgoing = data.copy() if isinstance(data, dict) else {}
    for raw_key in ("payload", "is_base64"):
        outgoing.pop(raw_key, None)

    # An empty result is reported as 'not found' rather than 'parsed'.
    has_records = bool(collected_data_dict)
    outgoing["parsed"] = collected_data_dict if has_records else None
    outgoing["stage"] = "parsed" if has_records else "not found"

    # Stamp the message with the processing time and the source file.
    outgoing["parsed_at"] = str(arrow.now())
    outgoing["filename"] = filename

    outcome = redis_pubsub.publisher.publish(REDIS_CHANNEL_OUT, outgoing)
    if not outcome.status:
        print(f"[PARSER_SERVICE] Publish error: {outcome.error}")
        return False

    print(f"[PARSER_SERVICE] Published parsed data for {filename} with stage: {outgoing['stage']}")
    return True
|
||||
|
||||
|
||||
def _cell_to_amount(cell):
    """Convert a numeric statement cell to a float, treating empty cells as 0."""
    text = str(cell).replace(",", "").strip()
    # pandas represents an empty cell as NaN, which is truthy: a plain
    # truthiness test would let it through and produce float("nan").
    if not cell or text.lower() in ("", "nan"):
        return 0
    return float(text)


def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
    """Extract transaction records from a bank statement sheet.

    Columns are addressed by position (1-based, because index 0 of each
    ``itertuples`` row is the DataFrame index). A row whose third column
    contains "IBAN" supplies the IBAN (fifth column) for the records that
    follow. A row is treated as a transaction when its first two columns are
    non-empty and the first one looks like a ``dd/mm/YYYY-HH:MM:SS`` stamp.

    Parsing stops at the first row that raises; the error is printed and the
    records collected up to that point are returned.

    Args:
        excel_frame: DataFrame containing Excel data

    Returns:
        list[dict]: List of parsed data dictionaries
    """
    iban, data_list = "", []
    try:
        for row in excel_frame.itertuples():
            if "IBAN" in str(row[3]).upper():
                iban = str(row[5]).replace(" ", "")

            # Rows missing either of the first two columns are not transactions.
            if str(row[1]) == "nan" or str(row[2]) == "nan":
                continue
            # A transaction stamp contains two slashes (dd/mm/YYYY-...).
            if len(str(row[1]).split("/")) <= 2:
                continue

            booked_at = datetime.datetime.strptime(str(row[1]), "%d/%m/%Y-%H:%M:%S")
            data_list.append(
                dict(
                    iban=str(iban),
                    bank_date=str(arrow.get(booked_at)),
                    channel_branch=unidecode(str(row[3])),
                    currency_value=_cell_to_amount(row[4]),
                    balance=_cell_to_amount(row[5]),
                    additional_balance=_cell_to_amount(row[6]),
                    process_name=str(row[7]),
                    process_type=unidecode(str(row[8])),
                    process_comment=unidecode(str(row[9])),
                    bank_reference_code=str(row[15]),
                )
            )
        print(f"[PARSER_SERVICE] Successfully parsed {len(data_list)} records from Excel file")
    except Exception as e:
        # Best effort by design: report the failure and hand back whatever
        # was parsed before it.
        print(f"[PARSER_SERVICE] Error parsing Excel file: {str(e)}")
    return data_list
|
||||
|
||||
|
||||
def _coerce_payload_to_bytes(payload, is_base64):
    """Return the attachment payload as bytes, or None when it is unusable."""
    if isinstance(payload, str):
        if not is_base64:
            # Convert regular string payload to bytes
            return payload.encode('utf-8')
        try:
            decoded = base64.b64decode(payload)
        except ValueError as e:  # binascii.Error is a ValueError subclass
            print(f"[PARSER_SERVICE] Error decoding base64 payload: {str(e)}")
            return None
        print(f"[PARSER_SERVICE] Successfully decoded base64 payload, size: {len(decoded)} bytes")
        return decoded
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)
    print(f"[PARSER_SERVICE] Unsupported payload type: {type(payload).__name__}")
    return None


def _save_debug_copy(payload, filename):
    """Write the payload under /tmp for debugging; return its path or None."""
    # The filename comes from the message, so keep only its last path
    # component: the copy can then never be written outside /tmp.
    safe_name = os.path.basename(str(filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "attachment"
    debug_path = os.path.join("/tmp", safe_name)
    try:
        with open(debug_path, 'wb') as f:
            f.write(payload)
    except OSError as e:
        print(f"[PARSER_SERVICE] Could not save debug file: {str(e)}")
        return None
    print(f"[PARSER_SERVICE] Saved payload to {debug_path} for debugging")
    return debug_path


def _read_excel_frame(payload, filename, debug_path):
    """Load the workbook with the first approach that works; raise otherwise."""
    lowered = str(filename or "").lower()
    approaches = [
        # Approach 1: Try xlrd for .xls files
        lambda: DataFrame(read_excel(io.BytesIO(payload), engine='xlrd')) if lowered.endswith('.xls') else None,
        # Approach 2: Try openpyxl for .xlsx files
        lambda: DataFrame(read_excel(io.BytesIO(payload), engine='openpyxl')) if lowered.endswith('.xlsx') else None,
        # Approach 3: Try xlrd with explicit sheet name
        lambda: DataFrame(read_excel(io.BytesIO(payload), engine='xlrd', sheet_name=0)),
        # Approach 4: Try the debug copy on disk (only if it was written)
        lambda: DataFrame(read_excel(debug_path)) if debug_path else None,
    ]

    errors = []
    for number, approach in enumerate(approaches, start=1):
        try:
            result = approach()
        except Exception as e:
            # Any reader failure just means "try the next approach".
            errors.append(f"Approach {number}: {str(e)}")
            continue
        if result is not None:
            print(f"[PARSER_SERVICE] Successfully read Excel file using approach {number}")
            return result

    error_details = "\n".join(errors)
    raise ValueError(f"Failed to read Excel file using all approaches:\n{error_details}")


def process_message(message):
    """Process a message from Redis.

    Only messages whose data is a JSON object with stage 'red' are handled:
    the attachment payload is decoded, read as an Excel workbook, parsed and
    the result is published. Everything else is reported and skipped, so a
    malformed message never raises out of this callback.

    Args:
        message: Message data from Redis subscriber
    """
    # Extract the message data
    data = message["data"]

    # Text (or bytes) data is expected to be a JSON document
    if isinstance(data, (str, bytes, bytearray)):
        try:
            data = json.loads(data)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"[PARSER_SERVICE] Error parsing message data: {e}")
            return

    # Valid JSON need not be an object (e.g. a list or a number); such data
    # has no stage to inspect.
    if not isinstance(data, dict):
        print(f"[PARSER_SERVICE] Ignoring message with non-object data of type {type(data).__name__}")
        return

    # Check if stage is 'red' before processing
    if data.get("stage") != "red":
        print(f"[PARSER_SERVICE] Skipped message with UUID: {data.get('uuid')} (stage is not 'red')")
        return

    try:
        filename = data.get("filename")
        print(f"[PARSER_SERVICE] Processing file: {filename}")

        payload = _coerce_payload_to_bytes(data.get("payload"), data.get("is_base64", False))
        if payload is None:
            return

        debug_path = _save_debug_copy(payload, filename)
        excel_frame = _read_excel_frame(payload, filename, debug_path)

        # Extract data from the Excel file
        collected_data_dict = parse_excel_file(excel_frame)

        # Publish parsed data to Redis
        publish_parsed_data_to_redis(
            data=data,
            collected_data_dict=collected_data_dict,
            filename=filename,
        )
    except Exception as e:
        # Callback boundary: report the failure instead of letting it
        # propagate to the subscriber that invoked us.
        print(f"[PARSER_SERVICE] Error processing message: {str(e)}")
|
||||
|
||||
|
||||
def app():
    """Subscribe to the input channel and start the background listener.

    Returns early, after printing the reported error, when either the
    subscription or the listener start-up does not succeed.
    """
    print("[PARSER_SERVICE] Starting Parser Service")

    # Register process_message as the handler for the input channel.
    subscription = redis_pubsub.subscriber.subscribe(REDIS_CHANNEL_IN, process_message)
    if not subscription.status:
        print(f"[PARSER_SERVICE] Subscribe error: {subscription.error}")
        return
    print(f"[PARSER_SERVICE] Subscribed to channel: {REDIS_CHANNEL_IN}")

    # Listen in a separate thread so this call returns to the caller.
    listener = redis_pubsub.subscriber.start_listening(in_thread=True)
    if not listener.status:
        print(f"[PARSER_SERVICE] Error starting listener: {listener.error}")
        return
    print("[PARSER_SERVICE] Listening for messages")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Subscribe and start the listener exactly once.
    app()

    # The listener was started with in_thread=True, so the main thread only
    # has to stay alive; it sleeps in a loop until Ctrl+C.
    try:
        while True:
            time.sleep(Config.EMAIL_SLEEP)
    except KeyboardInterrupt:
        print("\n[PARSER_SERVICE] Stopping service...")
        redis_pubsub.subscriber.stop_listening()
|
||||
15
BankServices/ParserService/pyproject.toml
Normal file
15
BankServices/ParserService/pyproject.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
# Project metadata and runtime dependencies of the Parser Service.
[project]
name = "parserservice"
version = "0.1.0"
description = "Parser Service for bank email attachments using Redis pub/sub"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "arrow>=1.3.0",
    "pandas>=2.2.3",
    "redis>=5.0.1",
    "unidecode>=1.3.8",
    # pandas engine that app.py selects for .xls workbooks
    "xlrd>=2.0.1",
    # pandas engine that app.py selects for .xlsx workbooks
    "openpyxl>=3.1.2",
    # NOTE(review): presumably required by BankServices.ServiceDepends.config - confirm
    "pydantic-settings>=2.8.1",
]
|
||||
Reference in New Issue
Block a user