black shift

This commit is contained in:
2025-04-22 11:10:29 +03:00
parent d7f1da8de8
commit e5f88f2eb4
30 changed files with 671 additions and 521 deletions

View File

@@ -18,14 +18,16 @@ REDIS_CHANNEL_OUT = "parser" # Publish to Parser Service channel
delimiter = "|"
def publish_parsed_data_to_redis(data, collected_data_dict: list[dict], filename: str) -> bool:
def publish_parsed_data_to_redis(
data, collected_data_dict: list[dict], filename: str
) -> bool:
"""Publish parsed data to Redis.
Args:
data: Original message data from Redis
collected_data_dict: Parsed data from Excel file
filename: Name of the processed file
Returns:
bool: Success status
"""
@@ -40,16 +42,18 @@ def publish_parsed_data_to_redis(data, collected_data_dict: list[dict], filename
else:
message["parsed"] = None
message["stage"] = "not found" # Mark as 'not found' if parsing failed
# Add processing timestamp
message["parsed_at"] = str(arrow.now())
message["filename"] = filename
# Publish to Redis channel
result = redis_pubsub.publisher.publish(REDIS_CHANNEL_OUT, message)
if result.status:
print(f"[PARSER_SERVICE] Published parsed data for {filename} with stage: {message['stage']}")
print(
f"[PARSER_SERVICE] Published parsed data for {filename} with stage: {message['stage']}"
)
return True
else:
print(f"[PARSER_SERVICE] Publish error: {result.error}")
@@ -58,10 +62,10 @@ def publish_parsed_data_to_redis(data, collected_data_dict: list[dict], filename
def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
"""Parse Excel file data.
Args:
excel_frame: DataFrame containing Excel data
Returns:
list[dict]: List of parsed data dictionaries
"""
@@ -76,13 +80,17 @@ def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
dict(
iban=str(iban),
bank_date=arrow.get(
datetime.datetime.strptime(str(row[1]), "%d/%m/%Y-%H:%M:%S")
datetime.datetime.strptime(
str(row[1]), "%d/%m/%Y-%H:%M:%S"
)
).__str__(),
channel_branch=unidecode(str(row[3])),
currency_value=(
float(str(row[4]).replace(",", "")) if row[4] else 0
),
balance=float(str(row[5]).replace(",", "")) if row[5] else 0,
balance=(
float(str(row[5]).replace(",", "")) if row[5] else 0
),
additional_balance=(
float(str(row[6]).replace(",", "")) if row[6] else 0
),
@@ -92,7 +100,9 @@ def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
bank_reference_code=str(row[15]),
)
)
print(f"[PARSER_SERVICE] Successfully parsed {len(data_list)} records from Excel file")
print(
f"[PARSER_SERVICE] Successfully parsed {len(data_list)} records from Excel file"
)
except Exception as e:
print(f"[PARSER_SERVICE] Error parsing Excel file: {str(e)}")
return data_list
@@ -100,13 +110,13 @@ def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
def process_message(message):
"""Process a message from Redis.
Args:
message: Message data from Redis subscriber
"""
# Extract the message data
data = message["data"]
# If data is a string, parse it as JSON
if isinstance(data, str):
try:
@@ -114,7 +124,7 @@ def process_message(message):
except json.JSONDecodeError as e:
print(f"[PARSER_SERVICE] Error parsing message data: {e}")
return
# Check if stage is 'red' before processing
if data.get("stage") == "red":
try:
@@ -122,91 +132,109 @@ def process_message(message):
payload = data.get("payload")
is_base64 = data.get("is_base64", False)
print(f"[PARSER_SERVICE] Processing file: {filename}")
# Handle base64-encoded payload
if is_base64 and isinstance(payload, str):
try:
# Decode base64 string to bytes
payload = base64.b64decode(payload)
print(f"[PARSER_SERVICE] Successfully decoded base64 payload, size: {len(payload)} bytes")
print(
f"[PARSER_SERVICE] Successfully decoded base64 payload, size: {len(payload)} bytes"
)
except Exception as e:
print(f"[PARSER_SERVICE] Error decoding base64 payload: {str(e)}")
# Convert regular string payload to bytes if needed
elif isinstance(payload, str):
payload = payload.encode('utf-8')
payload = payload.encode("utf-8")
# Create an in-memory file-like object and try multiple approaches
excel_frame = None
errors = []
# Save payload to a temporary file for debugging if needed
temp_file_path = f"/tmp/{filename}"
try:
with open(temp_file_path, 'wb') as f:
with open(temp_file_path, "wb") as f:
f.write(payload)
print(f"[PARSER_SERVICE] Saved payload to {temp_file_path} for debugging")
print(
f"[PARSER_SERVICE] Saved payload to {temp_file_path} for debugging"
)
except Exception as e:
print(f"[PARSER_SERVICE] Could not save debug file: {str(e)}")
# Try different approaches to read the Excel file
approaches = [
# Approach 1: Try xlrd for .xls files
lambda: DataFrame(read_excel(io.BytesIO(payload), engine='xlrd')) if filename.lower().endswith('.xls') else None,
lambda: (
DataFrame(read_excel(io.BytesIO(payload), engine="xlrd"))
if filename.lower().endswith(".xls")
else None
),
# Approach 2: Try openpyxl for .xlsx files
lambda: DataFrame(read_excel(io.BytesIO(payload), engine='openpyxl')) if filename.lower().endswith('.xlsx') else None,
lambda: (
DataFrame(read_excel(io.BytesIO(payload), engine="openpyxl"))
if filename.lower().endswith(".xlsx")
else None
),
# Approach 3: Try xlrd with explicit sheet name
lambda: DataFrame(read_excel(io.BytesIO(payload), engine='xlrd', sheet_name=0)),
lambda: DataFrame(
read_excel(io.BytesIO(payload), engine="xlrd", sheet_name=0)
),
# Approach 4: Try with temporary file
lambda: DataFrame(read_excel(temp_file_path)),
]
# Try each approach until one works
for i, approach in enumerate(approaches):
try:
result = approach()
if result is not None:
excel_frame = result
print(f"[PARSER_SERVICE] Successfully read Excel file using approach {i+1}")
print(
f"[PARSER_SERVICE] Successfully read Excel file using approach {i+1}"
)
break
except Exception as e:
errors.append(f"Approach {i+1}: {str(e)}")
# If all approaches failed, raise an exception
if excel_frame is None:
error_details = "\n".join(errors)
raise Exception(f"Failed to read Excel file using all approaches:\n{error_details}")
raise Exception(
f"Failed to read Excel file using all approaches:\n{error_details}"
)
# Extract data from the Excel file
collected_data_dict = parse_excel_file(excel_frame)
# Publish parsed data to Redis
publish_parsed_data_to_redis(
data=data,
collected_data_dict=collected_data_dict,
filename=filename
data=data, collected_data_dict=collected_data_dict, filename=filename
)
except Exception as e:
print(f"[PARSER_SERVICE] Error processing message: {str(e)}")
else:
print(f"[PARSER_SERVICE] Skipped message with UUID: {data.get('uuid')} (stage is not 'red')")
print(
f"[PARSER_SERVICE] Skipped message with UUID: {data.get('uuid')} (stage is not 'red')"
)
def app():
"""Main application function."""
print("[PARSER_SERVICE] Starting Parser Service")
# Subscribe to the input channel
result = redis_pubsub.subscriber.subscribe(REDIS_CHANNEL_IN, process_message)
if result.status:
print(f"[PARSER_SERVICE] Subscribed to channel: {REDIS_CHANNEL_IN}")
else:
print(f"[PARSER_SERVICE] Subscribe error: {result.error}")
return
# Start listening for messages
listen_result = redis_pubsub.subscriber.start_listening(in_thread=True)
if listen_result.status:
print("[PARSER_SERVICE] Listening for messages")
else:
@@ -217,7 +245,7 @@ def app():
if __name__ == "__main__":
# Initialize the app once
app()
# Keep the main thread alive
try:
while True: