97 lines
3.2 KiB
Python
97 lines
3.2 KiB
Python
import time
|
|
import arrow
|
|
import io
|
|
import datetime
|
|
|
|
from pandas import DataFrame, read_excel
|
|
from Services.MongoService.provider import MongoProvider
|
|
|
|
from unidecode import unidecode
|
|
from Configs.mongo import MongoConfig
|
|
|
|
|
|
# First element of the ``storage_reason`` list handed to MongoProvider in app().
mongo_prefix: str = "CollectedData"
# Separator character; not referenced by the code visible in this module.
delimiter: str = "|"
|
|
|
|
|
|
def collect_excel_files_from_mongo_database(mongo_provider) -> list:
    """Fetch every stored document whose ``stage`` field equals ``"read"``.

    Args:
        mongo_provider: Object exposing ``find_many(filter_query=...)``.

    Returns:
        Whatever ``mongo_provider.find_many`` returns for that filter.
    """
    pending_filter = {"stage": "read"}
    pending_documents = mongo_provider.find_many(filter_query=pending_filter)
    return pending_documents
|
|
|
|
|
|
def update_parsed_data_to_mongo_database(
    mongo_provider, collected_data_dict: list[dict], filename: str
) -> None:
    """Write the parsing outcome back onto the document matching ``filename``.

    A non-empty ``collected_data_dict`` is stored under ``parsed`` and the
    document's ``stage`` becomes ``"parsed"``. Otherwise ``parsed`` is set to
    ``None`` and ``stage`` becomes ``"not found"``.

    Args:
        mongo_provider: Object exposing
            ``update_one(filter_query=..., update_data=...)``.
        collected_data_dict: Records extracted from the file (a list of
            dicts, despite the parameter name).
        filename: Value matched against the document's ``filename`` field.
    """
    if collected_data_dict:
        new_fields = {"parsed": collected_data_dict, "stage": "parsed"}
    else:
        new_fields = {"parsed": None, "stage": "not found"}

    mongo_provider.update_one(
        filter_query={"filename": filename},
        update_data={"$set": new_fields},
    )
|
|
|
|
|
|
def _parse_amount(cell) -> float:
    """Convert an amount cell to a number, mapping blank cells to 0.

    Thousands separators (``,``) are stripped before conversion.
    """
    # pandas represents an empty cell as NaN, and NaN is truthy, so a bare
    # truthiness test would let it through and NaN would be stored as the
    # amount. The string comparison mirrors the NaN check used for the
    # first two columns in parse_excel_file.
    if not cell or str(cell) == "nan":
        return 0
    return float(str(cell).replace(",", ""))


def parse_excel_file(excel_frame: DataFrame) -> list[dict]:
    """Extract transaction records from a statement spreadsheet.

    Rows are read with ``itertuples()``, so ``row[0]`` is the frame index and
    ``row[1]`` is the first spreadsheet column.

    * A row whose third column contains ``IBAN`` (case-insensitive) sets the
      current IBAN from its fifth column, with spaces removed. That IBAN is
      attached to every record produced from then on.
    * A row yields a record when neither of its first two columns is NaN and
      the first column contains at least two ``/`` characters; that column is
      then parsed as ``%d/%m/%Y-%H:%M:%S``.

    Args:
        excel_frame: Frame as loaded from the spreadsheet.

    Returns:
        One dict per transaction row with the keys ``iban``, ``bank_date``,
        ``channel_branch``, ``currency_value``, ``balance``,
        ``additional_balance``, ``process_name``, ``process_type``,
        ``process_comment`` and ``bank_reference_code``. Blank amount cells
        yield ``0``.

    Raises:
        ValueError: A date-like first column does not match the expected
            format, or an amount cell is not numeric.
        IndexError: The frame has fewer columns than the positions read here
            (up to the fifteenth column on transaction rows).
    """
    iban, data_list = "", []
    for row in excel_frame.itertuples():
        if "IBAN" in str(row[3]).upper():
            iban = str(row[5]).replace(" ", "")

        # Skip rows where either of the first two columns is empty.
        if str(row[1]) == "nan" or str(row[2]) == "nan":
            continue
        # Only dd/mm/yyyy-style values (two or more slashes) mark a transaction.
        if len(str(row[1]).split("/")) <= 2:
            continue

        bank_date = datetime.datetime.strptime(str(row[1]), "%d/%m/%Y-%H:%M:%S")
        data_list.append(
            dict(
                iban=str(iban),
                bank_date=str(arrow.get(bank_date)),
                channel_branch=unidecode(str(row[3])),
                currency_value=_parse_amount(row[4]),
                balance=_parse_amount(row[5]),
                additional_balance=_parse_amount(row[6]),
                process_name=str(row[7]),
                process_type=unidecode(str(row[8])),
                process_comment=unidecode(str(row[9])),
                bank_reference_code=str(row[15]),
            )
        )
    return data_list
|
|
|
|
|
|
def app():
    """Run one parsing pass over every stored file still in the ``read`` stage.

    Opens a Mongo client, builds a ``MongoProvider`` whose ``storage_reason``
    is ``[mongo_prefix, <today's date>]``, loads each pending document's
    ``payload`` as a spreadsheet, parses it, and writes the outcome back
    under the document's ``filename``. Does nothing further when no pending
    documents are found.
    """
    print("Hello from parserservice!")
    with MongoProvider.mongo_client() as mongo_client:
        mongo_provider = MongoProvider(
            client=mongo_client,
            database=MongoConfig.DATABASE_NAME,
            storage_reason=[mongo_prefix, str(arrow.now().date())],
        )

        pending_files = collect_excel_files_from_mongo_database(mongo_provider)
        # A falsy result (nothing pending) simply yields no iterations.
        for stored_file in pending_files or ():
            source_name = stored_file.get("filename")
            raw_bytes = stored_file.get("payload")

            # Wrap the stored bytes in a file-like object for read_excel.
            workbook_stream = io.BytesIO(raw_bytes)
            excel_frame = DataFrame(read_excel(workbook_stream))

            # Pull the IBAN and the transaction rows out of the sheet.
            parsed_rows = parse_excel_file(excel_frame)
            update_parsed_data_to_mongo_database(
                mongo_provider=mongo_provider,
                collected_data_dict=parsed_rows,
                filename=source_name,
            )
|
|
|
|
|
|
if __name__ == "__main__":
    # Poll forever: run one parsing pass, then pause before the next one.
    poll_interval_seconds = 60
    while True:
        app()
        time.sleep(poll_interval_seconds)
|