python-account-miner/miner.py

267 lines
9.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from base_import import get_session_factory
from sqlalchemy import text as sqlalchemy_text
session_factory = get_session_factory()
session = session_factory()
# Add LIMIT and OFFSET directly to the SQL query
def query_to_run(limit: int, offset: int):
return sqlalchemy_text(f"""
SELECT a.id, a.uu_id, a.iban, a.bank_date, a.process_comment, a.payment_result_type,
b.part_code, COUNT(a2.id) AS sum_a2_id
FROM public.account_records AS a
INNER JOIN public.build_parts b ON b.id = a.build_parts_id
INNER JOIN public.api_enum_dropdown ae ON ae.id = a.payment_result_type
INNER JOIN public.build_living_space bl ON bl.id = a.living_space_id
INNER JOIN public.people p ON p.id = bl.person_id
LEFT JOIN public.account_records_model_train a2 ON a2.account_records_id = a.id
WHERE a.bank_date::date > '2023-06-30' and a2.id is null
GROUP BY a.id, a.uu_id, a.iban, a.bank_date, a.process_comment, a.payment_result_type, b.part_code
ORDER BY a.bank_date ASC
LIMIT {int(limit)} OFFSET {int(offset)};
""")
"""
52 56b75aec-d28f-4cd4-84e9-ea222cc1d9bd BuildTypes APT_KZN Apartman Kazan Dairesi
53 a9f854d1-d01d-4f2a-af5f-1ccf34193e0f BuildTypes APT_GRJ Apartman Garaj
54 ed7371a4-0a0a-491d-b1f9-015025b6ac91 BuildTypes APT_DP Apartman Depo
55 f6eb95dd-5ed0-407b-8205-4bc855199b06 BuildTypes DAIRE Apartman Dairesi
56 242bbe5e-44df-4f10-9583-9d80ff93c52d BuildTypes APT Apartman Binası
57 8920eb8b-a5aa-42c3-81d0-13afca85ba1f BuildTypes APT_YNT Apartman Yönetimi
58 a7b98daf-c83e-494d-8938-d716be131b5d BuildTypes APT_PRK Apartman Açık Park Alanı
59 628188d9-b5e3-493e-9a42-afac3f5bf816 BuildTypes APT_YSL Apartman Yeşil Alan
60 8b5bcca2-7702-4486-904c-d708248ccd4d BuildTypes APT_YOL Apartman Ara Yol
5 1b51381a-b5a9-485e-884e-fab07b4adf21 BuildDuesTypes BDT-S Service fee
6 4619b29f-7b60-4b95-9a97-50a4e5d40f94 BuildDuesTypes BDT-I Information
1 2d0127eb-899e-47c5-ad86-67a78174bf90 BuildDuesTypes BDT-D Bina Aidat
2 11656423-24b7-4ed9-96e7-1563f639da53 BuildDuesTypes BDT-A Bina Ek Aidat
3 c74c72f4-5e10-4d00-8016-4f9ddd50b3c4 BuildDuesTypes BDT-R Bina Tadilat
4 5edeb654-b7ce-4c1f-b7e3-2c717bb1d263 BuildDuesTypes BDT-L Bina Yasal Harcama
48 f14ae805-8238-438f-a522-d8ac6553f717 TimePeriod TP-W Weekly
49 184c3356-6397-476d-a965-45ddf26a4ff5 TimePeriod TP-M Monthly
50 ba36110f-7afe-4c41-bcad-f80ce71f626e TimePeriod TP-Q Quarterly
51 89ff94c6-126e-45c2-9bc7-6d1007d02528 TimePeriod TP-Y Yearly
32 3cf533a4-3947-4563-9a43-16ea2bab1119 PerComType 1 Person
"""
class Categories:
APTKZN = "APTKZN"
APTGRJ = "APTGRJ"
APTDP = "APTDP"
DAIRE = "DAIRE"
APT = "APT"
APTYNT = "APTYNT"
APTPRK = "APTPRK"
APTYSL = "APTYSL"
APTYOL = "APTYOL"
BDTI = "BDTI"
BDTD = "BDTD"
BDTA = "BDTA"
BDTR = "BDTR"
BDTL = "BDTL"
TPW = "TPW"
TPM = "TPM"
TPQ = "TPQ"
TPY = "TPY"
PERSON = "PERSON"
@classmethod
def get_category_id(cls, category_name):
category_dict = {
"APTKZN": (52, "56b75aec-d28f-4cd4-84e9-ea222cc1d9bd"),
"APTGRJ": (53, "a9f854d1-d01d-4f2a-af5f-1ccf34193e0f"),
"APTDP": (54, "ed7371a4-0a0a-491d-b1f9-015025b6ac91"),
"DAIRE": (55, "f6eb95dd-5ed0-407b-8205-4bc855199b06"),
"APT": (56, "242bbe5e-44df-4f10-9583-9d80ff93c52d"),
"APTYNT": (57, "8920eb8b-a5aa-42c3-81d0-13afca85ba1f"),
"APTPRK": (58, "a7b98daf-c83e-494d-8938-d716be131b5d"),
"APTYSL": (59, "628188d9-b5e3-493e-9a42-afac3f5bf816"),
"APTYOL": (60, "8b5bcca2-7702-4486-904c-d708248ccd4d"),
"BDTI": (5, "1b51381a-b5a9-485e-884e-fab07b4adf21"),
"BDTD": (1, "2d0127eb-899e-47c5-ad86-67a78174bf90"),
"BDTA": (2, "11656423-24b7-4ed9-96e7-1563f639da53"),
"BDTR": (3, "c74c72f4-5e10-4d00-8016-4f9ddd50b3c4"),
"BDTL": (4, "5edeb654-b7ce-4c1f-b7e3-2c717bb1d263"),
"TPW": (48, "f14ae805-8238-438f-a522-d8ac6553f717"),
"TPM": (49, "184c3356-6397-476d-a965-45ddf26a4ff5"),
"TPQ": (50, "ba36110f-7afe-4c41-bcad-f80ce71f626e"),
"TPY": (51, "89ff94c6-126e-45c2-9bc7-6d1007d02528"),
"PERSON": (32, "3cf533a4-3947-4563-9a43-16ea2bab1119"),
}
if not category_name in category_dict:
raise ValueError(f"Invalid category name: {category_name}")
return category_dict.get(category_name)
def get_model_train_query():
"""
Returns a SQLAlchemy text object for inserting model train data.
Parameters will be bound when executing the query.
"""
return sqlalchemy_text("""
INSERT INTO public.account_records_model_train (
account_records_id,
account_records_uu_id,
search_text,
start_index,
end_index,
category_id,
category_uu_id
)
VALUES (
:account_records_id,
:account_records_uu_id,
:search_text,
:start_index,
:end_index,
:category_id,
:category_uu_id
);
""")
def find_indices(search_text: str, target_text: str, offset: int = 1):
"""
Returns start and end indices of the search_text within target_text
Args:
search_text (str): The text to search for
target_text (str): The text to search within
offset (int, optional): Which occurrence to find (1 for first, 2 for second, etc). Defaults to 1.
Returns:
tuple: (start_index, end_index) of the found text, or (-1, -1) if not found or offset is too large
"""
if offset < 1:
offset = 1
current_pos = 0
occurrence_count = 0
while occurrence_count < offset:
start_index = target_text.find(search_text, current_pos)
if start_index == -1: # Not found
return -1, -1
occurrence_count += 1
if occurrence_count == offset:
end_index = start_index + len(search_text) - 1
return start_index, end_index + 1
# Move past this occurrence to find the next one
current_pos = start_index + 1
raise Exception("Offset is too large")
class AccountRecord:
def __init__(self, id, uu_id, iban, bank_date, process_comment, payment_result_type, part_code, sum_a2_id):
self.id = id
self.uu_id = str(uu_id)
self.iban = iban
self.bank_date = bank_date
self.process_comment = process_comment
self.payment_result_type = payment_result_type
self.part_code = part_code
self.sum_a2_id = sum_a2_id
def to_dict(self):
return {
"id": self.id,
"uu_id": self.uu_id,
"iban": self.iban,
"bank_date": self.bank_date,
"process_comment": self.process_comment,
"payment_result_type": self.payment_result_type,
"part_code": self.part_code,
"sum_a2_id": self.sum_a2_id,
}
# Execute the query and process results directly
results = session.execute(query_to_run(limit=1, offset=0))
account_record_dict = dict()
for result in results:
"""
(791, UUID('9d276cc8-289f-45c1-9805-44464af5d7bf'), 'TR400006400000142450093333',
datetime.datetime(2023, 7, 1, 12, 22, 27, tzinfo=datetime.timezone.utc),
'2 NOLU DAİRE TEMMUZ Ç3 AİDAT*SONGÜL VAR*Hİ7748686973', 1, 'DAIRE_2', 4)
"""
account_record = AccountRecord(*result)
account_record_dict = account_record.to_dict()
"""
"APTKZN": (52, "56b75aec-d28f-4cd4-84e9-ea222cc1d9bd"),
"APTGRJ": (53, "a9f854d1-d01d-4f2a-af5f-1ccf34193e0f"),
"APTDP": (54, "ed7371a4-0a0a-491d-b1f9-015025b6ac91"),
"DAIRE": (55, "f6eb95dd-5ed0-407b-8205-4bc855199b06"),
"APT": (56, "242bbe5e-44df-4f10-9583-9d80ff93c52d"),
"APTYNT": (57, "8920eb8b-a5aa-42c3-81d0-13afca85ba1f"),
"APTPRK": (58, "a7b98daf-c83e-494d-8938-d716be131b5d"),
"APTYSL": (59, "628188d9-b5e3-493e-9a42-afac3f5bf816"),
"APTYOL": (60, "8b5bcca2-7702-4486-904c-d708248ccd4d"),
"BDTI": (5, "1b51381a-b5a9-485e-884e-fab07b4adf21"),
"BDTD": (6, "4619b29f-7b60-4b95-9a97-50a4e5d40f94"),
"BDTA": (2, "11656423-24b7-4ed9-96e7-1563f639da53"),
"BDTR": (3, "c74c72f4-5e10-4d00-8016-4f9ddd50b3c4"),
"BDTL": (4, "5edeb654-b7ce-4c1f-b7e3-2c717bb1d263"),
"TPW": (48, "f14ae805-8238-438f-a522-d8ac6553f717"),
"TPM": (49, "184c3356-6397-476d-a965-45ddf26a4ff5"),
"TPQ": (50, "ba36110f-7afe-4c41-bcad-f80ce71f626e"),
"TPY": (51, "89ff94c6-126e-45c2-9bc7-6d1007d02528"),
"PCT": (32, "3cf533a4-3947-4563-9a43-16ea2bab1119"),
"""
print(account_record_dict['id'])
print(account_record_dict['uu_id'])
print(account_record_dict['process_comment'])
account_records_id = int(219)
account_records_uuid = str("5d301273-806c-47d6-aeeb-e056dc119494")
sample_text = "GÜNEŞ APARTMANI AİDAT EYLÜL*HASAN CİHAN ŞENKÜÇÜK*Hİ9021822604"
search_text = "HASAN CİHAN ŞENKÜÇÜK"
start_index, end_index = find_indices(search_text, sample_text, offset=1)
#exit()
print("start_index", start_index)
print("end_index", end_index)
category_id, category_uuid = Categories.get_category_id(Categories.PERSON)
write_dict = dict(
account_records_id=account_records_id,
account_records_uu_id=account_records_uuid,
search_text=search_text,
start_index=start_index,
end_index=end_index,
category_id=category_id,
category_uu_id=category_uuid
)
print('write_dict', write_dict)
# Prepare the parameters for the query
params = {
"account_records_id": account_records_id,
"account_records_uu_id": account_records_uuid,
"search_text": search_text,
"start_index": start_index,
"end_index": end_index,
"category_id": category_id,
"category_uu_id": category_uuid
}
# Get the parameterized query template
query_template = get_model_train_query()
# Print parameters for debugging
print("Parameters:", params)
# Execute the query with parameters
session.execute(query_template, params)
session.commit()