app accounts updated

This commit is contained in:
2024-11-25 21:34:33 +03:00
parent a371d5d6e3
commit c525ac1117
4 changed files with 226 additions and 134 deletions

View File

@@ -16,6 +16,7 @@ from pydantic import BaseModel
from databases.sql_models.company.company import Companies
from databases.sql_models.identity.identity import People
from service_account_records.regex_func import category_finder
class InsertBudgetRecord(BaseModel):
@@ -127,19 +128,22 @@ def remove_spaces_from_string(remove_string: str):
def get_garbage_words(comment: str, search_word: str):
    """Return the leftover ("garbage") text of *comment* after removing every
    word of *search_word*, upper-cased — or None when nothing remains.

    Both inputs are transliterated to ASCII with ``unidecode`` and stripped of
    spaces via ``remove_spaces_from_string`` before comparison, so accented
    characters match their plain-ASCII counterparts.
    """
    # Normalise both the comment and the search phrase to ASCII, space-stripped form.
    garbage_words = unidecode(remove_spaces_from_string(comment))
    search_word = unidecode(remove_spaces_from_string(search_word))
    # Delete every individual word of the search phrase from the comment.
    for word in search_word.split(" "):
        garbage_words = garbage_words.replace(remove_spaces_from_string(unidecode(word)), "")
    # Only return a value when some residue is left; callers treat None as
    # "the comment consisted entirely of the search phrase".
    if cleaned_from_spaces := remove_spaces_from_string(garbage_words):
        return str(unidecode(cleaned_from_spaces)).upper()
    return None
def remove_garbage_words(comment: str, garbage_word: str):
    """Strip every word of *garbage_word* out of *comment* and return the
    space-stripped, upper-cased remainder.

    ``*`` characters act as word separators in bank comments and are replaced
    by spaces first. A falsy *garbage_word* (e.g. the None that
    ``get_garbage_words`` may return) leaves the comment untouched.
    """
    cleaned_comment = remove_spaces_from_string(comment.replace("*", " "))
    if garbage_word:  # guard: get_garbage_words can return None
        garbage_word = remove_spaces_from_string(garbage_word.replace("*", " "))
        for letter in garbage_word.split(" "):
            # ASCII-fold the running comment, then delete this garbage word.
            cleaned_comment = unidecode(remove_spaces_from_string(cleaned_comment))
            cleaned_comment = cleaned_comment.replace(remove_spaces_from_string(letter), "")
    return str(remove_spaces_from_string(cleaned_comment)).upper()
@@ -198,47 +202,67 @@ def parse_comment_for_living_space(
):
comment = unidecode(comment)
best_similarity = dict(
company=None, living_space=None, found_from=None, similarity=0.0, garbage=""
company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
)
for person in living_space_dict[iban]["people"]:
person: People = person
first_name = unidecode(person.firstname).upper()
last_name = unidecode(person.surname).upper()
middle_name = unidecode(person.middle_name).upper()
search_word = f"{first_name} {last_name}"
if middle_name:
search_word = f"{first_name} {middle_name} {last_name}"
garbage_words = get_garbage_words(comment, search_word)
cleaned_comment = remove_garbage_words(comment, garbage_words)
similarity_ratio = textdistance.jaro_winkler(cleaned_comment, search_word)
if similarity_ratio > float(best_similarity["similarity"]):
for living_space in living_space_dict[iban]["living_space"]:
if living_space.person_id == person.id:
best_similarity = {
"company": None,
"living_space": living_space,
"found_from": "Person Name",
"similarity": similarity_ratio,
"garbage": garbage_words,
}
# print(
# 'cleaned_comment', cleaned_comment, '\n'
# 'search_word', search_word, '\n'
# 'best_similarity', best_similarity, '\n'
# 'person name', f"{first_name} {last_name}", '\n'
# 'similarity_ratio', similarity_ratio, '\n'
# 'garbage_words', garbage
# )
search_word_list = [
remove_spaces_from_string("".join([f"{first_name} {last_name}"])),
remove_spaces_from_string("".join([f"{last_name} {first_name}"]))
]
if middle_name := unidecode(person.middle_name).upper():
search_word_list.append(remove_spaces_from_string(f"{first_name} {middle_name} {last_name}"))
search_word_list.append(remove_spaces_from_string(f"{last_name} {middle_name} {first_name}"))
cleaned_comment = unidecode(comment).upper()
for search_word in search_word_list:
garbage_words = get_garbage_words(comment, unidecode(search_word))
if garbage_words:
garbage_words = unidecode(garbage_words).upper()
cleaned_comment = unidecode(remove_garbage_words(comment, garbage_words)).upper()
similarity_ratio = textdistance.jaro_winkler(cleaned_comment, str(search_word).upper())
if len(cleaned_comment) < len(f"{first_name}{last_name}"):
continue
if cleaned_comment and 0.9 < similarity_ratio <= 1:
print(
'cleaned comment dict', dict(
garbage=garbage_words, cleaned=cleaned_comment, similarity=similarity_ratio,
search_word=search_word, comment=comment, last_similarity=float(best_similarity["similarity"])
))
if similarity_ratio > float(best_similarity["similarity"]):
for living_space in living_space_dict[iban]["living_space"]:
if living_space.person_id == person.id:
best_similarity = {
"company": None,
"living_space": living_space,
"found_from": "Person Name",
"similarity": similarity_ratio,
"garbage": garbage_words,
"cleaned": cleaned_comment,
}
return best_similarity
def parse_comment_for_build_parts(comment: str, max_build_part: int = 200, parse: str = 'DAIRE'):
    """Extract candidate build-part numbers from a transfer *comment*.

    ``category_finder`` tokenises the comment into categories; the *parse*
    category (default ``'DAIRE'``) is scanned and the digits of every token
    are collected, keeping only values <= *max_build_part*.

    Returns a list of ints, or None when no usable number was found.
    """
    results, results_list = category_finder(comment), []
    # NOTE(review): assumes category_finder always returns the *parse* key —
    # a missing key would raise KeyError; confirm against category_finder.
    for result in results[parse] or []:
        # Keep only the digits of the token (e.g. "NO:12" -> "12").
        if digits := "".join(ch for ch in str(result) if ch.isdigit()):
            if int(digits) <= int(max_build_part):
                results_list.append(int(digits))
    return results_list or None
def parse_comment_for_company_or_individual(comment: str):
companies_list = Companies.filter_all(
Companies.commercial_type != "Commercial", system=True
).data
comment = unidecode(comment)
best_similarity = dict(
company=None, living_space=None, found_from=None, similarity=0.0, garbage=""
company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
)
for company in companies_list:
search_word = unidecode(company.public_name)
@@ -252,6 +276,7 @@ def parse_comment_for_company_or_individual(comment: str):
"found_from": "Customer Public Name",
"similarity": similarity_ratio,
"garbage": garbage_words,
"cleaned": cleaned_comment,
}
# print(
# 'cleaned_comment', cleaned_comment, '\n'
@@ -272,22 +297,57 @@ def parse_comment_to_split_with_star(account_record: AccountRecords):
return 1, account_record.process_comment
def check_build_living_space_matches_with_build_parts(
    living_space_dict: dict, best_similarity: dict, iban: str, whole_comment: str
):
    """Boost a borderline name-match when the comment also names the matched
    build part (e.g. the flat number).

    Only similarities strictly between 0.6 and 0.8 are reconsidered; when the
    matched living space's part number also appears in *whole_comment*, the
    similarity is moved halfway towards 1.0.

    Returns *best_similarity*, possibly with an updated ``'similarity'``.
    """
    if 0.6 < float(best_similarity['similarity']) < 0.8:
        build_parts = living_space_dict[iban]['build_parts']
        if best_similarity['living_space']:
            build_parts_id = best_similarity['living_space'].build_parts_id
            parser_dict = dict(comment=str(whole_comment), max_build_part=len(build_parts))
            results_list = parse_comment_for_build_parts(**parser_dict)
            if not results_list:
                return best_similarity
            for build_part in build_parts:
                # The matched living space's own part number must be one of
                # the part numbers mentioned in the comment.
                if int(build_part.id) == int(build_parts_id) and int(build_part.part_no) in results_list:
                    similarity = float(best_similarity['similarity'])
                    # Move halfway towards a perfect score as a confidence boost.
                    best_similarity['similarity'] = (1 - similarity) / 2 + similarity
                    break
    return best_similarity
def parse_comment_with_name(
account_record: AccountRecords, living_space_dict: dict = None
):
comments = parse_comment_to_split_with_star(account_record=account_record)
best_similarity = {"similarity": 0.0}
comments_list, comments_length = comments[1:], int(comments[0])
print('comments_list', comments_list, 'comments_length', comments_length)
if (
int(account_record.currency_value) > 0
): # Build receive money from living space people
if not comments_length > 1:
living_space_matches = dict(
living_space_dict=living_space_dict,
iban=account_record.iban,
whole_comment=account_record.process_comment
)
if comments_length == 1:
best_similarity = parse_comment_for_living_space(
iban=account_record.iban,
comment=comments_list[0],
living_space_dict=living_space_dict,
)
best_similarity["send_person_id"] = best_similarity.get("customer_id", None)
living_space_matches['best_similarity'] = best_similarity
# if 0.5 < float(best_similarity['similarity']) < 0.8
best_similarity = check_build_living_space_matches_with_build_parts(**living_space_matches)
return best_similarity
for comment in comments_list:
similarity_result = parse_comment_for_living_space(
@@ -299,6 +359,10 @@ def parse_comment_with_name(
best_similarity["similarity"]
):
best_similarity = similarity_result
living_space_matches['best_similarity'] = best_similarity
# if 0.5 < float(best_similarity['similarity']) < 0.8:
best_similarity = check_build_living_space_matches_with_build_parts(**living_space_matches)
print('last best_similarity', best_similarity)
return best_similarity
else: # Build pays money for service taken from company or individual
if not comments_length > 1:
@@ -323,7 +387,7 @@ def parse_comment_with_name_iban_description(account_record: AccountRecords):
BuildIbanDescription.iban == account_record.iban, system=True
).data
best_similarity = dict(
company=None, living_space=None, found_from=None, similarity=0.0, garbage=""
company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
)
for comment in comments_list:
for iban_result in iban_results:
@@ -341,6 +405,7 @@ def parse_comment_with_name_iban_description(account_record: AccountRecords):
"found_from": "Customer Public Name Description",
"similarity": similarity_ratio,
"garbage": garbage_words,
"cleaned": cleaned_comment,
}
return best_similarity
# print('account_record.process_comment', account_record.process_comment)