diff --git a/api_services/bank_actions/wag_account_record_parser.py b/api_services/bank_actions/wag_account_record_parser.py
index 89cbc7a..05aa7b5 100644
--- a/api_services/bank_actions/wag_account_record_parser.py
+++ b/api_services/bank_actions/wag_account_record_parser.py
@@ -16,6 +16,7 @@ from pydantic import BaseModel
 from databases.sql_models.company.company import Companies
 from databases.sql_models.identity.identity import People
+from service_account_records.regex_func import category_finder
 
 
 class InsertBudgetRecord(BaseModel):
@@ -127,19 +128,22 @@ def remove_spaces_from_string(remove_string: str):
 
 
 def get_garbage_words(comment: str, search_word: str):
-    garbage_words = remove_spaces_from_string(comment)
-    search_word = remove_spaces_from_string(search_word)
-    for letter in search_word.split(" "):
-        garbage_words = garbage_words.replace(remove_spaces_from_string(letter), "")
-    return str(remove_spaces_from_string(garbage_words)).upper()
+    garbage_words = unidecode(remove_spaces_from_string(comment))
+    search_word = unidecode(remove_spaces_from_string(search_word))
+    for word in search_word.split(" "):
+        garbage_words = garbage_words.replace(remove_spaces_from_string(unidecode(word)), "")
+    if cleaned_from_spaces := remove_spaces_from_string(garbage_words):  # None signals "nothing left"
+        return str(unidecode(cleaned_from_spaces)).upper()
+    return None
 
 
 def remove_garbage_words(comment: str, garbage_word: str):
     cleaned_comment = remove_spaces_from_string(comment.replace("*", " "))
-    garbage_word = remove_spaces_from_string(garbage_word.replace("*", " "))
-    for letter in garbage_word.split(" "):
-        cleaned_comment = unidecode(remove_spaces_from_string(cleaned_comment))
-        cleaned_comment = cleaned_comment.replace(remove_spaces_from_string(letter), "")
+    if garbage_word:  # guards against the None that get_garbage_words can now return
+        garbage_word = remove_spaces_from_string(garbage_word.replace("*", " "))
+        for word in garbage_word.split(" "):
+            cleaned_comment = unidecode(remove_spaces_from_string(cleaned_comment))
+            cleaned_comment = cleaned_comment.replace(remove_spaces_from_string(word), "")
     return str(remove_spaces_from_string(cleaned_comment)).upper()
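A quick walk-through of how the two helpers now cooperate (hypothetical comment text; this assumes `remove_spaces_from_string` trims and collapses whitespace — its body sits outside this hunk):

```python
comment = "AIDAT JOHN DOE DAIRE 5"   # hypothetical bank transfer comment
search_word = "JOHN DOE"             # resident name being tested

garbage = get_garbage_words(comment, search_word)
# every search token is stripped from the comment, leaving roughly "AIDAT DAIRE 5"

cleaned = remove_garbage_words(comment, garbage)
# every garbage token is stripped back out, leaving roughly "JOHN DOE"
```

The behavioural change: when the comment consists of nothing but the name, `get_garbage_words` now returns `None` instead of `""`, and the new `if garbage_word:` guard stops `remove_garbage_words` from calling `.replace` on `None`.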
@@ -198,47 +202,67 @@ def parse_comment_for_living_space(
 ):
     comment = unidecode(comment)
     best_similarity = dict(
-        company=None, living_space=None, found_from=None, similarity=0.0, garbage=""
+        company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
     )
     for person in living_space_dict[iban]["people"]:
         person: People = person
         first_name = unidecode(person.firstname).upper()
         last_name = unidecode(person.surname).upper()
-        middle_name = unidecode(person.middle_name).upper()
-        search_word = f"{first_name} {last_name}"
-        if middle_name:
-            search_word = f"{first_name} {middle_name} {last_name}"
-        garbage_words = get_garbage_words(comment, search_word)
-        cleaned_comment = remove_garbage_words(comment, garbage_words)
-        similarity_ratio = textdistance.jaro_winkler(cleaned_comment, search_word)
-        if similarity_ratio > float(best_similarity["similarity"]):
-            for living_space in living_space_dict[iban]["living_space"]:
-                if living_space.person_id == person.id:
-                    best_similarity = {
-                        "company": None,
-                        "living_space": living_space,
-                        "found_from": "Person Name",
-                        "similarity": similarity_ratio,
-                        "garbage": garbage_words,
-                    }
-        # print(
-        #     'cleaned_comment', cleaned_comment, '\n'
-        #     'search_word', search_word, '\n'
-        #     'best_similarity', best_similarity, '\n'
-        #     'person name', f"{first_name} {last_name}", '\n'
-        #     'similarity_ratio', similarity_ratio, '\n'
-        #     'garbage_words', garbage
-        # )
+        search_word_list = [  # both name orders, spaces normalised
+            remove_spaces_from_string(f"{first_name} {last_name}"),
+            remove_spaces_from_string(f"{last_name} {first_name}"),
+        ]
+        if middle_name := unidecode(person.middle_name or "").upper():
+            search_word_list.append(remove_spaces_from_string(f"{first_name} {middle_name} {last_name}"))
+            search_word_list.append(remove_spaces_from_string(f"{last_name} {middle_name} {first_name}"))
+
+        cleaned_comment = unidecode(comment).upper()
+        for search_word in search_word_list:
+            garbage_words = get_garbage_words(comment, unidecode(search_word))
+            if garbage_words:
+                garbage_words = unidecode(garbage_words).upper()
+                cleaned_comment = unidecode(remove_garbage_words(comment, garbage_words)).upper()
+            similarity_ratio = textdistance.jaro_winkler(cleaned_comment, str(search_word).upper())
+            if len(cleaned_comment) < len(f"{first_name}{last_name}"):
+                continue  # shorter than the bare name: cannot be a full-name match
+            if cleaned_comment and 0.9 < similarity_ratio <= 1:
+                print(
+                    'cleaned comment dict', dict(
+                        garbage=garbage_words, cleaned=cleaned_comment, similarity=similarity_ratio,
+                        search_word=search_word, comment=comment, last_similarity=float(best_similarity["similarity"])
+                    ))
+            if similarity_ratio > float(best_similarity["similarity"]):
+                for living_space in living_space_dict[iban]["living_space"]:
+                    if living_space.person_id == person.id:
+                        best_similarity = {
+                            "company": None,
+                            "living_space": living_space,
+                            "found_from": "Person Name",
+                            "similarity": similarity_ratio,
+                            "garbage": garbage_words,
+                            "cleaned": cleaned_comment,
+                        }
     return best_similarity
 
 
+def parse_comment_for_build_parts(comment: str, max_build_part: int = 200, parse: str = 'DAIRE'):
+    results, results_list = category_finder(comment), []
+    print('results[parse]', results[parse])
+    for result in results[parse] or []:
+        if digits := "".join([letter for letter in str(result) if letter.isdigit()]):
+            print('digits', digits)
+            if int(digits) <= int(max_build_part):
+                results_list.append(int(digits))  # e.g. "DAIRE 5" -> 5
+    return results_list or None
+
+
 def parse_comment_for_company_or_individual(comment: str):
     companies_list = Companies.filter_all(
         Companies.commercial_type != "Commercial", system=True
     ).data
     comment = unidecode(comment)
     best_similarity = dict(
-        company=None, living_space=None, found_from=None, similarity=0.0, garbage=""
+        company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
     )
     for company in companies_list:
         search_word = unidecode(company.public_name)
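Why `search_word_list` now carries both token orders and spaces are stripped before scoring: Jaro-Winkler is order-sensitive, so a comment written surname-first scores poorly against a first-name-first pattern. A rough check with textdistance (hypothetical name; values approximate):

```python
import textdistance

textdistance.jaro_winkler("JOHNDOE", "JOHNDOE")  # 1.0  -- matching order
textdistance.jaro_winkler("DOEJOHN", "JOHNDOE")  # ~0.5 -- reversed order alone
```

Whichever order the bank puts the name in, one of the generated permutations scores near 1.0; the `len(cleaned_comment)` guard then discards comments too short to contain the full name at all.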
@@ -252,6 +276,7 @@ def parse_comment_for_company_or_individual(comment: str):
                     "found_from": "Customer Public Name",
                     "similarity": similarity_ratio,
                     "garbage": garbage_words,
+                    "cleaned": cleaned_comment,
                 }
         # print(
         #     'cleaned_comment', cleaned_comment, '\n'
@@ -272,22 +297,57 @@ def parse_comment_to_split_with_star(account_record: AccountRecords):
     return 1, account_record.process_comment
 
 
+def check_build_living_space_matches_with_build_parts(
+    living_space_dict: dict, best_similarity: dict, iban: str, whole_comment: str
+):
+    if 0.6 < float(best_similarity['similarity']) < 0.8:
+        build_parts = living_space_dict[iban]['build_parts']
+        if best_similarity['living_space']:
+            build_parts_id = best_similarity['living_space'].build_parts_id
+            parser_dict = dict(comment=str(whole_comment), max_build_part=len(build_parts))
+            print('build parts similarity', best_similarity, 'parser_dict', parser_dict)
+            results_list = parse_comment_for_build_parts(**parser_dict)
+            print('results_list', results_list)
+            if not results_list:
+                return best_similarity
+            for build_part in build_parts:
+                print('part_no', int(build_part.part_no), " | ", results_list)
+                print('build_part', int(build_part.id), int(build_parts_id))
+                print('cond', int(build_part.id) == int(build_parts_id))
+                print('cond2', int(build_part.part_no) in results_list)
+                if int(build_part.id) == int(build_parts_id) and int(build_part.part_no) in results_list:
+                    similarity = float(best_similarity['similarity'])
+                    best_similarity['similarity'] = (1 - similarity) / 2 + similarity  # halfway to 1.0
+                    print('similarity', best_similarity['similarity'])
+                    break
+    return best_similarity
+
+
 def parse_comment_with_name(
     account_record: AccountRecords, living_space_dict: dict = None
 ):
     comments = parse_comment_to_split_with_star(account_record=account_record)
     best_similarity = {"similarity": 0.0}
     comments_list, comments_length = comments[1:], int(comments[0])
+    print('comments_list', comments_list, 'comments_length', comments_length)
     if (
         int(account_record.currency_value) > 0
     ):  # Build receive money from living space people
-        if not comments_length > 1:
+        living_space_matches = dict(
+            living_space_dict=living_space_dict,
+            iban=account_record.iban,
+            whole_comment=account_record.process_comment
+        )
+        if comments_length == 1:
             best_similarity = parse_comment_for_living_space(
                 iban=account_record.iban,
                 comment=comments_list[0],
                 living_space_dict=living_space_dict,
             )
             best_similarity["send_person_id"] = best_similarity.get("customer_id", None)
+            living_space_matches['best_similarity'] = best_similarity
+            # boost a 0.6-0.8 score when the comment also names the right unit
+            best_similarity = check_build_living_space_matches_with_build_parts(**living_space_matches)
             return best_similarity
         for comment in comments_list:
             similarity_result = parse_comment_for_living_space(
@@ -299,6 +359,10 @@ def parse_comment_with_name(
                 best_similarity["similarity"]
             ):
                 best_similarity = similarity_result
+        living_space_matches['best_similarity'] = best_similarity
+        # boost only applies inside the 0.6-0.8 similarity band
+        best_similarity = check_build_living_space_matches_with_build_parts(**living_space_matches)
+        print('last best_similarity', best_similarity)
         return best_similarity
     else:  # Build pays money for service taken from company or individual
         if not comments_length > 1:
@@ -323,7 +387,7 @@ def parse_comment_with_name_iban_description(account_record: AccountRecords):
         BuildIbanDescription.iban == account_record.iban, system=True
     ).data
     best_similarity = dict(
-        company=None, living_space=None, found_from=None, similarity=0.0, garbage=""
+        company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
     )
     for comment in comments_list:
         for iban_result in iban_results:
@@ -341,6 +405,7 @@ def parse_comment_with_name_iban_description(account_record: AccountRecords):
                     "found_from": "Customer Public Name Description",
                     "similarity": similarity_ratio,
                     "garbage": garbage_words,
+                    "cleaned": cleaned_comment,
                 }
     return best_similarity
     # print('account_record.process_comment', account_record.process_comment)
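The boost applied by `check_build_living_space_matches_with_build_parts` moves a mid-band score halfway toward 1.0 once the comment also names the matched unit. Worked through with a value from inside the gate:

```python
similarity = 0.7                             # inside the 0.6-0.8 band
boosted = (1 - similarity) / 2 + similarity  # 0.85
# a borderline name match backed by the right unit number now clears
# the 0.8 acceptance threshold used in account_records_search
```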
diff --git a/docker-compose.yml b/docker-compose.yml
index d3e47af..e68040b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,8 +2,8 @@ services:
 
   commercial_mongo_service:
     container_name: commercial_mongo_service
-#    image: "bitnami/mongodb:latest"
-    image: "bitnami/mongodb:4.4.1-debian-10-r3"
+    image: "bitnami/mongodb:latest"
+#    image: "bitnami/mongodb:4.4.1-debian-10-r3"
     networks:
       - network_store_services
     environment:
@@ -65,85 +65,85 @@
       - wag_management_init_service
       - grafana
 
-  wag_management_service_second:
-    container_name: wag_management_service_second
-    restart: on-failure
-    build:
-      context: .
-      dockerfile: service_app/Dockerfile
-    ports:
-      - "41576:41575"
-    networks:
-      - network_store_services
-    depends_on:
-      - wag_management_init_service
-      - grafana
+#  wag_management_service_second:
+#    container_name: wag_management_service_second
+#    restart: on-failure
+#    build:
+#      context: .
+#      dockerfile: service_app/Dockerfile
+#    ports:
+#      - "41576:41575"
+#    networks:
+#      - network_store_services
+#    depends_on:
+#      - wag_management_init_service
+#      - grafana
+#
+#  wag_management_init_service:
+#    container_name: wag_management_init_service
+#    build:
+#      context: .
+#      dockerfile: service_app_init/Dockerfile
+#    networks:
+#      - network_store_services
+#    depends_on:
+#      - postgres_commercial
+#
+#  wag_bank_services:
+#    container_name: wag_bank_services
+#    restart: on-failure
+#    build:
+#      context: .
+#      dockerfile: service_app_banks/mailService.Dockerfile
+#    networks:
+#      - network_store_services
+#    depends_on:
+#      - postgres_commercial
+#    environment:
+#      - DATABASE_URL=postgresql+psycopg2://berkay_wag_user:berkay_wag_user_password@postgres_commercial:5432/wag_database
 
-  wag_management_init_service:
-    container_name: wag_management_init_service
-    build:
-      context: .
-      dockerfile: service_app_init/Dockerfile
-    networks:
-      - network_store_services
-    depends_on:
-      - postgres_commercial
+#  wag_account_services:
+#    container_name: wag_account_services
+#    restart: on-failure
+#    build:
+#      context: .
+#      dockerfile: service_account_records/account.Dockerfile
+#    networks:
+#      - network_store_services
+#    depends_on:
+#      - postgres_commercial
+#    environment:
+#      - DATABASE_URL=postgresql+psycopg2://berkay_wag_user:berkay_wag_user_password@postgres_commercial:5432/wag_database
+#      - PYTHONPATH=/
 
-  wag_bank_services:
-    container_name: wag_bank_services
-    restart: on-failure
-    build:
-      context: .
-      dockerfile: service_app_banks/mailService.Dockerfile
-    networks:
-      - network_store_services
-    depends_on:
-      - postgres_commercial
-    environment:
-      - DATABASE_URL=postgresql+psycopg2://berkay_wag_user:berkay_wag_user_password@postgres_commercial:5432/wag_database
-
-  wag_account_services:
-    container_name: wag_account_services
-    restart: on-failure
-    build:
-      context: .
-      dockerfile: service_account_records/account.Dockerfile
-    networks:
-      - network_store_services
-    depends_on:
-      - postgres_commercial
-    environment:
-      - DATABASE_URL=postgresql+psycopg2://berkay_wag_user:berkay_wag_user_password@postgres_commercial:5432/wag_database
-      - PYTHONPATH=/
-
-  prometheus:
-    image: prom/prometheus
-    container_name: prometheus
-    ports:
-      - "9090:9090"
-    volumes:
-      - ./prometheus_data/prometheus.yml:/etc/prometheus/prometheus.yml
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-    networks:
-      - network_store_services
-
-  grafana:
-    image: grafana/grafana
-    container_name: grafana
-    ports:
-      - "3000:3000"
-    depends_on:
-      - prometheus
-    networks:
-      - network_store_services
-    environment:
-      - GF_SECURITY_ADMIN_USER=admin
-      - GF_SECURITY_ADMIN_PASSWORD=admin
-      - GF_USERS_ALLOW_SIGN_UP=false
-      - GF_USERS_ALLOW_ORG_CREATE=false
-    volumes:
-      - grafana_data:/var/lib/grafana
+#  prometheus:
+#    image: prom/prometheus
+#    container_name: prometheus
+#    ports:
+#      - "9090:9090"
+#    volumes:
+#      - ./prometheus_data/prometheus.yml:/etc/prometheus/prometheus.yml
+#    command:
+#      - '--config.file=/etc/prometheus/prometheus.yml'
+#    networks:
+#      - network_store_services
+#
+#  grafana:
+#    image: grafana/grafana
+#    container_name: grafana
+#    ports:
+#      - "3000:3000"
+#    depends_on:
+#      - prometheus
+#    networks:
+#      - network_store_services
+#    environment:
+#      - GF_SECURITY_ADMIN_USER=admin
+#      - GF_SECURITY_ADMIN_PASSWORD=admin
+#      - GF_USERS_ALLOW_SIGN_UP=false
+#      - GF_USERS_ALLOW_ORG_CREATE=false
+#    volumes:
+#      - grafana_data:/var/lib/grafana
 
 #  wag_management_test_service:
 #    container_name: wag_management_test_service
@@ -175,8 +175,8 @@ networks:
   network_store_services:
 
 volumes:
-  wag_postgres_commercial_data:
   grafana_data:
+  wag_postgres_commercial_data:
   wag_commercial_mongodb_data:
 
 # environment:
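If these services are only parked while the parser work is iterated on, Compose profiles would avoid the comment-out churn — an optional alternative, sketched here for the grafana service from this file (not part of the change itself):

```yaml
  grafana:
    image: grafana/grafana
    profiles: ["monitoring"]  # skipped unless run with: docker compose --profile monitoring up
```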
diff --git a/service_account_records/app_accounts.py b/service_account_records/app_accounts.py
index 7104e8c..4b7f3f6 100644
--- a/service_account_records/app_accounts.py
+++ b/service_account_records/app_accounts.py
@@ -148,8 +148,10 @@ def account_get_people_and_living_space_info_via_iban() -> dict:
             if living_space.person_id
         ]
         people_list = People.filter_all(
-            People.id.in_(living_spaces_people), system=True
+            People.id.in_(living_spaces_people),
+            system=True
         ).data
+        print('build_parts', build_parts)
         build_living_space_dict[str(account_records_iban[0])] = {
             "people": list(people_list),
             "living_space": list(living_spaces),
@@ -160,11 +162,10 @@ def account_get_people_and_living_space_info_via_iban() -> dict:
 
 def account_records_search():
     build_living_space_dict = account_get_people_and_living_space_info_via_iban()
-    AccountRecords.filter_attr = account_list
+    AccountRecords.filter_attr, found_list = account_list, []
     account_records_list: list[AccountRecords] = AccountRecords.filter_all(
-        AccountRecords.build_decision_book_id != None, system=True
+        # AccountRecords.build_decision_book_id != None, system=True
     ).data
-    found_list = []
     for account_record in account_records_list:
         similarity_result = parse_comment_with_name(
             account_record=account_record, living_space_dict=build_living_space_dict
@@ -172,9 +173,9 @@ def account_records_search():
         )
         fs, ac = similarity_result.get("similarity"), account_record.similarity or 0
         if float(fs) >= 0.8 and float(fs) > float(ac):
             found_list.append(similarity_result)
-            account_save_search_result(
-                account_record=account_record, similarity_result=similarity_result
-            )
+            # account_save_search_result(
+            #     account_record=account_record, similarity_result=similarity_result
+            # )
         else:
             similarity_result = parse_comment_with_name_iban_description(
                 account_record=account_record
@@ -182,9 +183,9 @@ def account_records_search():
             fs, ac = similarity_result.get("similarity"), account_record.similarity or 0
             if float(fs) >= 0.8 and float(fs) > float(ac):
                 found_list.append(similarity_result)
-                account_save_search_result(
-                    account_record=account_record, similarity_result=similarity_result
-                )
+                # account_save_search_result(
+                #     account_record=account_record, similarity_result=similarity_result
+                # )
     print("Account Records Search : ", len(found_list), "/", len(account_records_list))
     return
 
@@ -309,9 +310,7 @@ def send_accounts_to_decision_payment():
                 AccountRecords.receive_debit == receive_enum.id,
             ).data
             for account_record in account_records_list:
-                current_currency_value = pay_the_registration(
-                    account_record, receive_enum, debit_enum
-                )
+                current_currency_value = pay_the_registration(account_record, receive_enum, debit_enum)
                 if current_currency_value > 0:
                     pay_the_registration(account_record, receive_enum, debit_enum, True)
                 if abs(float(Decimal(account_record.remainder_balance))) == abs(
@@ -319,14 +318,14 @@ def send_accounts_to_decision_payment():
                 ):
                     account_record.update(status_id=97)
                     account_record.save()
-    # todo If the payment is more than the amount, then create a new account record with the remaining amount
+    # TODO: if the payment exceeds the amount due, create a new account record with the remaining amount
     return
 
 
 def account_records_service() -> None:
-    account_records_find_decision_book()
+    # account_records_find_decision_book()
     account_records_search()
-    send_accounts_to_decision_payment()
+    # send_accounts_to_decision_payment()
     return
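For orientation, `account_get_people_and_living_space_info_via_iban` hands `parse_comment_with_name` a per-IBAN lookup shaped roughly like this (key names taken from the code; the IBAN and row contents are hypothetical):

```python
build_living_space_dict = {
    "TR33...": {                # one entry per IBAN seen in the account records
        "people": [...],        # People rows linked to the living spaces
        "living_space": [...],  # living-space rows behind this IBAN
        "build_parts": [...],   # build-part rows used by the unit-number boost
    },
}
```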
diff --git a/service_account_records/regex_func.py b/service_account_records/regex_func.py
new file mode 100644
index 0000000..7989304
--- /dev/null
+++ b/service_account_records/regex_func.py
@@ -0,0 +1,33 @@
+import re
+from difflib import get_close_matches
+
+
+# keyword variants per category, including misspellings seen in bank
+# comments ("daire" = apartment unit, "bina" = building)
+categories = {
+    "DAIRE": ["daire", "dagire", "daare", "nolu daire", "no", "nolu dairenin"],
+    "APARTMAN": ["apartman", "aparman", "aprmn"],
+    "VILLA": ["villa", "vlla"],
+    "BINA": ["bina", "binna"]
+}
+
+
+def word_straighten(word, ref_list, threshold=0.8):
+    # snap a (possibly misspelled) word to its closest reference word
+    matches = get_close_matches(word, ref_list, n=1, cutoff=threshold)
+    return matches[0] if matches else word
+
+
+def category_finder(text, output_template="{kategori} {numara}"):
+    result = {category: [] for category in categories}  # a dict to collect the results
+    for category, patterns in categories.items():
+        words = re.split(r'\W+', text)
+        # lowercase before fuzzy matching: difflib is case-sensitive, the
+        # patterns are lowercase, and bank comments are usually upper-case
+        straighten_words = [word_straighten(word.lower(), patterns) for word in words]
+        straighten_text = ' '.join(straighten_words)
+        pattern = r'(?:\b|\s|^)(?:' + '|'.join(map(re.escape, patterns)) + r')[\s:\-#]*(\d+)(?:\b|$)'
+        if founds_list := re.findall(pattern, straighten_text, re.IGNORECASE):
+            list_of_output = [output_template.format(kategori=category, numara=num) for num in founds_list]
+            result[category].extend([i for i in list_of_output if str(i).replace(' ', '')])
+    return result
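A minimal usage sketch for the new module (hypothetical comment text; the output is traced by hand from the code above):

```python
from service_account_records.regex_func import category_finder, word_straighten

word_straighten("dajre", ["daire", "dagire", "daare"])
# -> "daire"  (difflib ratio 0.8 meets the default cutoff)

category_finder("aidat odemesi dajre 5")
# -> {"DAIRE": ["DAIRE 5"], "APARTMAN": [], "VILLA": [], "BINA": []}
```

`parse_comment_for_build_parts("aidat odemesi dajre 5", max_build_part=20)` would then reduce that result to `[5]`.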