|
|
|
|
@@ -131,7 +131,9 @@ def get_garbage_words(comment: str, search_word: str):
|
|
|
|
|
garbage_words = unidecode(remove_spaces_from_string(comment))
|
|
|
|
|
search_word = unidecode(remove_spaces_from_string(search_word))
|
|
|
|
|
for word in search_word.split(" "):
|
|
|
|
|
garbage_words = garbage_words.replace(remove_spaces_from_string(unidecode(word)), "")
|
|
|
|
|
garbage_words = garbage_words.replace(
|
|
|
|
|
remove_spaces_from_string(unidecode(word)), ""
|
|
|
|
|
)
|
|
|
|
|
if cleaned_from_spaces := remove_spaces_from_string(garbage_words):
|
|
|
|
|
return str(unidecode(cleaned_from_spaces)).upper()
|
|
|
|
|
return None
|
|
|
|
|
@@ -143,7 +145,9 @@ def remove_garbage_words(comment: str, garbage_word: str):
|
|
|
|
|
garbage_word = remove_spaces_from_string(garbage_word.replace("*", " "))
|
|
|
|
|
for letter in garbage_word.split(" "):
|
|
|
|
|
cleaned_comment = unidecode(remove_spaces_from_string(cleaned_comment))
|
|
|
|
|
cleaned_comment = cleaned_comment.replace(remove_spaces_from_string(letter), "")
|
|
|
|
|
cleaned_comment = cleaned_comment.replace(
|
|
|
|
|
remove_spaces_from_string(letter), ""
|
|
|
|
|
)
|
|
|
|
|
return str(remove_spaces_from_string(cleaned_comment)).upper()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -202,7 +206,12 @@ def parse_comment_for_living_space(
|
|
|
|
|
):
|
|
|
|
|
comment = unidecode(comment)
|
|
|
|
|
best_similarity = dict(
|
|
|
|
|
company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
|
|
|
|
|
company=None,
|
|
|
|
|
living_space=None,
|
|
|
|
|
found_from=None,
|
|
|
|
|
similarity=0.0,
|
|
|
|
|
garbage="",
|
|
|
|
|
cleaned="",
|
|
|
|
|
)
|
|
|
|
|
for person in living_space_dict[iban]["people"]:
|
|
|
|
|
person: People = person
|
|
|
|
|
@@ -210,27 +219,41 @@ def parse_comment_for_living_space(
|
|
|
|
|
last_name = unidecode(person.surname).upper()
|
|
|
|
|
search_word_list = [
|
|
|
|
|
remove_spaces_from_string("".join([f"{first_name} {last_name}"])),
|
|
|
|
|
remove_spaces_from_string("".join([f"{last_name} {first_name}"]))
|
|
|
|
|
remove_spaces_from_string("".join([f"{last_name} {first_name}"])),
|
|
|
|
|
]
|
|
|
|
|
if middle_name := unidecode(person.middle_name).upper():
|
|
|
|
|
search_word_list.append(remove_spaces_from_string(f"{first_name} {middle_name} {last_name}"))
|
|
|
|
|
search_word_list.append(remove_spaces_from_string(f"{last_name} {middle_name} {first_name}"))
|
|
|
|
|
search_word_list.append(
|
|
|
|
|
remove_spaces_from_string(f"{first_name} {middle_name} {last_name}")
|
|
|
|
|
)
|
|
|
|
|
search_word_list.append(
|
|
|
|
|
remove_spaces_from_string(f"{last_name} {middle_name} {first_name}")
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
cleaned_comment = unidecode(comment).upper()
|
|
|
|
|
for search_word in search_word_list:
|
|
|
|
|
garbage_words = get_garbage_words(comment, unidecode(search_word))
|
|
|
|
|
if garbage_words:
|
|
|
|
|
garbage_words = unidecode(garbage_words).upper()
|
|
|
|
|
cleaned_comment = unidecode(remove_garbage_words(comment, garbage_words)).upper()
|
|
|
|
|
similarity_ratio = textdistance.jaro_winkler(cleaned_comment, str(search_word).upper())
|
|
|
|
|
cleaned_comment = unidecode(
|
|
|
|
|
remove_garbage_words(comment, garbage_words)
|
|
|
|
|
).upper()
|
|
|
|
|
similarity_ratio = textdistance.jaro_winkler(
|
|
|
|
|
cleaned_comment, str(search_word).upper()
|
|
|
|
|
)
|
|
|
|
|
if len(cleaned_comment) < len(f"{first_name}{last_name}"):
|
|
|
|
|
continue
|
|
|
|
|
if cleaned_comment and 0.9 < similarity_ratio <= 1:
|
|
|
|
|
print(
|
|
|
|
|
'cleaned comment dict', dict(
|
|
|
|
|
garbage=garbage_words, cleaned=cleaned_comment, similarity=similarity_ratio,
|
|
|
|
|
search_word=search_word, comment=comment, last_similarity=float(best_similarity["similarity"])
|
|
|
|
|
))
|
|
|
|
|
"cleaned comment dict",
|
|
|
|
|
dict(
|
|
|
|
|
garbage=garbage_words,
|
|
|
|
|
cleaned=cleaned_comment,
|
|
|
|
|
similarity=similarity_ratio,
|
|
|
|
|
search_word=search_word,
|
|
|
|
|
comment=comment,
|
|
|
|
|
last_similarity=float(best_similarity["similarity"]),
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
if similarity_ratio > float(best_similarity["similarity"]):
|
|
|
|
|
for living_space in living_space_dict[iban]["living_space"]:
|
|
|
|
|
if living_space.person_id == person.id:
|
|
|
|
|
@@ -245,12 +268,14 @@ def parse_comment_for_living_space(
|
|
|
|
|
return best_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_comment_for_build_parts(
    comment: str, max_build_part: int = 200, parse: str = "DAIRE"
):
    """Collect the build-part numbers mentioned in ``comment``.

    ``comment`` is handed to ``category_finder`` and the entries stored under
    the ``parse`` key of its result are scanned. The decimal characters of
    each entry are joined into one number, which is kept when it does not
    exceed ``max_build_part``.

    Args:
        comment: Text passed unchanged to ``category_finder``.
        max_build_part: Inclusive upper bound for an accepted number.
        parse: Key of the ``category_finder`` result to read.

    Returns:
        The accepted numbers as a list of ``int`` in the order they were
        found, or ``None`` when no number qualified.
    """
    results, results_list = category_finder(comment), []
    print("results[parse]", results[parse])
    upper_bound = int(max_build_part)
    # A falsy entry under ``parse`` is treated the same as "no matches".
    for result in results[parse] or []:
        # ``str.isdecimal`` instead of ``str.isdigit``: the latter also accepts
        # characters such as superscript digits that ``int()`` rejects with a
        # ValueError.
        digits = "".join(letter for letter in str(result) if letter.isdecimal())
        if not digits:
            continue
        print("digits", digits)
        number = int(digits)
        if number <= upper_bound:
            results_list.append(number)
    return results_list or None
|
|
|
|
|
@@ -262,7 +287,12 @@ def parse_comment_for_company_or_individual(comment: str):
|
|
|
|
|
).data
|
|
|
|
|
comment = unidecode(comment)
|
|
|
|
|
best_similarity = dict(
|
|
|
|
|
company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
|
|
|
|
|
company=None,
|
|
|
|
|
living_space=None,
|
|
|
|
|
found_from=None,
|
|
|
|
|
similarity=0.0,
|
|
|
|
|
garbage="",
|
|
|
|
|
cleaned="",
|
|
|
|
|
)
|
|
|
|
|
for company in companies_list:
|
|
|
|
|
search_word = unidecode(company.public_name)
|
|
|
|
|
@@ -300,25 +330,30 @@ def parse_comment_to_split_with_star(account_record: AccountRecords):
|
|
|
|
|
def check_build_living_space_matches_with_build_parts(
    living_space_dict: dict, best_similarity: dict, iban: str, whole_comment: str
):
    """Raise a borderline similarity score when the comment names the build part.

    Only scores strictly between 0.6 and 0.8 are reconsidered. For those, the
    build-part numbers found in ``whole_comment`` by
    ``parse_comment_for_build_parts`` are compared with the build parts stored
    under ``living_space_dict[iban]["build_parts"]``. When the build part
    referenced by the matched living space has its ``part_no`` among those
    numbers, the score is moved halfway towards 1.0.

    Args:
        living_space_dict: Mapping indexed by IBAN; the entry for ``iban`` is
            read for its ``"build_parts"`` value.
        best_similarity: Current best match. Its ``"similarity"`` and
            ``"living_space"`` entries are read and ``"similarity"`` may be
            updated in place.
        iban: Key used to index ``living_space_dict``.
        whole_comment: Comment text searched for build-part numbers.

    Returns:
        The same ``best_similarity`` dict, possibly with a raised
        ``"similarity"`` value.
    """
    if not 0.6 < float(best_similarity["similarity"]) < 0.8:
        return best_similarity

    build_parts = living_space_dict[iban]["build_parts"]
    living_space = best_similarity["living_space"]
    if not living_space:
        # Without a matched living space there is no build part to compare
        # against, so the score is left untouched.
        return best_similarity

    build_parts_id = living_space.build_parts_id
    parser_dict = dict(comment=str(whole_comment), max_build_part=len(build_parts))
    print("build parts similarity", best_similarity, "parser_dict", parser_dict)
    results_list = parse_comment_for_build_parts(**parser_dict)
    print("results_list", results_list)
    if not results_list:
        return best_similarity

    for build_part in build_parts:
        same_build_part = int(build_part.id) == int(build_parts_id)
        part_no_in_comment = int(build_part.part_no) in results_list
        print("part_no", int(build_part.part_no), " | ", results_list)
        print("build_part", int(build_part.id), int(build_parts_id))
        print("cond", same_build_part)
        print("cond2", part_no_in_comment)
        if same_build_part and part_no_in_comment:
            similarity = float(best_similarity["similarity"])
            # Close half of the remaining gap to a perfect score.
            best_similarity["similarity"] = (1 - similarity) / 2 + similarity
            print("similarity", best_similarity["similarity"])
            break
    return best_similarity
|
|
|
|
|
|
|
|
|
|
@@ -329,14 +364,14 @@ def parse_comment_with_name(
|
|
|
|
|
comments = parse_comment_to_split_with_star(account_record=account_record)
|
|
|
|
|
best_similarity = {"similarity": 0.0}
|
|
|
|
|
comments_list, comments_length = comments[1:], int(comments[0])
|
|
|
|
|
print('comments_list', comments_list, 'comments_length', comments_length)
|
|
|
|
|
print("comments_list", comments_list, "comments_length", comments_length)
|
|
|
|
|
if (
|
|
|
|
|
int(account_record.currency_value) > 0
|
|
|
|
|
): # Build receive money from living space people
|
|
|
|
|
living_space_matches = dict(
|
|
|
|
|
living_space_dict=living_space_dict,
|
|
|
|
|
iban=account_record.iban,
|
|
|
|
|
whole_comment=account_record.process_comment
|
|
|
|
|
whole_comment=account_record.process_comment,
|
|
|
|
|
)
|
|
|
|
|
if comments_length == 1:
|
|
|
|
|
best_similarity = parse_comment_for_living_space(
|
|
|
|
|
@@ -345,9 +380,11 @@ def parse_comment_with_name(
|
|
|
|
|
living_space_dict=living_space_dict,
|
|
|
|
|
)
|
|
|
|
|
best_similarity["send_person_id"] = best_similarity.get("customer_id", None)
|
|
|
|
|
living_space_matches['best_similarity'] = best_similarity
|
|
|
|
|
living_space_matches["best_similarity"] = best_similarity
|
|
|
|
|
# if 0.5 < float(best_similarity['similarity']) < 0.8
|
|
|
|
|
best_similarity = check_build_living_space_matches_with_build_parts(**living_space_matches)
|
|
|
|
|
best_similarity = check_build_living_space_matches_with_build_parts(
|
|
|
|
|
**living_space_matches
|
|
|
|
|
)
|
|
|
|
|
return best_similarity
|
|
|
|
|
for comment in comments_list:
|
|
|
|
|
similarity_result = parse_comment_for_living_space(
|
|
|
|
|
@@ -359,10 +396,12 @@ def parse_comment_with_name(
|
|
|
|
|
best_similarity["similarity"]
|
|
|
|
|
):
|
|
|
|
|
best_similarity = similarity_result
|
|
|
|
|
living_space_matches['best_similarity'] = best_similarity
|
|
|
|
|
living_space_matches["best_similarity"] = best_similarity
|
|
|
|
|
# if 0.5 < float(best_similarity['similarity']) < 0.8:
|
|
|
|
|
best_similarity = check_build_living_space_matches_with_build_parts(**living_space_matches)
|
|
|
|
|
print('last best_similarity', best_similarity)
|
|
|
|
|
best_similarity = check_build_living_space_matches_with_build_parts(
|
|
|
|
|
**living_space_matches
|
|
|
|
|
)
|
|
|
|
|
print("last best_similarity", best_similarity)
|
|
|
|
|
return best_similarity
|
|
|
|
|
else: # Build pays money for service taken from company or individual
|
|
|
|
|
if not comments_length > 1:
|
|
|
|
|
@@ -387,7 +426,12 @@ def parse_comment_with_name_iban_description(account_record: AccountRecords):
|
|
|
|
|
BuildIbanDescription.iban == account_record.iban, system=True
|
|
|
|
|
).data
|
|
|
|
|
best_similarity = dict(
|
|
|
|
|
company=None, living_space=None, found_from=None, similarity=0.0, garbage="", cleaned=""
|
|
|
|
|
company=None,
|
|
|
|
|
living_space=None,
|
|
|
|
|
found_from=None,
|
|
|
|
|
similarity=0.0,
|
|
|
|
|
garbage="",
|
|
|
|
|
cleaned="",
|
|
|
|
|
)
|
|
|
|
|
for comment in comments_list:
|
|
|
|
|
for iban_result in iban_results:
|
|
|
|
|
|