From 0073fbd33299a7a3d154827464bf0b9f22ae3a67 Mon Sep 17 00:00:00 2001
From: Atin Bainada <61903527+atin@users.noreply.github.com>
Date: Sat, 24 Apr 2021 17:43:41 +0530
Subject: [PATCH] chore(Python): cleaning string before word count (#256)

---
Review notes (this area sits between the "---" separator and the diff and is
not part of the commit message):

* The line structure of this patch was restored from a flattened,
  single-line copy. Indentation and blank lines are reconstructed; the exact
  whitespace of the removed blank line before the demo loop could not be
  recovered, so verify that hunk against the target file before applying.
* The punctuation filter was changed from '[^A-Za-z0-9 ]+' to r'[^\w\s]+'.
  The former class also deleted tabs and newlines, fusing neighbouring words
  ("a\tb" was counted as one word), and deleted every non-ASCII letter
  ("café" became "caf"). The new class removes punctuation only and leaves
  whitespace and Unicode word characters for str.split() to handle.
* The post-image blob id in the "index" line below was not recomputed after
  that change.

 algorithms/Python/strings/word_count.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/algorithms/Python/strings/word_count.py b/algorithms/Python/strings/word_count.py
index b4b32613..6119fd70 100644
--- a/algorithms/Python/strings/word_count.py
+++ b/algorithms/Python/strings/word_count.py
@@ -1,10 +1,25 @@
+import re
+
+test_string = '"the" is the most used word in the English language'
+
+
 def word_count(s: str) -> int:
+    """
+    >>> word_count(test_string)
+    10
+    """
+    s = re.sub(r'[^\w\s]+', '', s)
     return len(s.lower().split())
 
 
 def unique_word_count(s: str) -> int:
+    """
+    >>> unique_word_count(test_string)
+    8
+    """
+    s = re.sub(r'[^\w\s]+', '', s)
     return len(set(s.lower().split()))
 
-
-for s in ("The Matrix", "To Be or Not to Be", "Kiss Kiss Bang Bang"):
+
+for s in ("The Matrix", "To Be or Not to Be", "Kiss Kiss Bang Bang", test_string):
     print(s, word_count(s), unique_word_count(s))