2021-04-24 12:13:41 +00:00
|
|
|
import re
|
|
|
|
|
|
|
|
# Sample sentence used by the doctests and by the demo loop at the bottom of
# the file: 10 words in total, 8 distinct once case and quotes are ignored.
test_string = (
    '"the" is the most used word '
    "in the English language"
)
|
|
|
|
|
|
|
|
|
2021-04-15 11:16:20 +00:00
|
|
|
def word_count(s: str) -> int:
    """
    Return the number of whitespace-separated words in ``s``.

    Punctuation and underscores are deleted (not replaced by a space) before
    splitting, so ``"don't"`` counts as a single word. Letters and digits
    from any script are kept, and every kind of whitespace (spaces, tabs,
    newlines) separates words.

    >>> word_count(test_string)
    10
    >>> word_count("one\\ntwo\\tthree")
    3
    >>> word_count("")
    0
    """
    # Keep all whitespace (\s) so tabs and newlines still separate words, and
    # rely on the Unicode-aware \w so non-ASCII letters are not stripped out
    # of words. "_" is matched by \w, so it is removed explicitly to treat it
    # like any other punctuation character.
    cleaned = re.sub(r"[^\w\s]|_", "", s)
    # Case does not affect the number of words, so no lower-casing is needed.
    return len(cleaned.split())
|
|
|
|
|
|
|
|
|
|
|
|
def unique_word_count(s: str) -> int:
    """
    Return the number of distinct words in ``s``, ignoring case.

    Punctuation and underscores are deleted (not replaced by a space) before
    splitting, so ``'"the"'`` and ``'the'`` are the same word. Letters and
    digits from any script are kept, and every kind of whitespace (spaces,
    tabs, newlines) separates words.

    >>> unique_word_count(test_string)
    8
    >>> unique_word_count("one\\none\\ntwo")
    2
    >>> unique_word_count("")
    0
    """
    # Keep all whitespace (\s) so tabs and newlines still separate words, and
    # rely on the Unicode-aware \w so non-ASCII letters are not stripped out
    # of words. "_" is matched by \w, so it is removed explicitly to treat it
    # like any other punctuation character.
    cleaned = re.sub(r"[^\w\s]|_", "", s)
    # Lower-case before de-duplicating so "The" and "the" count once.
    return len(set(cleaned.lower().split()))
|
|
|
|
|
2021-04-24 12:13:41 +00:00
|
|
|
|
|
|
|
# Demo: print each sample phrase followed by its total and distinct word counts.
for s in (
    "The Matrix",
    "To Be or Not to Be",
    "Kiss Kiss Bang Bang",
    test_string,
):
    print(f"{s} {word_count(s)} {unique_word_count(s)}")
|