Which naughty words are used most frequently as passwords?
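We need two things: a list of naughty words, and a way to look up how many times each word has been used as a password.
The first is provided by https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words and the second by the Have I Been Pwned API.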
Let's get started. Scroll to the bottom to see the results.
# Some imports
import requests
import pandas
from pwnedapi import Password
# Get the list of naughty words.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being split into lines
# and treated as a word list.
response = requests.get(
    "https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/master/en",
    timeout=30,
)
response.raise_for_status()
naughty_words = response.text.splitlines()
# Use pwnedapi to get the number of times each word has been used as a password.
# One-off sample: a Password wrapper for the second word in the list.
password = Password(naughty_words[1])
# Pair every word with its own Password wrapper.
naughty_word_with_password = [(word, Password(word)) for word in naughty_words]
def check_is_pwned(word, pasword):
    """Report whether the given password object is flagged as pwned.

    Args:
        word: The plain-text word the password object was built from. It is
            not used in the lookup; it is accepted so that ``(word, password)``
            pairs can be unpacked straight into this function.
        pasword: Object exposing an ``is_pwned()`` method (in this file, a
            ``pwnedapi.Password``).

    Returns:
        Whatever ``pasword.is_pwned()`` returns.
    """
    return pasword.is_pwned()
# Keep only the (word, Password) pairs for which check_is_pwned() is truthy.
naughty_word_with_password = list(
    filter(lambda pair: check_is_pwned(*pair), naughty_word_with_password)
)
# Map each remaining word to the pwned_count stored on its Password object.
naughty_word_with_pwned_count = {
    term: candidate.pwned_count for term, candidate in naughty_word_with_password
}
# Find the top 40 words (highest count first)
naughty_password = sorted(
    naughty_word_with_pwned_count.items(),
    key=lambda entry: entry[1],
    reverse=True,
)[:40]
# What are the naughty passwords?
naughty_password
# Load the ranked (word, count) pairs into a dataframe indexed by word
df = (
    pandas.DataFrame(naughty_password, columns=["word", "pwned_count"])
    .set_index("word")
)
import matplotlib.pyplot as plt

# Use a larger default figure size (width 15, height 10) for the chart.
plt.rcParams['figure.figsize'] = [15, 10]
# Bind the object returned by barh() to its own name: assigning it to `plt`
# would shadow the pyplot module and break any later `plt.*` call.
ax = df.plot.barh()
ax