# Horizontal box plot of the values in all_scores (vert=False lays the boxes
# out along the y-axis), with one y tick label per encoding.
# NOTE(review): assumes all_scores holds exactly two score collections, in
# the order one-hot then similarity -- confirm where all_scores is built.
encoder_names = ["one-hot\nencoding", "similarity\nencoding"]
f, ax = plt.subplots()
ax.boxplot(all_scores, vert=False)
ax.set_yticklabels(encoder_names)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// We can see that encoding the data with a SimilarityEncoder instead of a
// OneHotEncoder substantially improves the cross-validation score!
After Change
import matplotlib.pyplot as plt
# Use a single font size for both axis titles and the y tick labels.
label_size = 17
plt.xlabel("Prediction accuracy", size=label_size)
plt.ylabel("Encoding", size=label_size)
plt.yticks(size=label_size)
# Called last, after every text size has been set.
plt.tight_layout()
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// We can see that encoding the data using a SimilarityEncoder instead of