import matplotlib.pyplot as plt
# Create a fresh figure/axes and draw the contents of all_scores as
# horizontal box plots (vert=False lays the boxes out along the y-axis).
# NOTE(review): all_scores is defined outside this excerpt; presumably it
# holds the cross-validation scores of each encoding -- confirm.
f, ax = plt.subplots()
ax.boxplot(all_scores, vert=False)
# Name the boxes on the y-axis. Tick labels are assigned positionally, so
# their order has to match the order of the entries in all_scores.
ax.set_yticklabels(["one-hot\nencoding", "similarity\nencoding"])
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// We can see that encoding the data with a SimilarityEncoder instead of a
// OneHotEncoder substantially improves the cross-validation score!
After Change
# Build the model pipeline for the current encoding method.
# NOTE(review): make_pipeline is called with only the encoding name, so it is
# presumably a helper defined elsewhere in this script -- confirm.
pipeline = make_pipeline(method)
// Now predict the census region of each participant
# Cross-validate the pipeline on df against the target y, using the cv setting
# defined outside this excerpt.
scores = cross_val_score(pipeline, df, y, cv=cv)
# Store the scores keyed by the encoding method.
all_scores[method] = scores
print("%s encoding" % method)
print("Accuracy score: mean: %.3f; std: %.3f\n"