60cd627b07e790aaf07016467c3911f22ceb7182,solutionbox/code_free_ml/test_mltoolbox/test_analyze_data.py,TestLocalAnalyze,test_categorical,#TestLocalAnalyze#,136
Before Change
self.assertEqual(stats["column_stats"]["transport"]["vocab_size"], 6)
// Color column.
vocab_str = file_io.read_file_to_string(
os.path.join(output_folder, analyze_data.VOCAB_ANALYSIS_FILE % "color"))
vocab = pd.read_csv(six.StringIO(vocab_str),
header=None,
names=["color", "count"])
expected_vocab = pd.DataFrame(
After Change
{"color": {"transform": "one_hot"},
"transport": {"transform": "embedding"}})
stats = json.loads(
file_io.read_file_to_string(
os.path.join(output_folder, analyze_data.constant.STATS_FILE)).decode())
self.assertEqual(stats["column_stats"]["color"]["vocab_size"], 3)
self.assertEqual(stats["column_stats"]["transport"]["vocab_size"], 6)
// Color column.
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 4
Instances
Project Name: googledatalab/pydatalab
Commit Name: 60cd627b07e790aaf07016467c3911f22ceb7182
Time: 2017-06-06
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/test_mltoolbox/test_analyze_data.py
Class Name: TestLocalAnalyze
Method Name: test_categorical
Project Name: googledatalab/pydatalab
Commit Name: 60cd627b07e790aaf07016467c3911f22ceb7182
Time: 2017-06-06
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/test_mltoolbox/test_analyze_data.py
Class Name: TestCloudAnalyzeFromCSVFiles
Method Name: test_text
Project Name: googledatalab/pydatalab
Commit Name: 60cd627b07e790aaf07016467c3911f22ceb7182
Time: 2017-06-06
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/test_mltoolbox/test_analyze_data.py
Class Name: TestLocalAnalyze
Method Name: test_text
Project Name: googledatalab/pydatalab
Commit Name: 60cd627b07e790aaf07016467c3911f22ceb7182
Time: 2017-06-06
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/test_mltoolbox/test_analyze_data.py
Class Name: TestCloudAnalyzeFromCSVFiles
Method Name: test_categorical