0d078991bf6a056be5579cda14dd1f305078eb7d,tensorflow_transform/beam/cached_impl_test.py,CachedImplTest,test_caching_vocab_for_integer_categorical,#CachedImplTest#,566
Before Change
    }, {
        "x_vocab": -1,
    }]

    with beam_impl.Context(temp_dir=self.get_temp_dir()):
      with beam.Pipeline() as p:
        flat_data = p | "CreateInputData" >> beam.Create(
            list(itertools.chain(*input_data_dict.values())))
        cache_dict = {
            span_0_key: {
                b"__v0__VocabularyAccumulate[compute_and_apply_vocabulary/vocabulary]-\x05e\xfe4\x03H.P\xb5\xcb\xd22\xe3\x16\x15\xf8\xf5\xe38\xd9":
                    p | "CreateB" >> beam.Create(
                        [b"[-2, 2]", b"[-4, 1]", b"[-1, 1]", b"[4, 1]"]),
            },
            span_1_key: {},
        }
        transform_fn, cache_output = (
            (flat_data, input_data_dict, cache_dict, input_metadata)
            | "Analyze" >> beam_impl.AnalyzeDatasetWithCache(preprocessing_fn))
        dot_string = nodes.get_dot_graph(
            [analysis_graph_builder._ANALYSIS_GRAPH]).to_string()
        self.WriteRenderedDotFile(dot_string)
        self.assertNotIn(span_0_key, cache_output)
        _ = cache_output | "WriteCache" >> analyzer_cache.WriteAnalysisCacheToFS(
            self._cache_dir)
        transformed_dataset = (((input_data_dict[span_1_key], input_metadata),
                                transform_fn)
                               | "Transform" >> beam_impl.TransformDataset())
        transformed_data, _ = transformed_dataset
        beam_test_util.assert_that(
            transformed_data,
            beam_test_util.equal_to(expected_transformed_data),
            label="first")
  def test_non_frequency_vocabulary_merge(self):
    """This test compares vocabularies produced with and without cache."""
    mi_vocab_name = "mutual_information_vocab"
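For context on the caching round trip exercised above: the test writes the vocabulary accumulator cache with analyzer_cache.WriteAnalysisCacheToFS, and a follow-up run would read it back from disk rather than hand-building cache_dict. Below is a minimal sketch of that read-back, assuming the ReadAnalysisCacheFromFS transform in tensorflow_transform.beam.analyzer_cache at this commit; cache_dir, temp_dir, input_data_dict, input_metadata and preprocessing_fn are reused from the test, not defined here.

import itertools

import apache_beam as beam
import tensorflow_transform.beam as beam_impl
from tensorflow_transform.beam import analyzer_cache

# Hypothetical second run: cache_dir, temp_dir, input_data_dict,
# input_metadata and preprocessing_fn are assumed to exist as in the test.
with beam_impl.Context(temp_dir=temp_dir):
  with beam.Pipeline() as p:
    flat_data = p | "CreateInputData" >> beam.Create(
        list(itertools.chain(*input_data_dict.values())))
    # Read the per-span cache entries written by WriteAnalysisCacheToFS.
    cache_dict = p | "ReadCache" >> analyzer_cache.ReadAnalysisCacheFromFS(
        cache_dir, list(input_data_dict.keys()))
    transform_fn, cache_output = (
        (flat_data, input_data_dict, cache_dict, input_metadata)
        | "Analyze" >> beam_impl.AnalyzeDatasetWithCache(preprocessing_fn))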
After Change
    }, {
        "x_vocab": -1,
    }]

    with _TestPipeline() as p:
      flat_data = p | "CreateInputData" >> beam.Create(
          list(itertools.chain(*input_data_dict.values())))
      cache_dict = {
          span_0_key: {
              b"__v0__VocabularyAccumulate[compute_and_apply_vocabulary/vocabulary]-\x05e\xfe4\x03H.P\xb5\xcb\xd22\xe3\x16\x15\xf8\xf5\xe38\xd9":
                  p | "CreateB" >> beam.Create(
                      [b"[-2, 2]", b"[-4, 1]", b"[-1, 1]", b"[4, 1]"]),
          },
          span_1_key: {},
      }
      transform_fn, cache_output = (
          (flat_data, input_data_dict, cache_dict, input_metadata)
          | "Analyze" >> beam_impl.AnalyzeDatasetWithCache(preprocessing_fn))
      dot_string = nodes.get_dot_graph(
          [analysis_graph_builder._ANALYSIS_GRAPH]).to_string()
      self.WriteRenderedDotFile(dot_string)
      self.assertNotIn(span_0_key, cache_output)
      _ = cache_output | "WriteCache" >> analyzer_cache.WriteAnalysisCacheToFS(
          self._cache_dir)
      transformed_dataset = (
          ((input_data_dict[span_1_key], input_metadata), transform_fn)
          | "Transform" >> beam_impl.TransformDataset())
      transformed_data, _ = transformed_dataset
      beam_test_util.assert_that(
          transformed_data,
          beam_test_util.equal_to(expected_transformed_data),
          label="first")
    # 4 from analysis since 1 span was completely cached, and 4 from transform.
    self.assertEqual(_get_counter_value(p.metrics, "num_instances"), 8)
    self.assertEqual(_get_counter_value(p.metrics, "cache_entries_decoded"), 1)
    self.assertEqual(_get_counter_value(p.metrics, "cache_entries_encoded"), 1)
    self.assertEqual(_get_counter_value(p.metrics, "saved_models_created"), 2)
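The rewritten test relies on two helpers that do not appear in this excerpt: _TestPipeline, a beam.Pipeline subclass that keeps the pipeline result so metrics can be queried after the with block exits, and _get_counter_value, which looks up a named counter on that result. The sketch below shows plausible shapes for them; the bodies are assumptions rather than the commit's exact code, though MetricsFilter and MetricResults.query are the standard Beam metrics API.

import apache_beam as beam
from apache_beam.metrics.metric import MetricsFilter


class _TestPipeline(beam.Pipeline):
  """A Pipeline that retains its run result so metrics outlive the with block."""

  def run(self):
    # Stash the result so it can still be queried after the with block.
    result = super(_TestPipeline, self).run()
    result.wait_until_finish()
    self._run_result = result
    return result

  @property
  def metrics(self):
    # Valid only after the pipeline has run, i.e. once the with block exits.
    return self._run_result.metrics()


def _get_counter_value(metrics, name):
  # Sum the committed values of every counter whose name matches `name`.
  counters = metrics.query(MetricsFilter().with_name(name))["counters"]
  return sum(c.committed for c in counters)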
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 9
Instances
Project Name: tensorflow/transform
Commit Name: 0d078991bf6a056be5579cda14dd1f305078eb7d
Time: 2019-04-29
Author: zoy@google.com
File Name: tensorflow_transform/beam/cached_impl_test.py
Class Name: CachedImplTest
Method Name: test_caching_vocab_for_integer_categorical

Project Name: tensorflow/transform
Commit Name: 0d078991bf6a056be5579cda14dd1f305078eb7d
Time: 2019-04-29
Author: zoy@google.com
File Name: tensorflow_transform/beam/cached_impl_test.py
Class Name: CachedImplTest
Method Name: test_single_phase_mixed_analyzer_run_once

Project Name: tensorflow/transform
Commit Name: 0d078991bf6a056be5579cda14dd1f305078eb7d
Time: 2019-04-29
Author: zoy@google.com
File Name: tensorflow_transform/beam/cached_impl_test.py
Class Name: CachedImplTest
Method Name: test_single_phase_run_twice