import google.datalab.bigquery as bq
import google.datalab.ml
import google.datalab.utils.commands


# _abs_path is a path-resolution helper defined elsewhere in this module.
def _transform(args, cell):
  env = google.datalab.utils.commands.notebook_environment()
  cell_data = google.datalab.utils.commands.parse_config(cell, env)
  google.datalab.utils.commands.validate_config(cell_data,
                                                required_keys=["training_data"],
                                                optional_keys=["cloud"])
  training_data = cell_data["training_data"]

  # Build the transform.py invocation from the parsed flags.
  cmd_args = ["python", "transform.py",
              "--output", _abs_path(args["output"]),
              "--analysis", _abs_path(args["analysis"]),
              "--prefix", args["prefix"]]
  if args["cloud"]:
    cmd_args.append("--cloud")
    cmd_args.append("--async")
  if args["shuffle"]:
    cmd_args.append("--shuffle")
  if args["batch_size"]:
    cmd_args.extend(["--batch-size", str(args["batch_size"])])
  if isinstance(training_data, dict):
    if "csv" in training_data:
      cmd_args.extend(["--csv", _abs_path(training_data["csv"])])
    elif "bigquery_table" in training_data:
      cmd_args.extend(["--bigquery", training_data["bigquery_table"]])
    elif "bigquery_sql" in training_data:
      # See https://cloud.google.com/bigquery/querying-data#temporary_and_permanent_tables
print("Creating temporary table that will be deleted in 24 hours")
r = bq.Query(training_data["bigquery_sql"]).execute().result()
cmd_args.extend(["--bigquery", r.full_name])
else:
raise ValueError("Invalid training_data dict. " +
"Requires either "csv" and "schema", or "bigquery".")
  elif isinstance(training_data, google.datalab.ml.CsvDataSet):
    for file_name in training_data.input_files:
      cmd_args.append("--csv=" + _abs_path(file_name))
  elif isinstance(training_data, google.datalab.ml.BigQueryDataSet):
    cmd_args.extend(["--bigquery", training_data.table])
  else:
    raise ValueError("Invalid training data. Requires either a dict, "
                     "a google.datalab.ml.CsvDataSet, or a google.datalab.ml.BigQueryDataSet.")
if "cloud" in cell_data:
cloud_config = cell_data["cloud"]
google.datalab.utils.commands.validate_config(
cloud_config,
required_keys=[],
optional_keys=["num_workers", "worker_machine_type", "project_id"])
After Change
raise ValueError("Invalid training data. Requires either a dict, " +
"a google.datalab.ml.CsvDataSet, or a google.datalab.ml.BigQueryDataSet.")
  cloud_config = args["cloud_config"]
  if cloud_config:
    google.datalab.utils.commands.validate_config(
        cloud_config,
        required_keys=[],
        optional_keys=["num_workers", "worker_machine_type", "project_id"])
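
After the change, the cloud settings arrive pre-parsed through args rather than via a "cloud" key in the cell body, so validation no longer depends on the cell schema. A minimal sketch of the new shape, with key names inferred from the validation call above and a hypothetical placeholder project id:

# Illustrative only: cloud settings passed alongside the other flags.
args = {
    "output": "./transform_out",
    "analysis": "./analysis_out",
    "prefix": "features",
    "cloud": True,
    "shuffle": False,
    "batch_size": 100,
    # Keys match the optional_keys validated above.
    "cloud_config": {
        "num_workers": 4,
        "worker_machine_type": "n1-standard-1",
        "project_id": "my-project",  # hypothetical placeholder
    },
}

The practical effect is that all invocation options live in one place: callers pass the cloud options with the other flags instead of embedding them in the cell.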