fe358db6913812c1e3ba8cd8036d941c996fb413,deepchem/datasets/pcba_datasets.py,,load_pcba,#Any#Any#,30
Before Change
current_dir = os.path.dirname(os.path.realpath(__file__))
// Build the paths of the directories (under base_dir) that store the features, samples and dataset.
feature_dir = os.path.join(base_dir, "features")
samples_dir = os.path.join(base_dir, "samples")
data_dir = os.path.join(base_dir, "dataset")
// Load PCBA dataset
print("About to load PCBA dataset.")
dataset_file = os.path.join(
current_dir, "../../datasets/pcba.csv.gz")
dataset = load_from_disk(dataset_file)
print("Columns of dataset: %s" % str(dataset.columns.values))
print("Number of examples in dataset: %s" % str(dataset.shape[0]))
// Featurize PCBA dataset
print("About to featurize PCBA dataset.")
featurizers = [CircularFingerprint(size=1024)]
all_PCBA_tasks = ["PCBA-1030","PCBA-1379","PCBA-1452","PCBA-1454","PCBA-1457",
"PCBA-1458","PCBA-1460","PCBA-1461","PCBA-1468","PCBA-1469",
"PCBA-1471","PCBA-1479","PCBA-1631","PCBA-1634","PCBA-1688",
"PCBA-1721","PCBA-2100","PCBA-2101","PCBA-2147","PCBA-2242",
"PCBA-2326","PCBA-2451","PCBA-2517","PCBA-2528","PCBA-2546",
"PCBA-2549","PCBA-2551","PCBA-2662","PCBA-2675","PCBA-2676",
"PCBA-411","PCBA-463254","PCBA-485281","PCBA-485290","PCBA-485294",
"PCBA-485297","PCBA-485313","PCBA-485314","PCBA-485341","PCBA-485349",
"PCBA-485353","PCBA-485360","PCBA-485364","PCBA-485367","PCBA-492947",
"PCBA-493208","PCBA-504327","PCBA-504332","PCBA-504333","PCBA-504339",
"PCBA-504444","PCBA-504466","PCBA-504467","PCBA-504706","PCBA-504842",
"PCBA-504845","PCBA-504847","PCBA-504891","PCBA-540276","PCBA-540317",
"PCBA-588342","PCBA-588453","PCBA-588456","PCBA-588579","PCBA-588590",
"PCBA-588591","PCBA-588795","PCBA-588855","PCBA-602179","PCBA-602233",
"PCBA-602310","PCBA-602313","PCBA-602332","PCBA-624170","PCBA-624171",
"PCBA-624173","PCBA-624202","PCBA-624246","PCBA-624287","PCBA-624288",
"PCBA-624291","PCBA-624296","PCBA-624297","PCBA-624417","PCBA-651635",
"PCBA-651644","PCBA-651768","PCBA-651965","PCBA-652025","PCBA-652104",
"PCBA-652105","PCBA-652106","PCBA-686970","PCBA-686978","PCBA-686979",
"PCBA-720504","PCBA-720532","PCBA-720542","PCBA-720551","PCBA-720553",
"PCBA-720579","PCBA-720580","PCBA-720707","PCBA-720708","PCBA-720709",
"PCBA-720711","PCBA-743255","PCBA-743266","PCBA-875","PCBA-881",
"PCBA-883","PCBA-884","PCBA-885","PCBA-887","PCBA-891","PCBA-899",
"PCBA-902","PCBA-903","PCBA-904","PCBA-912","PCBA-914","PCBA-915",
"PCBA-924","PCBA-925","PCBA-926","PCBA-927","PCBA-938","PCBA-995"]
featurizer = DataFeaturizer(tasks=all_PCBA_tasks,
smiles_field="smiles",
compound_featurizers=featurizers,
verbosity=verbosity)
featurized_samples = featurizer.featurize(
dataset_file, feature_dir,
samples_dir, shard_size=8192,
reload=reload)
dataset = Dataset(data_dir=data_dir, samples=featurized_samples,
featurizers=featurizers, tasks=all_PCBA_tasks,
verbosity=verbosity, reload=reload)
// Initialize transformers
input_transformers = []
output_transformers = []
weight_transformers = [
BalancingTransformer(transform_w=True, dataset=dataset)]
transformers = input_transformers + output_transformers + weight_transformers
if not reload:
print("About to transform data")
for transformer in transformers:
transformer.transform(dataset)
After Change
smiles_field="smiles",
featurizers=featurizers,
verbosity=verbosity)
if not reload or not os.path.exists(data_dir):
dataset = featurizer.featurize(dataset_file, data_dir)
regen = True
else:
dataset = Dataset(data_dir, reload=True)
// Initialize transformers
transformers = [
BalancingTransformer(transform_w=True, dataset=dataset)]
if regen:
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances
Project Name: deepchem/deepchem
Commit Name: fe358db6913812c1e3ba8cd8036d941c996fb413
Time: 2016-05-31
Author: bharath.ramsundar@gmail.com
File Name: deepchem/datasets/pcba_datasets.py
Class Name:
Method Name: load_pcba
Project Name: deepchem/deepchem
Commit Name: fe358db6913812c1e3ba8cd8036d941c996fb413
Time: 2016-05-31
Author: bharath.ramsundar@gmail.com
File Name: deepchem/datasets/muv_datasets.py
Class Name:
Method Name: load_muv
Project Name: MaybeShewill-CV/lanenet-lane-detection
Commit Name: 74b2ee5fd7700ef6777df0655862366ce6332b3d
Time: 2020-06-12
Author: luoyao@baidu.com
File Name: data_provider/lanenet_data_feed_pipline.py
Class Name: LaneNetDataProducer
Method Name: __init__