351bcbc392bfa8869fc01e5a047273b53d24419e,finetune/utils.py,,finetune_to_indico_sequence,#Any#Any#Any#Any#Any#,312
Before Change
n_tokens = len(tokens)
doc_text = ""
doc_annotations = []
annotation_start = 0
annotation_end = 0
start_idx = 0
end_idx = 0
for sub_str, label in zip(doc_seq, label_seq):
stripped_text = sub_str.strip()
annotation_start = raw_text.find(stripped_text, annotation_end)
annotation_end = annotation_start + len(stripped_text)
if not subtoken_predictions:
# round to nearest token
while start_idx < n_tokens and annotation_start >= token_starts[start_idx]:
start_idx += 1
annotation_start = token_starts[start_idx - 1]
while end_idx < (n_tokens - 1) and annotation_end > token_ends[end_idx]:
end_idx += 1
annotation_end = token_ends[end_idx]
text = raw_text[annotation_start:annotation_end]
if label != none_value:
doc_annotations.append(
{
"start": annotation_start,
"end": annotation_end,
"label": label,
"text": text
}
)
annotations.append(doc_annotations)
return raw_texts, annotations
After Change
n_tokens = len(tokens)
doc_text = ""
doc_annotations = set([])
annotation_start = 0
annotation_end = 0
start_idx = 0
end_idx = 0
for sub_str, label in zip(doc_seq, label_seq):
stripped_text = sub_str.strip()
annotation_start = raw_text.find(stripped_text, annotation_end)
annotation_end = annotation_start + len(stripped_text)
if not subtoken_predictions:
# round to nearest token
while start_idx < n_tokens and annotation_start >= token_starts[start_idx]:
start_idx += 1
annotation_start = token_starts[start_idx - 1]
while end_idx < (n_tokens - 1) and annotation_end > token_ends[end_idx]:
end_idx += 1
annotation_end = token_ends[end_idx]
text = raw_text[annotation_start:annotation_end]
if label != none_value:
doc_annotations.add(
(
("start", annotation_start),
("end", annotation_end),
("label", label),
("text", text)
)
)
doc_annotations = sorted([dict(items) for items in doc_annotations], key=lambda x: x["start"])
annotations.append(doc_annotations)
return raw_texts, annotations
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances
Project Name: IndicoDataSolutions/finetune
Commit Name: 351bcbc392bfa8869fc01e5a047273b53d24419e
Time: 2018-08-01
Author: madison@indico.io
File Name: finetune/utils.py
Class Name:
Method Name: finetune_to_indico_sequence
Project Name: allenai/allennlp
Commit Name: 40ec35876d38c4797ad3ee9bf911b019faa5a61d
Time: 2017-08-25
Author: mattg@allenai.org
File Name: allennlp/nn/initializers.py
Class Name: InitializerApplicator
Method Name: __call__
Project Name: facebookresearch/ParlAI
Commit Name: 2cbc5eb42ee7c742a5ea0eca3f2de945a513c00a
Time: 2019-07-31
Author: 5313281+dexterju@users.noreply.github.com
File Name: parlai/core/metrics.py
Class Name: Metrics
Method Name: __init__