dfc58ff63a64e9a3b83afaeded706192505f08fa,robotreviewer/textprocessing/pdfreader.py,PdfReader,parse_xml,#PdfReader#Any#,117
Before Change
output = MultiDict()
full_text_bits = []
author_list = []
author_bits = []
path = []
for event, elem in ET.iterparse(StringIO(xml_string.encode("utf-8")),events=("start", "end")):
if event == "start":
path.append(elem.tag)
elif event == "end":
if elem.tag=="{http://www.tei-c.org/ns/1.0}abstract":
output.grobid["abstract"] = (self._extract_text(elem))
elif elem.tag=="{http://www.tei-c.org/ns/1.0}title" and "{http://www.tei-c.org/ns/1.0}titleStmt" in path:
output.grobid["title"] = self._extract_text(elem)
elif elem.tag in ["{http://www.tei-c.org/ns/1.0}head", "{http://www.tei-c.org/ns/1.0}p"]:
full_text_bits.extend([self._extract_text(elem), "\n"])
elif elem.tag=="{http://www.tei-c.org/ns/1.0}forename":
author_bits.append(self._extract_text(elem))
elif elem.tag=="{http://www.tei-c.org/ns/1.0}surname":
author_bits.append(self._extract_text(elem))
author_list.append(author_bits)
author_bits = []
After Change
output.grobid["text"] = "\n".join(full_text_bits)
output.grobid["authors"] = author_list
log.info("author list: %s" % author_list)
return output
def _extract_text(self, elem):
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 6
Instances
Project Name: ijmarshall/robotreviewer
Commit Name: dfc58ff63a64e9a3b83afaeded706192505f08fa
Time: 2016-08-17
Author: byron.wallace@gmail.com
File Name: robotreviewer/textprocessing/pdfreader.py
Class Name: PdfReader
Method Name: parse_xml
Project Name: deepchem/deepchem
Commit Name: 3b3a06ad8402079c2d18718349d5f0f212ac7b81
Time: 2020-12-11
Author: nfrey213@gmail.com
File Name: deepchem/feat/base_classes.py
Class Name: ComplexFeaturizer
Method Name: featurize
Project Name: ijmarshall/robotreviewer
Commit Name: 6a7db8d13e7d24a209a0bb22a3ebde24057b0adb
Time: 2016-08-17
Author: byron.wallace@gmail.com
File Name: robotreviewer/textprocessing/pdfreader.py
Class Name: PdfReader
Method Name: parse_xml
Project Name: ilastik/ilastik
Commit Name: fe073644f6a8f37e9ce57df903bf12b560690fc3
Time: 2012-09-14
Author: christoph.straehle@iwr.uni-heidelberg.de
File Name: lazyflow/operators/obsolete/classifierOperators.py
Class Name: OpPredictRandomForest
Method Name: execute