b5a9d8e3c23b73f0050cff9f426260bd709d0a75,robotreviewer/textprocessing/pdfreader.py,PdfReader,parse_xml,#PdfReader#Any#,119
Before Change
output.grobid["text"] = "\n".join(full_text_bits)
output.grobid["authors"] = author_list
log.info("author list: %s" % author_list)
return output
def _extract_text(self, elem):
After Change
full_text_bits.extend([self._extract_text(elem), "\n"])
elif elem.tag=="{http://www.tei-c.org/ns/1.0}persName" and "{http://www.tei-c.org/ns/1.0}fileDesc" in path:
forenames = [e.text for e in elem.findall("{http://www.tei-c.org/ns/1.0}forename")]
lastnames = [e.text for e in elem.findall("{http://www.tei-c.org/ns/1.0}surname")]
initials = [f[0] for f in forenames]
# NB: the format below is identical to that used in pubmed_robot.py
author_list.append({"initials": u"".join(initials),
"forename": u" ".join(forenames),
"lastname": u" ".join(lastnames)})
path.pop()
output.grobid["text"] = "\n".join(full_text_bits)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances
Project Name: ijmarshall/robotreviewer
Commit Name: b5a9d8e3c23b73f0050cff9f426260bd709d0a75
Time: 2016-08-18
Author: mail@ijmarshall.com
File Name: robotreviewer/textprocessing/pdfreader.py
Class Name: PdfReader
Method Name: parse_xml
Project Name: ijmarshall/robotreviewer
Commit Name: 46c648c38a32af8431c76699e36000848b574d95
Time: 2016-08-18
Author: mail@ijmarshall.com
File Name: robotreviewer/textprocessing/pdfreader.py
Class Name: PdfReader
Method Name: parse_xml
Project Name: GoogleCloudPlatform/PerfKitBenchmarker
Commit Name: 70d943067abebd5d9e7a22c943a7f7317ae0eeed
Time: 2015-08-06
Author: vukasin.stefanovic92@gmail.com
File Name: perfkitbenchmarker/benchmarks/cloudsuite_web_search.py
Class Name:
Method Name: Run