for line in in_stream:
if not line.strip():
continue
info = json.loads(line.strip().decode("utf-8"))
text = info.get(u"surfaceText") or ""
line = "%(uri)s\t%(rel)s\t%(start)s\t%(end)s\t%(context)s\t%(weight)s\t%(sources)s\t%(id)s\t%(dataset)s\t%(text)s" % {
"uri": info[u"uri"],
After Change
def convert_to_tab_separated(input_filename, output_filename):
out_stream = codecs.open(output_filename, "w", encoding="utf-8")
for info in read_json_stream(input_filename):
text = info.get("surfaceText") or ""
line = "%(uri)s\t%(rel)s\t%(start)s\t%(end)s\t%(context)s\t%(weight)s\t%(sources)s\t%(id)s\t%(dataset)s\t%(text)s" % {