ec60b6d78f0fc423a8158d9f42210b878a8bcf0e,src/gensim/corpora/svmlightcorpus.py,SvmLightCorpus,__iter__,#SvmLightCorpus#,58
Before Change
Iterate over the corpus, returning one sparse vector at a time.
for lineNo, line in enumerate(open(self.fname)):
line = line[: line.find("//")].strip()
if not line:
continue // ignore comments and empty lines
parts = line.split()
if not parts:
raise ValueError("invalid format at line no. %i in %s" %
(lineNo, self.fname))
target, fields = parts[0], [part.rsplit(":", 1) for part in parts[1:]]
doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != "qid"] // ignore "qid" features, convert 1-based feature ids to 0-based
yield doc
@staticmethod
def saveCorpus(fname, corpus, id2word = None):
Save a corpus in the SVMlight format.
After Change
with open(self.fname) as fin:
for lineNo, line in enumerate(fin):
doc = self.line2doc(line)
if doc is not None:
yield doc
self.streamposition += len(line)
def line2doc(self, line):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances Project Name: RaRe-Technologies/gensim
Commit Name: ec60b6d78f0fc423a8158d9f42210b878a8bcf0e
Time: 2011-02-16
Author: radimrehurek@seznam.cz
File Name: src/gensim/corpora/svmlightcorpus.py
Class Name: SvmLightCorpus
Method Name: __iter__
Project Name: theislab/scanpy
Commit Name: a9d88683e7e4275374191f5af212e24f849e5132
Time: 2017-08-29
Author: flying-sheep@web.de
File Name: docs/conf.py
Class Name:
Method Name: modpath
Project Name: sahana/eden
Commit Name: 8cf7b3d348ab23c2d0b0a232a19e00502d804109
Time: 2019-12-19
Author: dominic@nursix.org
File Name: controllers/errors.py
Class Name:
Method Name: index