ec60b6d78f0fc423a8158d9f42210b878a8bcf0e,src/gensim/corpora/svmlightcorpus.py,SvmLightCorpus,__iter__,#SvmLightCorpus#,58

Before Change


        
        """Iterate over the corpus, returning one sparse vector at a time."""
        
        for lineNo, line in enumerate(open(self.fname)):
            line = line[: line.find("//")].strip()
            if not line:
                continue  # ignore comments and empty lines
            parts = line.split()
            if not parts:
                raise ValueError("invalid format at line no. %i in %s" %
                                 (lineNo, self.fname))
            target, fields = parts[0], [part.rsplit(":", 1) for part in parts[1:]]
            doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != "qid"]  # ignore "qid" features, convert 1-based feature ids to 0-based
            yield doc
    

    @staticmethod
    def saveCorpus(fname, corpus, id2word = None):
        
        Save a corpus in the SVMlight format. 

After Change


        with open(self.fname) as fin:
            for lineNo, line in enumerate(fin):
                doc = self.line2doc(line)
                if doc is not None:
                    yield doc
                self.streamposition += len(line)
    
    
    def line2doc(self, line):
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: RaRe-Technologies/gensim
Commit Name: ec60b6d78f0fc423a8158d9f42210b878a8bcf0e
Time: 2011-02-16
Author: radimrehurek@seznam.cz
File Name: src/gensim/corpora/svmlightcorpus.py
Class Name: SvmLightCorpus
Method Name: __iter__


Project Name: theislab/scanpy
Commit Name: a9d88683e7e4275374191f5af212e24f849e5132
Time: 2017-08-29
Author: flying-sheep@web.de
File Name: docs/conf.py
Class Name:
Method Name: modpath


Project Name: sahana/eden
Commit Name: 8cf7b3d348ab23c2d0b0a232a19e00502d804109
Time: 2019-12-19
Author: dominic@nursix.org
File Name: controllers/errors.py
Class Name:
Method Name: index