1aa7e115fcf87b443373c14b7b2f3dd2e3383584,src/gensim/corpora/bleicorpus.py,BleiCorpus,saveCorpus,#Any#Any#Any#,73

Before Change


        
        logging.info("storing corpus in Blei"s LDA-C format: %s" % fname)
        fout = open(fname, "w")
        for doc in corpus:
            doc = list(doc)
            fout.write("%i %s\n" % (len(doc), " ".join("%i:%s" % p for p in doc)))
        fout.close()
        
        # write out vocabulary, in a format compatible with Blei's topics.py script
        fnameVocab = fname + ".vocab"

After Change


        
        logging.info("storing corpus in Blei"s LDA-C format: %s" % fname)
        with open(fname, "w") as fout:
            offsets = []
            for doc in corpus:
                doc = list(doc)
                offsets.append(fout.tell())
                fout.write("%i %s\n" % (len(doc), " ".join("%i:%s" % p for p in doc)))
            fout.close()
            
            # write out vocabulary, in a format compatible with Blei's topics.py script
            fnameVocab = fname + ".vocab"
            logging.info("saving vocabulary of %i words to %s" % (numTerms, fnameVocab))
            fout = open(fnameVocab, "w")
            for featureId in xrange(numTerms):
                fout.write("%s\n" % utils.toUtf8(id2word.get(featureId, "---")))
        
        return offsets
    
    def docbyoffset(self, offset):
        
        Return the document stored at file position `offset`.
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: RaRe-Technologies/gensim
Commit Name: 1aa7e115fcf87b443373c14b7b2f3dd2e3383584
Time: 2011-02-19
Author: radimrehurek@seznam.cz
File Name: src/gensim/corpora/bleicorpus.py
Class Name: BleiCorpus
Method Name: saveCorpus


Project Name: RaRe-Technologies/gensim
Commit Name: 1aa7e115fcf87b443373c14b7b2f3dd2e3383584
Time: 2011-02-19
Author: radimrehurek@seznam.cz
File Name: src/gensim/corpora/lowcorpus.py
Class Name: LowCorpus
Method Name: saveCorpus


Project Name: RaRe-Technologies/gensim
Commit Name: 1aa7e115fcf87b443373c14b7b2f3dd2e3383584
Time: 2011-02-19
Author: radimrehurek@seznam.cz
File Name: src/gensim/corpora/svmlightcorpus.py
Class Name: SvmLightCorpus
Method Name: saveCorpus