b933a75bd436e7ecdb62778f591be3c6b52aa175,textacy/corpus.py,Corpus,add_doc,#Corpus#Any#Any#,218

Before Change


                print("**WARNING: Doc already associated with a Corpus; adding anyway...")
        doc.corpus_index = self.n_docs
        doc.corpus = self
        self.docs.append(doc)
        self.n_docs += 1
        self.n_tokens += doc.n_tokens
        // sentence segmentation requires parse; if not available, just skip this
        try:

After Change


            it will be deep-copied and then added as if a new document. A warning
            message will be logged. This is probably not a thing you should do.
        
        if isinstance(doc, Doc):
            if doc.spacy_vocab is not self.spacy_vocab:
                msg = "Doc.spacy_vocab {} != Corpus.spacy_vocab {}".format(
                    doc.spacy_vocab, self.spacy_vocab)
                raise ValueError(msg)
            if hasattr(doc, "corpus_index"):
                doc = copy.deepcopy(doc)
                // TODO: make this into a logging warning
                print("**WARNING: Doc already associated with a Corpus; adding anyway...")
            if metadata is not None:
                doc.metadata = metadata
            self._add_textacy_doc(doc)
        elif isinstance(doc, SpacyDoc):
            if doc.vocab is not self.spacy_vocab:
                msg = "SpacyDoc.vocab {} != Corpus.spacy_vocab {}".format(
                    doc.vocab, self.spacy_vocab)
                raise ValueError(msg)
            self._add_textacy_doc(
                Doc(doc, lang=self.spacy_lang, metadata=metadata))
        else:
            msg = "`doc` must be {}, not "{}"".format(
                {Doc, SpacyDoc}, type(doc))
            raise ValueError(msg)

    #################
    # GET DOCUMENTS #

    def get_doc(self, index):
        
        Get a single :class:`textacy.Doc <Doc>` by its position ``index`` in
        the corpus.
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: chartbeat-labs/textacy
Commit Name: b933a75bd436e7ecdb62778f591be3c6b52aa175
Time: 2016-08-09
Author: burton@chartbeat.com
File Name: textacy/corpus.py
Class Name: Corpus
Method Name: add_doc


Project Name: scikit-optimize/scikit-optimize
Commit Name: 7bd676c9ab6b23ec93499fd72aeadee3a334a163
Time: 2017-08-18
Author: betatim@gmail.com
File Name: skopt/utils.py
Class Name:
Method Name: normalize_dimensions


Project Name: reinforceio/tensorforce
Commit Name: 8b61a18641fdaa14601d0ed2e3337c8ee7b65f7c
Time: 2020-03-22
Author: alexkuhnle@t-online.de
File Name: tensorforce/core/networks/preprocessor.py
Class Name: Preprocessor
Method Name: __init__