75c1231f77fc1ebc5af31d5c598f1353ea9aa5df,filter.py,ArticleFilter,generate_corpus,#ArticleFilter#Any#Any#Any#Any#Any#,101

Before Change


                # 捨去已存在語料庫的標題或過短的標題
                continue

            if "Responses" in article.keys():
                article["Responses"] = self.clean_responses(article["Responses"])
                if no_content:
                    article.pop("Content")

            if drop_response:
                # 捨去回應類文章與快訊文章, i.e Re: and Fw:
                if title.startswith("Re") or title.startswith("Fw"):
                    continue

After Change



            ######################回應抽取與輸出######################
            self.response_cache.append(article["Responses"])
            self.article_count += 1
            if self.article_count % 1000 == 0: # 每個 json 檔儲存 1000 篇文章的回應
                self.reply_index += 1
                with open("data/processed/reply/"+str(self.reply_index)+".json","w",encoding="utf-8") as reply:
                    reply.write(json.dumps(self.response_cache, indent=4, ensure_ascii=False))
                self.response_cache = []

            clean_article.append(article)

        return clean_article
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 9

Instances


Project Name: zake7749/PTT-Chat-Generator
Commit Name: 75c1231f77fc1ebc5af31d5c598f1353ea9aa5df
Time: 2016-10-20
Author: zake7749@gmail.com
File Name: filter.py
Class Name: ArticleFilter
Method Name: generate_corpus


Project Name: geek-ai/MAgent
Commit Name: 2c00aa18540df4ab4c3afeacd44b556b3ce90782
Time: 2017-12-01
Author: zhenglianmin96@163.com
File Name: python/magent/utility.py
Class Name:
Method Name: download_file


Project Name: azavea/raster-vision
Commit Name: afeec8a22c2936dd9b2a94eec64cd89442fe9598
Time: 2019-01-03
Author: jmcclain@azavea.com
File Name: rastervision/data/label_store/semantic_segmentation_raster_store.py
Class Name: SemanticSegmentationRasterStore
Method Name: save