75c1231f77fc1ebc5af31d5c598f1353ea9aa5df,filter.py,ArticleFilter,generate_corpus,#ArticleFilter#Any#Any#Any#Any#Any#,101
Before Change
//Skip titles that already exist in the corpus or are too short
continue
if "Responses" in article.keys():
article["Responses"] = self.clean_responses(article["Responses"])
if no_content:
article.pop("Content")
if drop_response:
//Skip reply and forwarded articles, i.e. titles starting with "Re" or "Fw"
if title.startswith("Re") or title.startswith("Fw"):
continue
After Change
////////////////////////////////////////////Response extraction and output////////////////////////////////////////////
self.response_cache.append(article["Responses"])
self.article_count += 1
if self.article_count % 1000 == 0: // each JSON file stores the responses of 1000 articles
self.reply_index += 1
with open("data/processed/reply/"+str(self.reply_index)+".json","w",encoding="utf-8") as reply:
reply.write(json.dumps(self.response_cache, indent=4, ensure_ascii=False))
self.response_cache = []
clean_article.append(article)
return clean_article
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 9
Instances
Project Name: zake7749/PTT-Chat-Generator
Commit Name: 75c1231f77fc1ebc5af31d5c598f1353ea9aa5df
Time: 2016-10-20
Author: zake7749@gmail.com
File Name: filter.py
Class Name: ArticleFilter
Method Name: generate_corpus
Project Name: geek-ai/MAgent
Commit Name: 2c00aa18540df4ab4c3afeacd44b556b3ce90782
Time: 2017-12-01
Author: zhenglianmin96@163.com
File Name: python/magent/utility.py
Class Name:
Method Name: download_file
Project Name: azavea/raster-vision
Commit Name: afeec8a22c2936dd9b2a94eec64cd89442fe9598
Time: 2019-01-03
Author: jmcclain@azavea.com
File Name: rastervision/data/label_store/semantic_segmentation_raster_store.py
Class Name: SemanticSegmentationRasterStore
Method Name: save