75c1231f77fc1ebc5af31d5c598f1353ea9aa5df,filter.py,ArticleFilter,generate_corpus,#ArticleFilter#Any#Any#Any#Any#Any#,101
Before Change
//捨去已存在語料庫的標題或過短的標題
continue
if "Responses" in article.keys() :
article["Responses"] = self.clean_responses(article["Responses"])
if no_content:
article.pop("Content")
After Change
////////////////////////////////////////////回應抽取與輸出////////////////////////////////////////////
self.response_cache.append(article["Responses"])
self.article_count += 1
if self.article_count % 1000 == 0: // 每個 json 檔儲存 1000 篇文章的回應
self.reply_index += 1
with open("data/processed/reply/"+str(self.reply_index)+".json","w",encoding="utf-8") as reply:
reply.write(json.dumps(self.response_cache, indent=4, ensure_ascii=False))
self.response_cache = []
clean_article.append(article)
return clean_article
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances Project Name: zake7749/PTT-Chat-Generator
Commit Name: 75c1231f77fc1ebc5af31d5c598f1353ea9aa5df
Time: 2016-10-20
Author: zake7749@gmail.com
File Name: filter.py
Class Name: ArticleFilter
Method Name: generate_corpus
Project Name: theislab/scanpy
Commit Name: 49899ef69309d2a0ab6ad785f455eb53a8443a06
Time: 2017-02-08
Author: f.alex.wolf@gmx.de
File Name: scanpy/tools/dpt.py
Class Name:
Method Name: plot_groups
Project Name: home-assistant/home-assistant
Commit Name: 06a20d0d1540d35dc4e2446592e1a8b08ef56420
Time: 2017-08-22
Author: wjs.scanlon@gmail.com
File Name: homeassistant/components/octoprint.py
Class Name: OctoPrintAPI
Method Name: get_tools