d3fd9a8a9681d74d0db8b8e8ac793502b7079d78,Reader.py,Reader,process_raw_data,#Reader#Any#Any#,30
Before Change
for filename in filenames:
with open(os.path.join(path, filename),"r", encoding="utf-8") as data:
res = self.select_articles(json.load(data))
f = open("data/processed/"+filename,"w", encoding="utf-8")
f.write(json.dumps(res, indent=4, ensure_ascii=False))
print("已處理 " + filename)
f.close()
def select_articles(self, articles, drop_response=True, negative_tag=None, no_content=True):
After Change
res = self.generate_corpus(json.load(data))
with open("data/processed/"+filename,"w", encoding="utf-8") as op:
op.write(json.dumps(res, indent=4, ensure_ascii=False))
print("已處理 " + filename)
def generate_corpus(self, articles, drop_response=True, negative_tag=None, no_content=True):
依據需求挑選出符合語料庫需求的文章
In pattern: SUPERPATTERN
Frequency: 20
Non-data size: 8
Instances
Project Name: zake7749/PTT-Chat-Generator
Commit Name: d3fd9a8a9681d74d0db8b8e8ac793502b7079d78
Time: 2016-10-09
Author: zake7749@gmail.com
File Name: Reader.py
Class Name: Reader
Method Name: process_raw_data
Project Name: bokeh/bokeh
Commit Name: 9aa8d8029264c9b27750ece8e31c64e097b666d8
Time: 2018-05-09
Author: mattpap@gmail.com
File Name: bokehjs/gulp/tasks/generate_defaults.py
Class Name:
Method Name: output_defaults_module
Project Name: pyinstaller/pyinstaller
Commit Name: 5dc9f3ee3cfd496bb11d457dbfe1d91d9f0566a2
Time: 2018-09-09
Author: contact@tiger-222.fr
File Name: PyInstaller/utils/cliutils/grab_version.py
Class Name:
Method Name: run
Project Name: TheAlgorithms/Python
Commit Name: 2d70e9f7475d1a23b89fd1bd7b0af01f3173b0b5
Time: 2019-01-08
Author: contact@tiger-222.fr
File Name: ciphers/rsa_cipher.py
Class Name:
Method Name: encryptAndWriteToFile
Project Name: nilearn/nilearn
Commit Name: b282567cada395b44099011f4403fe200aceedbf
Time: 2015-08-20
Author: mperezguevara@gmail.com
File Name: nilearn/datasets/tests/test_atlas.py
Class Name:
Method Name: test_fetch_atlas_aal_spm_12
Project Name: pysb/pysb
Commit Name: b9d8388cdd242a8c660d7a6de37d2374d048b649
Time: 2016-03-08
Author: bachmanjohn@gmail.com
File Name: pysb/kappa.py
Class Name:
Method Name: run_simulation
Project Name: PyMVPA/PyMVPA
Commit Name: 8af9bd7ff42efa77d70690e7c374b321967209af
Time: 2014-02-25
Author: nikolaas.oosterhof@unitn.it
File Name: mvpa2/tests/test_iohelpers.py
Class Name: IOHelperTests
Method Name: test_column_data_from_file
Project Name: home-assistant/home-assistant
Commit Name: 8bba0b88fd9329b44a53fcf43413e78ff2a55a30
Time: 2015-09-14
Author: jon@jonmaddox.com
File Name: homeassistant/__main__.py
Class Name:
Method Name: install_osx
Project Name: mne-tools/mne-python
Commit Name: 9aaa6636c3a508df1aa87c6a38d1a388b5de0b39
Time: 2018-09-14
Author: w.m.vanvliet@gmail.com
File Name: mne/report.py
Class Name: Report
Method Name: save
Project Name: TheAlgorithms/Python
Commit Name: 2d70e9f7475d1a23b89fd1bd7b0af01f3173b0b5
Time: 2019-01-08
Author: contact@tiger-222.fr
File Name: sorts/external-sort.py
Class Name: FileSplitter
Method Name: write_block
Project Name: datascienceinc/Skater
Commit Name: e533bc1eb253262785d3f889bf5deac1f77d3b1a
Time: 2018-04-29
Author: pramitchoudhary@gmail.com
File Name: skater/core/visualizer/text_relevance_visualizer.py
Class Name:
Method Name: _build_html_file
Project Name: RaRe-Technologies/gensim
Commit Name: 1aa7e115fcf87b443373c14b7b2f3dd2e3383584
Time: 2011-02-19
Author: radimrehurek@seznam.cz
File Name: src/gensim/corpora/lowcorpus.py
Class Name: LowCorpus
Method Name: saveCorpus
Project Name: TheAlgorithms/Python
Commit Name: 2d70e9f7475d1a23b89fd1bd7b0af01f3173b0b5
Time: 2019-01-08
Author: contact@tiger-222.fr
File Name: ciphers/transposition_cipher_encrypt_decrypt_file.py
Class Name:
Method Name: main
Project Name: pyinstaller/pyinstaller
Commit Name: 5fd013bfebe9b88425faf6f96ae13ccef1e7305e
Time: 2017-05-19
Author: nuada@users.noreply.github.com
File Name: PyInstaller/building/osx.py
Class Name: BUNDLE
Method Name: assemble
Project Name: pyinstaller/pyinstaller
Commit Name: 5dc9f3ee3cfd496bb11d457dbfe1d91d9f0566a2
Time: 2018-09-09
Author: contact@tiger-222.fr
File Name: PyInstaller/depend/bindepend.py
Class Name:
Method Name: check_extract_from_egg
Project Name: thtrieu/darkflow
Commit Name: f3ec07b55cdd88e642c42e9eb62452ca6cc4dd09
Time: 2017-02-22
Author: nathaniel.brough@gmail.com
File Name: net/yolov2/test.py
Class Name:
Method Name: postprocess
Project Name: home-assistant/home-assistant
Commit Name: 0376cc0917e0576b44509b37194f5081df0b3ffa
Time: 2018-01-31
Author: ville.skytta@iki.fi
File Name: homeassistant/components/light/greenwave.py
Class Name:
Method Name: setup_platform
Project Name: pysb/pysb
Commit Name: b9d8388cdd242a8c660d7a6de37d2374d048b649
Time: 2016-03-08
Author: bachmanjohn@gmail.com
File Name: pysb/kappa.py
Class Name:
Method Name: run_static_analysis
Project Name: Esri/raster-functions
Commit Name: bd4f7c7aaf161bb70f8e564bcb32a8a45418d003
Time: 2015-05-20
Author: akferoz@esri.com
File Name: scripts/setup.py
Class Name:
Method Name: downloadFile
Project Name: TheAlgorithms/Python
Commit Name: d8a62451031bcf67d5ce7e1e7fc730c65b7f649d
Time: 2016-10-08
Author: sutanto.maxedwin@gmail.com
File Name: other/word_patterns.py
Class Name:
Method Name: main