dad1abdcffb4d37256502a73a1c236aa2f07636b,pycorrector/bert/bert_corrector.py,BertCorrector,bert_correct,#BertCorrector#Any#,33

Before Change


        :return: list[list], [error_word, begin_pos, end_pos, error_type]
        
        maybe_errors = []
        for idx, s in enumerate(sentence):
            // 对非中文的错误不做处理
            if not is_chinese_string(s):
                continue

            sentence_lst = list(sentence)
            sentence_lst[idx] = self.mask
            sentence_new = "".join(sentence_lst)
            predicts = self.model(sentence_new)
            top_tokens = []
            for p in predicts:
                token_id = p.get("token", 0)
                token_str = self.model.tokenizer.convert_ids_to_tokens(token_id)
                top_tokens.append(token_str)

            if top_tokens and (s not in top_tokens):
                // 取得所有可能正确的词
                candidates = self.generate_items(s)
                if not candidates:
                    continue
                for token_str in top_tokens:
                    if token_str in candidates:
                        maybe_errors.append([s, token_str, idx, idx + 1])
                        break
        return maybe_errors


if __name__ == "__main__":

After Change


        // 编码统一,utf-8 to unicode
        text = convert_to_unicode(text)
        // 长句切分为短句
        blocks = self.split_2_short_text(text, include_symbol=True)
        for blk, start_idx in blocks:
            blk_new = ""
            for idx, s in enumerate(blk):
                // 对非中文的错误不做处理
[image omitted; alt text: "Italian Trulli"]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: shibing624/pycorrector
Commit Name: dad1abdcffb4d37256502a73a1c236aa2f07636b
Time: 2020-03-17
Author: xuming624@qq.com
File Name: pycorrector/bert/bert_corrector.py
Class Name: BertCorrector
Method Name: bert_correct


Project Name: shibing624/pycorrector
Commit Name: 8f76475c3e5b3533b40e7ba41ca11ec987ffe974
Time: 2020-03-15
Author: xuming624@qq.com
File Name: pycorrector/corrector.py
Class Name: Corrector
Method Name: correct


Project Name: shibing624/pycorrector
Commit Name: 8f76475c3e5b3533b40e7ba41ca11ec987ffe974
Time: 2020-03-15
Author: xuming624@qq.com
File Name: pycorrector/detector.py
Class Name: Detector
Method Name: detect