Source code for pythainlp.spell.core

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
"""
Spell checking functions
"""

import itertools
from typing import List

from pythainlp.spell import DEFAULT_SPELL_CHECKER


[docs]def spell(word: str, engine: str = "pn") -> List[str]: """ Provides a list of possible correct spellings of the given word. The list of words are from the words in the dictionary that incurs an edit distance value of 1 or 2. The result is a list of words sorted by their occurrences in the spelling dictionary in descending order. :param str word: Word to check spell of :param str engine: * *pn* - Peter Norvig's algorithm [#norvig_spellchecker]_ (default) * *phunspell* - A spell checker utilizing spylls, a port of Hunspell. * *symspellpy* - symspellpy is a Python port of SymSpell v6.5. * *tltk* - wrapper for `TLTK <https://pypi.org/project/tltk/>`_. :return: list of possible correct words within 1 or 2 edit distance and sorted by frequency of word occurrences in the spelling dictionary in descending order. :rtype: list[str] :Example: :: from pythainlp.spell import spell spell("เส้นตรบ", engine="pn") # output: ['เส้นตรง'] spell("เส้นตรบ") # output: ['เส้นตรง'] spell("เส้นตรบ", engine="tltk") # output: ['เส้นตรง'] spell("ครัช") # output: ['ครับ', 'ครัว', 'รัช', 'ครัม', 'ครัน', 'วรัช', 'ครัส', # 'ปรัช', 'บรัช', 'ครัง', 'คัช', 'คลัช', 'ครัย', 'ครัด'] spell("กระปิ") # output: ['กะปิ', 'กระบิ'] spell("สังเกตุ") # output: ['สังเกต'] spell("เหตการณ") # output: ['เหตุการณ์'] """ if engine == "phunspell": from pythainlp.spell.phunspell import spell as SPELL_CHECKER text_correct = SPELL_CHECKER(word) elif engine == "symspellpy": from pythainlp.spell.symspellpy import spell as SPELL_CHECKER text_correct = SPELL_CHECKER(word) elif engine == "tltk": from pythainlp.spell.tltk import spell as SPELL_CHECKER text_correct = SPELL_CHECKER(word) else: text_correct = DEFAULT_SPELL_CHECKER.spell(word) return text_correct
[docs]def correct(word: str, engine: str = "pn") -> str: """ Corrects the spelling of the given word by returning the correctly spelled word. :param str word: word to correct spelling of :param str engine: * *pn* - Peter Norvig's algorithm [#norvig_spellchecker]_ (default) * *phunspell* - A spell checker utilizing spylls, a port of Hunspell. * *symspellpy* - symspellpy is a Python port of SymSpell v6.5. * *wanchanberta_thai_grammarly* - WanchanBERTa Thai Grammarly :return: the corrected word :rtype: str :Example: :: from pythainlp.spell import correct correct("เส้นตรบ") # output: 'เส้นตรง' correct("ครัช") # output: 'ครับ' correct("สังเกตุ") # output: 'สังเกต' correct("กระปิ") # output: 'กะปิ' correct("เหตการณ") # output: 'เหตุการณ์' """ if engine == "phunspell": from pythainlp.spell.phunspell import correct as SPELL_CHECKER text_correct = SPELL_CHECKER(word) elif engine == "symspellpy": from pythainlp.spell.symspellpy import correct as SPELL_CHECKER text_correct = SPELL_CHECKER(word) elif engine == "wanchanberta_thai_grammarly": from pythainlp.spell.wanchanberta_thai_grammarly import correct as SPELL_CHECKER text_correct = SPELL_CHECKER(word) else: text_correct = DEFAULT_SPELL_CHECKER.correct(word) return text_correct
[docs]def spell_sent(list_words: List[str], engine: str = "pn") -> List[List[str]]: """ Provides a list of possible correct spellings of sentence :param List[str] list_words: list of words in sentence :param str engine: * *pn* - Peter Norvig's algorithm [#norvig_spellchecker]_ (default) * *phunspell* - A spell checker utilizing spylls, a port of Hunspell. * *symspellpy* - symspellpy is a Python port of SymSpell v6.5. :return: list of possibly correct words :rtype: List[List[str]] :Example: :: from pythainlp.spell import spell_sent spell_sent(["เด็","อินอร์เน็ต","แรง"],engine='symspellpy') # output: [['เด็ก', 'อินเทอร์เน็ต', 'แรง']] """ if engine == "symspellpy": from pythainlp.spell.symspellpy import spell_sent as symspellpy_spell list_new = symspellpy_spell(list_words) else: _temp = list( itertools.product(*[spell(i, engine=engine) for i in list_words]) ) list_new = [] for i in _temp: _temp2 = [] for j in i: _temp2.append(j) list_new.append(_temp2) return list_new
[docs]def correct_sent(list_words: List[str], engine: str = "pn") -> List[str]: """ Corrects and returns the spelling of the given sentence :param List[str] list_words: list of words in sentence :param str engine: * *pn* - Peter Norvig's algorithm [#norvig_spellchecker]_ (default) * *phunspell* - A spell checker utilizing spylls, a port of Hunspell. * *symspellpy* - symspellpy is a Python port of SymSpell v6.5. * *wanchanberta_thai_grammarly* - WanchanBERTa Thai Grammarly :return: the corrected list of words in sentence :rtype: List[str] :Example: :: from pythainlp.spell import correct_sent correct_sent(["เด็","อินอร์เน็ต","แรง"],engine='symspellpy') # output: ['เด็ก', 'อินเทอร์เน็ต', 'แรง'] """ return spell_sent(list_words, engine=engine)[0]