Source code for pythainlp.transliterate.core

# -*- coding: utf-8 -*-

DEFAULT_ROMANIZE_ENGINE = "royin"
DEFAULT_TRANSLITERATE_ENGINE = "thaig2p"
DEFAULT_PRONUNCIATE_ENGINE = "w2p"


[docs]def romanize(text: str, engine: str = DEFAULT_ROMANIZE_ENGINE) -> str: """ This function renders Thai words in the Latin alphabet or "romanization", using the Royal Thai General System of Transcription (RTGS) [#rtgs_transcription]_. RTGS is the official system published by the Royal Institute of Thailand. (Thai: ถอดเสียงภาษาไทยเป็นอักษรละติน) :param str text: Thai text to be romanized :param str engine: 'royin' (default) or 'thai2rom' :return: A string of Thai words rendered in the Latin alphabet. :rtype: str :Options for engines: * *royin* - (default) based on the Royal Thai General System of Transcription issued by Royal Institute of Thailand. * *thai2rom* - a deep learning-based Thai romanization engine (require PyTorch). * *tltk* - TLTK: Thai Language Toolkit :Example: :: from pythainlp.transliterate import romanize romanize("สามารถ", engine="royin") # output: 'samant' romanize("สามารถ", engine="thai2rom") # output: 'samat' romanize("สามารถ", engine="tltk") # output: 'samat' romanize("ภาพยนตร์", engine="royin") # output: 'phapn' romanize("ภาพยนตร์", engine="thai2rom") # output: 'phapphayon' """ if not text or not isinstance(text, str): return "" if engine == "thai2rom": from pythainlp.transliterate.thai2rom import romanize elif engine == "tltk": from pythainlp.transliterate.tltk import romanize else: # use default engine "royin" from pythainlp.transliterate.royin import romanize return romanize(text)
[docs]def transliterate( text: str, engine: str = DEFAULT_TRANSLITERATE_ENGINE ) -> str: """ This function transliterates Thai text. :param str text: Thai text to be transliterated :param str engine: 'icu', 'ipa', or 'thaig2p' (default) :return: A string of phonetic alphabets indicating how the input text should be pronounced. :rtype: str :Options for engines: * *thaig2p* - (default) Thai Grapheme-to-Phoneme, output is IPA (require PyTorch) * *icu* - pyicu, based on International Components for Unicode (ICU) * *ipa* - epitran, output is International Phonetic Alphabet (IPA) * *tltk_g2p* - Thai Grapheme-to-Phoneme from\ `TLTK <https://pypi.org/project/tltk/>`_., * *iso_11940* - Thai text into Latin characters with ISO 11940. * *tltk_ipa* - tltk, output is International Phonetic Alphabet (IPA) :Example: :: from pythainlp.transliterate import transliterate transliterate("สามารถ", engine="icu") # output: 's̄āmārt̄h' transliterate("สามารถ", engine="ipa") # output: 'saːmaːrot' transliterate("สามารถ", engine="thaig2p") # output: 's aː ˩˩˦ . m aː t̚ ˥˩' transliterate("สามารถ", engine="tltk_ipa") # output: 'saː5.maːt3' transliterate("สามารถ", engine="tltk_g2p") # output: 'saa4~maat2' transliterate("สามารถ", engine="iso_11940") # output: 's̄āmārt̄h' transliterate("ภาพยนตร์", engine="icu") # output: 'p̣hāphyntr̒' transliterate("ภาพยนตร์", engine="ipa") # output: 'pʰaːpjanot' transliterate("ภาพยนตร์", engine="thaig2p") # output: 'pʰ aː p̚ ˥˩ . pʰ a ˦˥ . j o n ˧' transliterate("ภาพยนตร์", engine="iso_11940") # output: 'p̣hāphyntr' """ if not text or not isinstance(text, str): return "" if engine == "icu" or engine == "pyicu": from pythainlp.transliterate.pyicu import transliterate elif engine == "ipa": from pythainlp.transliterate.ipa import transliterate elif engine == "tltk_g2p": from pythainlp.transliterate.tltk import tltk_g2p as transliterate elif engine == "tltk_ipa": from pythainlp.transliterate.tltk import tltk_ipa as transliterate elif engine == "iso_11940": from pythainlp.transliterate.iso_11940 import transliterate else: # use default engine: "thaig2p" from pythainlp.transliterate.thaig2p import transliterate return transliterate(text)
[docs]def pronunciate(word: str, engine: str = DEFAULT_PRONUNCIATE_ENGINE) -> str: """ This function pronunciates Thai word. :param str word: Thai text to be pronunciated :param str engine: 'w2p' (default) :return: A string of Thai letters indicating how the input text should be pronounced. :rtype: str :Options for engines: * *w2p* - Thai Word-to-Phoneme :Example: :: from pythainlp.transliterate import pronunciate pronunciate("สามารถ", engine="w2p") # output: 'สา-มาด' pronunciate("ภาพยนตร์", engine="w2p") # output: 'พาบ-พะ-ยน' """ if not word or not isinstance(word, str): return "" # if engine == "w2p": # has only one engine from pythainlp.transliterate.w2p import pronunciate return pronunciate(word)