Source code for pythainlp.transliterate.iso_11940

# -*- coding: utf-8 -*-
# Copyright (C) 2016-2023 PyThaiNLP Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Transliterating Thai text with ISO 11940

:See Also:
    * `Wikipedia \
        <https://en.wikipedia.org/wiki/ISO_11940>`_
"""
_consonants = {
    "ก": "k",
    "ข": "k̄h",
    "ฃ": "ḳ̄h",
    "ค": "kh",
    "ฅ": "k̛h",
    "ฆ": "ḳh",
    "ง": "ng",
    "จ": "c",
    "ฉ": "c̄h",
    "ช": "ch",
    "ซ": "s",
    "ฌ": "c̣h",
    "ญ": "ỵ",
    "ฎ": "ḍ",
    "ฏ": "ṭ",
    "ฐ": "ṭ̄h",
    "ฑ": "ṯh",
    "ฒ": "t̛h",
    "ณ": "ṇ",
    "ด": "d",
    "ต": "t",
    "ถ": "t̄h",
    "ท": "th",
    "ธ": "ṭh",
    "น": "n",
    "บ": "b",
    "ป": "p",
    "ผ": "p̄h",
    "ฝ": "f̄",
    "พ": "ph",
    "ฟ": "f",
    "ภ": "p̣h",
    "ม": "m",
    "ย": "y",
    "ร": "r",
    "ฤ": "v",
    "ล": "l",
    "ฦ": "ł",
    "ว": "w",
    "ศ": "ṣ̄",
    "ษ": "s̛̄",
    "ส": "s̄",
    "ห": "h̄",
    "ฬ": "ḷ",
    "อ": "x",
    "ฮ": "ḥ",
}

_vowels = {
    "ะ": "a",
    "ั": "ạ",
    "า": "ā",
    "ำ": "å",
    "ิ": "i",
    "ี": "ī",
    "ึ": "ụ",
    "ื": "ụ̄",
    "ุ": "u",
    "ู": "ū",
    "เ": "e",
    "แ": "æ",
    "โ": "o",
    "ใ": "ı",
    "ไ": "ị",
    "ฤ": "v",
    "ฤๅ": "vɨ",
    "ฦ": "ł",
    "ฦๅ": "łɨ",
    "ย": "y",
    "ว": "w",
    "อ": "x",
}

_tone_marks = {
    "่": "–̀".replace("–", ""),
    "้": "–̂".replace("–", ""),
    "๊": "–́".replace("–", ""),
    "๋": "–̌".replace("–", ""),
    "็": "–̆".replace("–", ""),
    "์": "–̒".replace("–", ""),
    "–๎".replace("–", ""): "~",
    "–ํ".replace("–", ""): "–̊".replace("–", ""),
    "–ฺ".replace("–", ""): "–̥".replace("–", ""),
}

_punctuation_and_digits = {
    "ๆ": "«",
    "ฯ": "ǂ",
    "๏": "§",
    "ฯ": "ǀ",
    "๚": "ǁ",
    "๛": "»",
    "๐": "0",
    "๑": "1",
    "๒": "2",
    "๓": "3",
    "๔": "4",
    "๕": "5",
    "๖": "6",
    "๗": "7",
    "๘": "8",
    "๙": "9",
}

_all_dict = {
    **_consonants,
    **_vowels,
    **_tone_marks,
    **_punctuation_and_digits,
}
_list_k = _all_dict.keys()


[docs]def transliterate(word: str) -> str: """ Use ISO 11940 for transliteration :param str text: Thai text to be transliterated. :return: A string of IPA indicating how the text should be pronounced. """ _new = "" for i in word: if i in _list_k: _new += _all_dict[i] else: _new += i return _new