Source code for pythainlp.util.keywords

# -*- coding: utf-8 -*-
from collections import Counter
from typing import Dict, List

from pythainlp.corpus import thai_stopwords

# Stopword collection from pythainlp.corpus, loaded once at import time so
# that rank() does not have to call thai_stopwords() on every invocation.
_STOPWORDS = thai_stopwords()


def rank(words: List[str], exclude_stopwords: bool = False) -> Counter:
    """
    Count how many times each word occurs.

    :param list words: a list of words
    :param bool exclude_stopwords: if True, words found in the module-level
        stopword collection are dropped before counting
    :return: a Counter mapping each remaining word to its number of
        occurrences, or None when ``words`` is empty (or otherwise falsy)
    """
    # Guard clause: nothing to count. Note that this yields None, not an
    # empty Counter, so callers must check the result before using it.
    if not words:
        return None

    if not exclude_stopwords:
        return Counter(words)

    kept = [token for token in words if token not in _STOPWORDS]
    return Counter(kept)
def find_keyword(word_list: List[str], min_len: int = 3) -> Dict[str, int]:
    """
    Find keywords: non-stopword words that occur frequently enough.

    Words are counted with :func:`rank` (stopwords excluded) and only those
    whose count reaches the threshold are kept.

    :param list word_list: a list of words
    :param int min_len: the minimum number of occurrences a word must have
        to be returned (despite the name, this is compared against the
        word's frequency, not its character length)
    :return: dict mapping each keyword to its number of occurrences;
        an empty dict when ``word_list`` is empty
    """
    # rank() returns None for empty input, which would make the .items()
    # call below raise AttributeError; short-circuit to an empty result.
    if not word_list:
        return {}

    counts = rank(word_list, exclude_stopwords=True)

    return {word: freq for word, freq in counts.items() if freq >= min_len}