# Source code for pythainlp.summarize

# -*- coding: utf-8 -*-
"""
Summarization
"""

from typing import List

from pythainlp.tokenize import sent_tokenize

from .freq import FrequencySummarizer


def summarize(
    text: str, n: int, engine: str = "frequency", tokenizer: str = "newmm"
) -> List[str]:
    """
    Thai text summarization

    :param str text: text to be summarized
    :param int n: number of sentences to be included in the summary
    :param str engine: text summarization engine; ``"frequency"`` selects
        ``FrequencySummarizer``, any other value falls back to returning
        the first ``n`` sentences of the text
    :param str tokenizer: word tokenizer, passed on to the frequency
        engine (the fallback path does not use it)
    :return List[str] summary: list of selected sentences
    """
    if engine == "frequency":
        return FrequencySummarizer().summarize(text, n, tokenizer)

    # if engine not found, return first n sentences
    return sent_tokenize(text)[:n]