# Source code for pythainlp.word_vector.thai2vec

# -*- coding: utf-8 -*-
'''
Code by https://github.com/cstorm125/thai2vec/blob/master/notebooks/examples.ipynb
'''
from __future__ import absolute_import,unicode_literals
import six
import sys
# Python 2 is not supported: print a notice and stop the interpreter at import time.
# NOTE(review): the message says "Thai sentiment" although this module is thai2vec,
# and the exit status 0 reports success - confirm both are intended.
if six.PY2:
	print("Thai sentiment in pythainlp. Not support python 2.7")
	sys.exit(0)
# gensim and numpy are both required. If either import fails, call pythainlp's
# install_package helper for each (presumably a pip install - verify in
# pythainlp.tools) and retry the imports once.
try:
	from gensim.models import KeyedVectors
	import numpy as np
except ImportError:
	from pythainlp.tools import install_package
	install_package('gensim')
	install_package('numpy')
	try:
		from gensim.models import KeyedVectors
		import numpy as np
	except ImportError:
		# Second failure: tell the user how to install manually, then exit
		# (again with status 0).
		print("Error ! using 'pip install gensim numpy'")
		sys.exit(0)
from pythainlp.tokenize import word_tokenize
from pythainlp.corpus import get_file
from pythainlp.corpus import download as download_data
import os

def download():
    """Return the local path of the ``thai2vec02`` corpus file.

    ``get_file('thai2vec02')`` is asked first. When it returns ``None`` the
    corpus is requested through the corpus ``download`` helper (imported here
    as ``download_data``) and the lookup is repeated once.

    :return: whatever ``get_file`` reports after the optional download; this
        can still be ``None`` if the download did not make the file available
    """
    path = get_file('thai2vec02')
    # Identity test: None is a singleton, and ``==`` could be overridden by
    # the returned object.
    if path is None:
        download_data('thai2vec02')
        path = get_file('thai2vec02')
    return path
def get_model():
    """Load the thai2vec word vectors with gensim.

    The file path comes from :func:`download`, and the file is parsed as a
    text-format word2vec file (``binary=False``). Nothing is cached here, so
    every call reads the file again.

    :return: the object produced by ``KeyedVectors.load_word2vec_format``
    """
    return KeyedVectors.load_word2vec_format(download(), binary=False)
def most_similar_cosmul(positive, negative):
    """Run the model's ``most_similar_cosmul`` query.

    Usage: both inputs are lists.

    :param list positive: forwarded as the ``positive`` keyword argument
    :param list negative: forwarded as the ``negative`` keyword argument
    :return: whatever ``most_similar_cosmul`` returns for the loaded model
    """
    model = get_model()
    return model.most_similar_cosmul(positive=positive, negative=negative)
def doesnt_match(listdata):
    """Run the model's ``doesnt_match`` query on ``listdata``.

    :param listdata: forwarded unchanged to ``doesnt_match`` (gensim documents
        this argument as a list of words)
    :return: whatever ``doesnt_match`` returns for the loaded model
    """
    model = get_model()
    return model.doesnt_match(listdata)
def similarity(word1, word2):
    """Compare two words using the loaded model.

    :param str word1: first word
    :param str word2: second word
    :return: the cosine similarity between the two word vectors
    """
    model = get_model()
    return model.similarity(word1, word2)
def sentence_vectorizer(ss, dim=300, use_mean=False):
    """Build one vector for a sentence by summing its word vectors.

    The text is split with ``word_tokenize``. Every token found in the model
    vocabulary contributes its vector to a running sum; other tokens are
    skipped.

    :param str ss: text to vectorize
    :param int dim: width of the result; presumably this has to equal the
        model's vector size for the addition to work - verify against the
        model in use
    :param bool use_mean: when true, divide the sum by the number of tokens
        (all tokens, including the ones missing from the vocabulary)
    :return: numpy array of shape ``(1, dim)``; all zeros when no token is in
        the vocabulary
    """
    tokens = word_tokenize(ss)
    total = np.zeros((1, dim))
    # Load the model once. Calling get_model() inside the loop re-read the
    # vector file twice for every token.
    vectors = get_model().wv
    # A set gives constant-time membership tests; index2word itself is a list.
    vocabulary = set(vectors.index2word)
    for token in tokens:
        if token in vocabulary:
            total += vectors.word_vec(token)
    # Skip the division for an empty token list, which would fill the result
    # with NaN (0 / 0).
    if use_mean and len(tokens) > 0:
        total /= len(tokens)
    return total
def about():
    """Return a short credits text for thai2vec.

    :return: a string naming the project, its developer and its GitHub URL
    """
    return ''' thai2vec State-of-the-Art Language Modeling, Text Feature Extraction and Text Classification in Thai Language. Created as part of pyThaiNLP with ULMFit implementation from fast.ai Development : Charin Polpanumas GitHub : https://github.com/cstorm125/thai2vec '''