10.
# Python program to generate word vectors using Word2Vec.
#
# Reads a plain-text corpus, splits it into lower-cased, tokenized sentences,
# trains one CBOW and one Skip Gram Word2Vec model on those sentences, and
# prints the cosine similarity each model assigns to two word pairs.

# importing all necessary modules
from gensim.models import Word2Vec
import gensim
from nltk.tokenize import sent_tokenize, word_tokenize
import warnings

# Silence library warnings so only the similarity lines are printed.
warnings.filterwarnings(action='ignore')

# Reads 'alice.txt' file.
# NOTE(review): the file name was lost in the extracted listing; 'alice.txt'
# is inferred from the 'alice' / 'wonderland' queries below -- confirm the
# path before running.
with open("C:\\Users\\Admin\\Desktop\\alice.txt") as sample:
    s = sample.read()

# Replaces newline characters with spaces so that sentences spanning several
# lines are handed to the sentence tokenizer as continuous text.
f = s.replace("\n", " ")

# One list of lower-cased word tokens per sentence in the file; this
# list-of-token-lists is what Word2Vec is trained on.
data = [
    [word.lower() for word in word_tokenize(sentence)]
    for sentence in sent_tokenize(f)
]

# Create CBOW model (no `sg` argument is passed, so gensim's default
# training algorithm is used).
model1 = gensim.models.Word2Vec(data, min_count=1,
                                vector_size=100, window=5)

# Print results
print("Cosine similarity between 'alice' " +
      "and 'wonderland' - CBOW : ",
      model1.wv.similarity('alice', 'wonderland'))
print("Cosine similarity between 'alice' " +
      "and 'machines' - CBOW : ",
      model1.wv.similarity('alice', 'machines'))

# Create Skip Gram model (sg=1 switches training from CBOW to Skip Gram;
# all other hyper-parameters match the CBOW model above).
model2 = gensim.models.Word2Vec(data, min_count=1, vector_size=100,
                                window=5, sg=1)

# Print results
print("Cosine similarity between 'alice' " +
      "and 'wonderland' - Skip Gram : ",
      model2.wv.similarity('alice', 'wonderland'))
print("Cosine similarity between 'alice' " +
      "and 'machines' - Skip Gram : ",
      model2.wv.similarity('alice', 'machines'))
Output (sample run; exact values vary between training runs):
Cosine similarity between 'alice' and 'wonderland' - CBOW : 0.999249298413
Cosine similarity between 'alice' and 'machines' - CBOW : 0.974911910445
Cosine similarity between 'alice' and 'wonderland' - Skip Gram : 0.885471373104
Cosine similarity between 'alice' and 'machines' - Skip Gram : 0.856892599521