Skip to content
Snippets Groups Projects
Commit 423c26c3 authored by Franck Dary's avatar Franck Dary
Browse files

Added script to print most ambiguous vectors from w2v generated from lexicon

parent eebab89e
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python3
# Given a .w2v file, that has been produced by macaon_data/script/lefff2W2V.py, list the most ambiguous words.
import sys
def formatProba(f):
    """Format a value with two decimals, in bold when strictly positive.

    Args:
        f: The number to format.

    Returns:
        The value rendered with ``%.2f``. When ``f > 0.0`` the text is
        wrapped in the ANSI "bold" / "reset" escape sequences; zero and
        negative values are returned as plain text.
    """
    text = "%.2f" % f
    # Only strictly positive entries are emphasised.
    if f > 0.0:
        return "\033[1m%s\033[0m" % text
    return text
def _rankAmbiguous(lines):
    """Build the display rows for every word whose highest component is < 1.0.

    Args:
        lines: Iterable of text lines, each made of a word followed by the
            whitespace-separated components of its vector.

    Returns:
        A list of strings, one per retained word, made of the formatted
        components followed by the word itself. Rows are sorted by ascending
        highest component (ties broken by the row text).

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    wordList = []
    for line in lines:
        splited = line.split()
        # Exactly two fields is presumably the .w2v header (count, dimension)
        # - TODO confirm. Fewer than two fields means there is no vector at
        # all (blank line or bare word), which used to crash on splited[0]
        # or on max() of an empty list.
        if len(splited) <= 2:
            continue
        word = splited[0]
        vec = [float(c) for c in splited[1:]]
        maxProba = max(vec)
        # A maximum of 1.0 or more is not reported.
        if maxProba < 1.0:
            row = " ".join([formatProba(f) for f in vec] + [word])
            wordList.append((maxProba, row))
    wordList.sort()
    return [row for _, row in wordList]


def _main(argv):
    """Read the file named by ``argv[1]`` and print its ambiguous rows.

    Args:
        argv: Command-line arguments, program name first.

    Returns:
        The process exit status: 0 on success, 1 when the file argument is
        missing.
    """
    if len(argv) < 2:
        print("USAGE : %s file.w2v" % argv[0], file=sys.stderr)
        return 1
    # The context manager guarantees the file is closed, even on a parse error.
    with open(argv[1], "r") as w2vFile:
        rows = _rankAmbiguous(w2vFile)
    print("\n".join(rows))
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment