"""Concatenate several embedding text files into one stream on stdout.

Each input file is read line by line.  A line made of exactly two
whitespace-separated fields is treated as a header whose second field is
the embedding size; a line with more than two fields is treated as an
embedding entry.  The output is a single ``<count> <size>`` header
followed by every entry, each one prefixed with ``<file prefix>_`` where
the prefix is the file's base name up to its first dot.

NOTE(review): the inputs look like word2vec text files (the first header
field would then be the vocabulary size) -- confirm against the callers.
"""

import sys


class EmbeddingSizeError(Exception):
    """Raised when two input files declare different embedding sizes."""


def file_prefix(filename):
    """Return the base name of *filename* truncated at its first dot."""
    return filename.split('/')[-1].split('.')[0]


def scan_embeddings(filenames):
    """Count the entries of *filenames* and check their embedding sizes.

    Returns:
        A ``(nb_entries, emb_size)`` tuple.  ``emb_size`` is ``None`` when
        no header line was found in any file.

    Raises:
        EmbeddingSizeError: if two header lines declare different sizes.
        ValueError: if the second field of a header line is not an integer.
    """
    nb_entries = 0
    emb_size = None
    for filename in filenames:
        with open(filename, "r") as stream:
            for line in stream:
                fields = line.split()
                if len(fields) == 2:
                    size = int(fields[1])
                    if emb_size is None:
                        emb_size = size
                    elif emb_size != size:
                        raise EmbeddingSizeError(
                            "ERROR : incompatibles embedings sizes %d and %d"
                            % (emb_size, size)
                        )
                elif len(fields) > 2:
                    # Count only what write_entries() will actually emit, so
                    # blank or one-field lines cannot inflate the header.
                    nb_entries += 1
    return nb_entries, emb_size


def write_entries(filenames, out=None):
    """Print every entry line of *filenames*, prefixed by its file prefix.

    *out* is any writable text stream; ``None`` means ``sys.stdout``.
    """
    for filename in filenames:
        prefix = file_prefix(filename)
        with open(filename, "r") as stream:
            for line in stream:
                line = line.strip()
                if len(line.split()) > 2:
                    print(prefix + "_" + line, file=out)


def concat_embeddings(filenames, out=None):
    """Write the merged header, then all prefixed entries, to *out*.

    The files are fully scanned before anything is written, so a size
    mismatch raises EmbeddingSizeError without producing partial output.
    """
    filenames = list(filenames)  # iterated twice below
    nb_entries, emb_size = scan_embeddings(filenames)
    print(nb_entries, emb_size, file=out)
    write_entries(filenames, out)


def main(argv=None):
    """Command-line entry point: concatenate the files named in *argv*.

    *argv* defaults to ``sys.argv[1:]``.  On incompatible embedding sizes
    the error is printed on stderr and the process exits with status 1.
    """
    filenames = sys.argv[1:] if argv is None else argv
    try:
        concat_embeddings(filenames)
    except EmbeddingSizeError as err:
        print(err, file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()