From 4c71637569c96a5652569d9d59afc9d361ba615c Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Wed, 8 Sep 2021 15:52:57 +0200
Subject: [PATCH] Fixed script concatW2V

---
 scripts/concatW2V.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

Note: after this change the script makes two passes over the input files.
The first pass checks that every two-field header line declares the same
embedding size (exiting with status 1 on a mismatch) and counts every other
line. The second pass, run after a single merged "count size" header has
been printed, prints each line of more than two fields prefixed with the
part of its file name that precedes the first dot.
NOTE(review): this patch was recovered from a copy whose line breaks were
lost. Line placement is confirmed by the hunk counts, but the 2-space
indentation inside the hunk is inferred; confirm against blob 024459e
before applying.

diff --git a/scripts/concatW2V.py b/scripts/concatW2V.py
index 024459e..8100d98 100755
--- a/scripts/concatW2V.py
+++ b/scripts/concatW2V.py
@@ -2,17 +2,27 @@
 
 import sys
 
-hadFirst = False
+nbLines = 0
+embSize = None
 
 for filename in sys.argv[1:] :
-  prefix = filename.split('/')[-1].split('.')[0]
   for line in open(filename, "r") :
     line = line.strip()
     splited = line.split()
     if len(splited) == 2 :
-      if hadFirst :
-        continue
-      hadFirst = True
-      print(line)
+      if embSize is None :
+        embSize = int(splited[1])
+      elif embSize != int(splited[1]) :
+        print("ERROR : incompatibles embedings sizes %d and %d"%(embSize, int(splited[1])), file=sys.stderr)
+        exit(1)
     else :
+      nbLines += 1
+
+print(nbLines, embSize)
+for filename in sys.argv[1:] :
+  prefix = filename.split('/')[-1].split('.')[0]
+  for line in open(filename, "r") :
+    line = line.strip()
+    splited = line.split()
+    if len(splited) > 2 :
       print(prefix+"_"+line)
-- 
GitLab