From 4c71637569c96a5652569d9d59afc9d361ba615c Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Wed, 8 Sep 2021 15:52:57 +0200
Subject: [PATCH] Fixed script concatW2V

---
Review note: hunk amended so that the entry count printed in the merged header
only includes lines that the second pass actually prints, and so that the error
path uses sys.exit. The post-image blob id on the "index" line predates this
amendment.

 scripts/concatW2V.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/scripts/concatW2V.py b/scripts/concatW2V.py
index 024459e..8100d98 100755
--- a/scripts/concatW2V.py
+++ b/scripts/concatW2V.py
@@ -2,17 +2,33 @@
 
 import sys
 
-hadFirst = False
+# First pass : read the embedding size from every 2-token header line, abort
+# if the files disagree, and count the entries the second pass will print.
+nbLines = 0
+embSize = None
 for filename in sys.argv[1:] :
-  prefix = filename.split('/')[-1].split('.')[0]
   for line in open(filename, "r") :
     line = line.strip()
     splited = line.split()
     if len(splited) == 2 :
-      if hadFirst :
-        continue
-      hadFirst = True
-      print(line)
-    else :
+      if embSize is None :
+        embSize = int(splited[1])
+      elif embSize != int(splited[1]) :
+        print("ERROR : incompatibles embedings sizes %d and %d"%(embSize, int(splited[1])), file=sys.stderr)
+        sys.exit(1)
+    elif len(splited) > 2 :
+      # Same test as the printing loop below, so blank or one-token lines
+      # cannot inflate the count announced in the merged header.
+      nbLines += 1
+
+# Second pass : print the merged header, then every entry prefixed with the
+# name of its file (directories and everything after the first '.' removed).
+print(nbLines, embSize)
+for filename in sys.argv[1:] :
+  prefix = filename.split('/')[-1].split('.')[0]
+  for line in open(filename, "r") :
+    line = line.strip()
+    splited = line.split()
+    if len(splited) > 2 :
       print(prefix+"_"+line)
 
-- 
GitLab