From 813e816790508dde03342d9b70336ab9b63dba7f Mon Sep 17 00:00:00 2001
From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr>
Date: Tue, 5 Jul 2022 07:18:08 -0400
Subject: [PATCH] Compression

---
 .../monoview_classifiers/imbalance_bagging.py |   4 -
 .../multiview_platform/utils/compression.py   | 114 ++++++++++++++++--
 2 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/summit/multiview_platform/monoview_classifiers/imbalance_bagging.py b/summit/multiview_platform/monoview_classifiers/imbalance_bagging.py
index 7c2b90bc..9dfa2e26 100644
--- a/summit/multiview_platform/monoview_classifiers/imbalance_bagging.py
+++ b/summit/multiview_platform/monoview_classifiers/imbalance_bagging.py
@@ -1,8 +1,4 @@
 from imblearn.ensemble import BalancedBaggingClassifier
-<<<<<<< HEAD
-=======
-
->>>>>>> private_algos
 import numpy as np
 
 
diff --git a/summit/multiview_platform/utils/compression.py b/summit/multiview_platform/utils/compression.py
index 8da8937e..a5ef2c4e 100644
--- a/summit/multiview_platform/utils/compression.py
+++ b/summit/multiview_platform/utils/compression.py
@@ -1,15 +1,109 @@
 import os
-import shutils
+import shutil
 import mmap
+import re
+
+
+def simplify_plotly(html_file_path, level=0):
+    """Replace the plotly.js bundle inlined in an HTML file by a script link.
+
+    The rewritten page is saved next to the input as ``compressed_<name>``,
+    then the input file is deleted. When the inlined bundle is not found,
+    the content is copied unchanged.
+
+    Parameters
+    ----------
+    html_file_path : str
+        Path of the HTML file to process.
+    level : int
+        Number of ``../`` prepended to ``plotly_js.js`` in the script link.
+    """
+    js_prefix = '../' * level
+    print(html_file_path)
+    with open(html_file_path, 'r') as html_file:
+        file_string = html_file.read()
+    # re.DOTALL lets '.' also match the newlines found inside the bundle.
+    new_str = re.sub(
+        r'\<script type\="text/javascript"\>/\*\*.*?27\)\}\)\);\</script\>',
+        '<script src="{}plotly_js.js"></script>'.format(js_prefix),
+        file_string, flags=re.DOTALL)
+    compressed_path = os.path.join(
+        os.path.dirname(html_file_path),
+        "compressed_" + os.path.basename(html_file_path))
+    with open(compressed_path, 'w') as new_html:
+        new_html.write(new_str)
+    # Delete the original only once the compressed copy has been written.
+    os.remove(html_file_path)
+
+
+def explore_files(exp_path, level=0, compress=False):
+    """Recursively run simplify_plotly on the HTML files below exp_path.
+
+    On the initial call (``level == 0``), ``plotly_js.js`` is first copied
+    from the current working directory into ``exp_path``. Files whose name
+    starts with ``compressed_`` are skipped. A file that fails to be
+    simplified is reported on stdout and the exploration goes on.
+
+    Parameters
+    ----------
+    exp_path : str
+        Directory to explore.
+    level : int
+        Depth of ``exp_path`` below the initial directory; 0 for the
+        initial call.
+    compress : bool
+        If True, the initial call finally archives ``exp_path`` as
+        ``<exp_path>.zip`` and deletes the directory.
+    """
+    if level == 0:
+        shutil.copyfile('plotly_js.js', os.path.join(exp_path, "plotly_js.js"))
+
+    for name in os.listdir(exp_path):
+        new_path = os.path.join(exp_path, name)
+        if name.endswith('.html') and not name.startswith("compressed_"):
+            try:
+                simplify_plotly(new_path, level=level)
+            except Exception as error:
+                # Best effort: one unreadable file must not stop the walk,
+                # but the failure should not be silent either.
+                print("Could not simplify {}: {}".format(new_path, error))
+        if os.path.isdir(new_path):
+            explore_files(new_path, level=level+1)
+    if level == 0 and compress:
+        # Without normpath, a trailing separator in exp_path would make
+        # make_archive write a file named ".zip" inside the directory.
+        shutil.make_archive(os.path.normpath(exp_path), 'zip', exp_path)
+        # os.rmdir would fail here: the directory still holds its files.
+        shutil.rmtree(exp_path)
+
+
+def remove_compressed(exp_path):
+    """Recursively delete the files matching the compression name patterns.
+
+    A file is deleted when its name starts with ``compressed_`` or ends with
+    ``_c.html`` or ``_js.js``. Directories are only explored, never deleted.
+
+    Parameters
+    ----------
+    exp_path : str
+        Directory to clean.
+    """
+    for name in os.listdir(exp_path):
+        new_path = os.path.join(exp_path, name)
+        if os.path.isdir(new_path):
+            # Checked first: os.remove fails on a directory whose name
+            # happens to match one of the patterns.
+            remove_compressed(new_path)
+        elif name.startswith("compressed_") or name.endswith(("_c.html", "_js.js")):
+            os.remove(new_path)
 
-def simplify_plotly(html_file_path):
-    with open(html_file_path, 'r+') as html_file:
-        s = mmap.mmap(html_file.fileno(), 0, access=mmap.ACCESS_WRITE)
-        print(s.find(b'<script type="text/javascript">/**'))
-        print(s.find(b'27)}));</script> ', ))
-        s = s[:181] + b'<script src="../_static/plotly_js.js"></script>' + s[3538962+17:]
-        print(s)
-        mmap.flush()
 
 if __name__=="__main__":
-    simplify_plotly("/home/baptiste/Documents/Gitwork/summit/results/hepatitis/debug_started_2022_03_16-15_06_55__/hepatitis-mean_on_10_iter-balanced_accuracy_p.html")
\ No newline at end of file
+    explore_files("/home/baptiste/Documents/Gitwork/summit/results/BioBanQ_mv_status/debug_started_2022_07_01-22_23_04_bal_acc/")
+    # print(os.listdir("/home/baptiste/Documents/Gitwork/summit/results/"))
+    # for dataset in os.listdir("/home/baptiste/Documents/Gitwork/summit/results/"):
+    #     print(dataset)
+    #     dataset_path = os.path.join("/home/baptiste/Documents/Gitwork/summit/results/", dataset)
+    #     for exp_name in os.listdir(dataset_path):
+    #         exp_path = os.path.join(dataset_path, exp_name)
+    #         explore_files(exp_path, compress=False)
\ No newline at end of file
-- 
GitLab