From 8ba97220a33f220bafaeeb7aaaf26a57531a8785 Mon Sep 17 00:00:00 2001
From: Andreas Domanowski <andreas@domanowski.net>
Date: Tue, 28 Feb 2023 15:37:19 +0100
Subject: [PATCH] Add import from archive file

---
 .gitignore          |  4 ++-
 hedgedoc_import.py  | 66 ++++++++++++++++++++++++++++-----------------
 md-import-export.py |  5 ++--
 3 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4296ae1..ae80c39 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ __pycache__/
 
 codimd-documents/
 
-.idea
\ No newline at end of file
+.idea
+
+*.zip
\ No newline at end of file
diff --git a/hedgedoc_import.py b/hedgedoc_import.py
index 0aeddc5..95d1b59 100644
--- a/hedgedoc_import.py
+++ b/hedgedoc_import.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import json
 
 from common import get_sessionid
 from pathlib import Path
@@ -11,36 +12,52 @@ from zipfile import ZipFile
 
 def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
     sanitized_free_url = hedgedoc_free_url.replace(" ", "%20")
-    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
+    request_url = instance_url + '/new/' + sanitized_free_url
 
-    url = instance_url + '/new/' + sanitized_free_url
+    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     headers = {'User-Agent': user_agent, "Cookie": f"connect.hedgeDoc.sid={session_id}",
                "Content-Type": "text/markdown"}
 
-    pkt = str.encode(content)
-    req = urllib.request.Request(url, data=pkt, method='POST', headers=headers)
+    document_contents = str.encode(content)
+    req = urllib.request.Request(request_url, data=document_contents, method='POST', headers=headers)
     with urllib.request.urlopen(req) as response:
-        print("Go visit " + response.url + " with your browser in a logged-in session.")
+        return response.url
 
 
 def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
     folder = os.path.join(os.getcwd(), export_folder);
-    print("Trying to upload all files with extension \".md\" in folder \"" + folder + "\"");
-
-    #with ZipFile('images.zip') as zf:
-    #    for file in zf.namelist():
-    #        if not file.endswith('.png'):  # optional filtering by filetype
-    #            continue
-    #        with zf.open(file) as f:
-    #            image = pygame.image.load(f, namehint=file)
-
-    for relative_filename in os.listdir(folder):
-        if relative_filename.endswith(".md"):
-            addressable_filename = os.path.join(folder, relative_filename)
-            print("Trying to upload: " + addressable_filename)
-            markdown_content = Path(addressable_filename).read_text()
-            free_url = Path(relative_filename).stem
-            import_single_document(instance_url, free_url, markdown_content, session_id)
+    # Load the exported history map ("map.json") from the export folder
+    with open(os.path.join(export_folder, "map.json")) as map_file:
+        history_dictionary = json.load(map_file)
+
+    # Map each document title (= archive filename without the ".md" extension) to its id (= note URL in CodiMD)
+    lookup_map = {}
+    for entry in history_dictionary["history"]:
+        lookup_map[entry["text"]] = entry["id"]
+
+    # URLs to visit so that the newly imported documents become available in the history
+    urls_to_visit = []
+
+    # iterate over files in archive
+    with ZipFile(archive_file) as zf:
+        for file in zf.namelist():
+            document_title = Path(file).stem
+            # Check whether the file's name (= title in the history JSON) matches an existing document path.
+            # NOTE(review): document_content stays None below; the entry is never read from the archive (e.g. zf.read).
+            document_content = None
+
+            if document_title in lookup_map:
+                print("filename: ", document_title, ", in lookupMap: ",
+                      lookup_map[document_title])
+                urls_to_visit.append(
+                    import_single_document(instance_url, lookup_map[document_title], document_content, session_id))
+            else:
+                print("no mapping found for ", document_title, ", uploading anyway")
+                # empty string implies HedgeDoc should create a new ID
+                urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id))
+
+    browser = "firefox"
+    subprocess.run([browser] + urls_to_visit)
 
 
 def select_browser():
@@ -60,6 +77,7 @@ def select_browser():
 
 
 if __name__ == "__main__":
-    select_browser()
-    #subprocess.run(["firefox", "www.google.de"])
-    #import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents")
+    # select_browser()
+    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", 123, "codimd-documents", "archive.zip")
+
+    # import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents")
diff --git a/md-import-export.py b/md-import-export.py
index 529a792..90508a7 100644
--- a/md-import-export.py
+++ b/md-import-export.py
@@ -3,5 +3,6 @@ from common import get_sessionid
 from hedgedoc_import import import_into_hedgedoc
 
 if __name__ == "__main__":
-    export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), "codimd-documents")
-    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents", "archive.zip")
\ No newline at end of file
+    export_folder = "codimd-documents"
+    export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), export_folder)
+    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), export_folder, "archive.zip")
\ No newline at end of file
-- 
GitLab