Add import from archive file

8ba97220 · Andreas Domanowski · b622ec75
Commit 8ba97220 authored 2 years ago by Andreas Domanowski
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ __pycache__/
 codimd-documents/
 .idea
+*.zip
\ No newline at end of file
--- a/hedgedoc_import.py
+++ b/hedgedoc_import.py
 #!/usr/bin/env python3
+import json
 from common import get_sessionid
 from pathlib import Path
@@ -11,36 +12,52 @@ from zipfile import ZipFile
 def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
    sanitized_free_url = hedgedoc_free_url.replace(" ", "%20")
-    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
+    request_url = instance_url + '/new/' + sanitized_free_url
-    url = instance_url + '/new/' + sanitized_free_url
+    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent, "Cookie": f"connect.hedgeDoc.sid={session_id}",
               "Content-Type": "text/markdown"}
-    pkt = str.encode(content)
+    document_contents = str.encode(content)
-    req = urllib.request.Request(url, data=pkt, method='POST', headers=headers)
+    req = urllib.request.Request(request_url, data=document_contents, method='POST', headers=headers)
    with urllib.request.urlopen(req) as response:
-        print("Go visit " + response.url + " with your browser in a logged-in session.")
+        return response.url
 def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
    folder = os.path.join(os.getcwd(), export_folder);
-    print("Trying to upload all files with extension \".md\" in folder \"" + folder + "\"");
+    # get exported history map
+    with open(os.path.join(export_folder, "map.json")) as map_file:
-    #with ZipFile('images.zip') as zf:
+        history_dictionary = json.load(map_file)
-    #    for file in zf.namelist():
-    #        if not file.endswith('.png'):  # optional filtering by filetype
+    # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
-    #            continue
+    lookup_map = {}
-    #        with zf.open(file) as f:
+    for entry in history_dictionary["history"]:
-    #            image = pygame.image.load(f, namehint=file)
+        lookup_map[entry["text"]] = entry["id"]
-    for relative_filename in os.listdir(folder):
+    # URLs to visit to make the new document available in the history
-        if relative_filename.endswith(".md"):
+    urls_to_visit = []
-            addressable_filename = os.path.join(folder, relative_filename)
-            print("Trying to upload: " + addressable_filename)
+    # iterate over files in archive
-            markdown_content = Path(addressable_filename).read_text()
+    with ZipFile(archive_file) as zf:
-            free_url = Path(relative_filename).stem
+        for file in zf.namelist():
-            import_single_document(instance_url, free_url, markdown_content, session_id)
+            document_title = Path(file).stem
+            # check for every file if there is a match between its name (= title in history json) and an existing path
+            # for a document
+            document_content = None
+            if document_title in lookup_map:
+                print("filename: ", document_title, ", in lookupMap: ",
+                      lookup_map[document_title])
+                urls_to_visit.append(
+                    import_single_document(instance_url, lookup_map[document_title], document_content, session_id))
+            else:
+                print("no mapping found for ", document_title, ", uploading anyway")
+                # empty string implies HedgeDoc should create a new ID
+                urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id))
+    browser = "firefox"
+    subprocess.run([browser] + urls_to_visit)
 def select_browser():
@@ -60,6 +77,7 @@ def select_browser():
 if __name__ == "__main__":
-    select_browser()
+    # select_browser()
-    #subprocess.run(["firefox", "www.google.de"])
+    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", 123, "codimd-documents", "archive.zip")
    # import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents")
--- a/md-import-export.py
+++ b/md-import-export.py
@@ -3,5 +3,6 @@ from common import get_sessionid
 from hedgedoc_import import import_into_hedgedoc
 if __name__ == "__main__":
-    export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), "codimd-documents")
+    export_folder = "codimd-documents"
-    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents", "archive.zip")
+    export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), export_folder)
\ No newline at end of file
+    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), export_folder, "archive.zip")
\ No newline at end of file