Skip to content
Snippets Groups Projects

Hedgedoc import

Merged Andreas Domanowski requested to merge hedgedoc-import into main
1 file
+ 2
0
Compare changes
  • Side-by-side
  • Inline
+ 128
0
#!/usr/bin/env python3
import json
from urllib.error import HTTPError
from common import get_sessionid, print_block_heading
from pathlib import Path
import urllib.parse
import urllib.request
import os
from zipfile import ZipFile
def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
    """Upload a single markdown document to a HedgeDoc instance.

    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param hedgedoc_free_url: desired path/alias for the note; an empty string
        makes HedgeDoc generate a new random ID
    :param content: markdown content of the document
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :return: URL of the created note (the URL the server redirected to)
    :raises SystemExit: if the server redirects back to the instance root,
        which indicates the session cookie was not accepted
    :raises urllib.error.HTTPError: e.g. HTTP 409 when the free URL is taken
    """
    sanitized_free_url = urllib.parse.quote(hedgedoc_free_url)
    request_url = instance_url + '/new/' + sanitized_free_url
    headers = {"Cookie": f"connect.hedgeDoc.sid={session_id}", "Content-Type": "text/markdown"}
    req = urllib.request.Request(request_url, data=str.encode(content), method='POST', headers=headers)
    # unfortunately, no error is thrown if a document is not created when session cookie is invalid
    # HTTP 409 is ignored for the sake of simplicity. Handled in import_into_hedgedoc(..)
    # Not optimal, but nobody ain't got time for that
    with urllib.request.urlopen(req) as response:
        # A rejected session silently redirects to the instance root instead of erroring
        if response.url == instance_url + "/":
            raise SystemExit("Could not import document. Please check your HedgeDoc session cookie. Aborting...")
        return response.url
def check_file_exists(file):
    """Abort the whole script if *file* does not exist on disk.

    :param file: path of the required file
    :raises SystemExit: if the path does not exist
    """
    if not os.path.exists(file):
        raise SystemExit(
            f"ERROR: File {file} does not exist. Export your data from CodiMD and re-execute this script again!"
            f" Aborting...")
    print(f"Required file {file} exists. Proceeding...")
def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
    """Migrate an exported CodiMD archive into a HedgeDoc instance.

    Reads the (optional) exported ``history.json`` from *export_folder* to map
    document titles to the paths the user visited in CodiMD, uploads every
    document found in *archive_file*, and writes a helper file with the URLs
    the user must visit so the notes show up in the HedgeDoc history.

    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :param export_folder: folder that may contain the exported ``history.json``
    :param archive_file: path to the CodiMD export zip archive
    :raises SystemExit: if *archive_file* is missing or the session is invalid
    """
    print_block_heading(
        f"Checking existence of archive file ({archive_file})")
    check_file_exists(archive_file)
    # get exported history map
    history_json_filename = "history.json"
    history_dictionary = {}
    try:
        with open(os.path.join(export_folder, history_json_filename)) as map_file:
            history_dictionary = json.load(map_file)
    except FileNotFoundError:
        # history.json is optional — without it every note gets a random path
        print_block_heading(
            f"INFO: could not find file {history_json_filename}. Continuing anyways with random generated paths for "
            f"documents")
    # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
    lookup_map = {}
    if "history" in history_dictionary:
        for entry in history_dictionary["history"]:
            lookup_map[entry["text"]] = entry["id"]
    # URLs to visit to make the new document available in the history
    urls_to_visit = []
    process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit)
    create_urls_to_visit_file("history_scripts/hedgedoc_documents_to_visit.url", urls_to_visit)
def process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit):
    """Upload every document contained in the export archive.

    For each file in the zip archive, tries to keep the CodiMD path recorded
    in *lookup_map* (title -> path); otherwise lets HedgeDoc assign a random
    path. Appends every resulting note URL to *urls_to_visit* (mutated in
    place).

    :param archive_file: path to the CodiMD export zip archive
    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param lookup_map: mapping of document title to its CodiMD path/id
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :param urls_to_visit: list collecting the URLs of all migrated notes
    """
    # iterate over files in archive
    with ZipFile(archive_file) as zf:
        print("Now scanning your provided archive file containing the documents you are the owner of")
        print("If you visited your own document via a, e.g., \"codi-instance.tld/my_specified_free_url\" " +
              "this script tries to migrate it to the HedgeDoc instance at \"hedgedoc-instance.tld/my_own_path\"")
        print("If this is not possible, a new random URL for the document will be created")
        print_block_heading("Iterating over files in archive and trying to upload them")
        for file in zf.namelist():
            # the archive member's stem is the document title used in history.json
            document_title = Path(file).stem
            # check for every file if there is a match between its name (= title in history json) and an existing path
            # for a document
            with zf.open(file) as f:
                document_content = f.read().decode("UTF-8")
            if document_title in lookup_map:
                try_generate_free_url_document(document_content, document_title, instance_url, lookup_map, session_id,
                                               urls_to_visit)
            else:
                print(
                    f"According to your history (or lack thereof) , you did not visit \"{document_title}.md\" in the "
                    f"CodiMD instance recently. Migrating the document and generating a new, random URL/path for it")
                # empty string implies HedgeDoc should create a new ID
                generated_url = import_single_document(instance_url, "", document_content, session_id)
                print(f"New URL after document migration with new, random URL/subpath: "
                      f"{generated_url}")
                urls_to_visit.append(generated_url)
            print()
def try_generate_free_url_document(document_content, document_title, instance_url, lookup_map, session_id,
                                   urls_to_visit):
    """Upload a document, trying to keep its CodiMD path on the new instance.

    Falls back to a random path when the wanted path already exists on the
    HedgeDoc instance (HTTP 409). Appends the resulting note URL to
    *urls_to_visit* (mutated in place).

    :param document_content: markdown content of the document
    :param document_title: title of the document (key into *lookup_map*)
    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param lookup_map: mapping of document title to its CodiMD path/id
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :param urls_to_visit: list collecting the URLs of all migrated notes
    :raises SystemExit: on any HTTP error other than 409
    """
    print(
        f"You visited your own document \"{document_title}.md\" via the identifier/path " +
        f"\"{lookup_map[document_title]}\"")
    print(f"Trying to migrate this document and make it available under the already visited path")
    try:
        new_url = import_single_document(instance_url, lookup_map[document_title], document_content,
                                         session_id)
        print(f"Migration was possible. New URL: {instance_url}/{lookup_map[document_title]}")
    except HTTPError as error:
        # .code is the portable attribute (.status only exists since Python 3.9)
        if error.code == 409:
            print("ATTENTION: Could not migrate document with the same path. Uploading anyways and "
                  "creating a new, random path")
            new_url = import_single_document(instance_url, "", document_content, session_id)
            print(f"New URL after document migration without migrating the URL/subpath: {new_url}")
        else:
            raise SystemExit("Could not create document. Please check your session cookie. Aborting...")
    urls_to_visit.append(new_url)
def create_urls_to_visit_file(filename, urls_to_visit):
    """Write all migrated-note URLs to *filename*, one URL per line.

    The user (or the helper scripts in ``history_scripts``) must open these
    URLs so the migrated notes appear in the HedgeDoc history.

    :param filename: path of the URL list file to create (overwritten)
    :param urls_to_visit: list of note URLs to record
    """
    print_block_heading("Creating file containing the URLs to visit")
    # original message printed a broken placeholder "(unknown)"; report the actual filename
    print(f"A new file ({filename}) will be created. It contains all URLs you need to visit in order to make the "
          f"migrated documents appear in your HedgeDoc history.")
    print("This can be automated by running the scripts in the directory \"history_scripts\"")
    print("BE AWARE: Opening a lot of tabs might be quite resource-intensive.")
    with open(filename, 'w') as f:
        for url in urls_to_visit:
            f.write(url + "\n")
if __name__ == "__main__":
    # Entry point: migrate the CodiMD export in ./codimd-documents plus ../archive.zip
    # into the configured HedgeDoc instance, using the session cookie from common.get_sessionid
    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("HedgeDoc", "connect.hedgeDoc.sid"),
                         "codimd-documents", "../archive.zip")
Loading