From 13f564f939e515d4236ccfa009584d1ec15f13b9 Mon Sep 17 00:00:00 2001
From: Andreas Domanowski <andreas@domanowski.net>
Date: Wed, 1 Mar 2023 12:12:40 +0100
Subject: [PATCH] WIP: Implement HedgeDoc import with visiting URLs after upload, Add accessibility check

---
Reviewer notes (free-form text between the "---" line and the first
"diff --git" line is not part of the commit message or of the diff):

 * The line structure of this patch was restored from a copy in which all
   newlines and indentation had been collapsed. Indentation and the position
   of blank context lines were inferred from Python syntax and from the
   line counts in the hunk headers -- NOTE(review): verify the context lines
   against the target tree before applying.
 * Two added lines differ from the collapsed copy (line-for-line
   replacements, hunk counts unchanged; the resulting hedgedoc_import.py
   therefore no longer matches blob c539dfb):
     - the __main__ call passes "archive.zip" as fourth argument, because
       import_into_hedgedoc() declares archive_file without a default and
       the three-argument call would raise TypeError;
     - a missing space was added between the adjacent string literals
       "...available in" / "your history in HedgeDoc".
 * Open issue, intentionally not changed here: in the lookup_map branch an
   HTTPError whose status is not 409 is caught and dropped, so that document
   is neither uploaded nor reported.

 codimd_export.py   |  2 +-
 hedgedoc_import.py | 81 +++++++++++++++++++++++++++++++---------------
 2 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/codimd_export.py b/codimd_export.py
index 14e5d96..cf3f0b7 100755
--- a/codimd_export.py
+++ b/codimd_export.py
@@ -46,7 +46,7 @@ def export_from_codimd(instance_url, session_id, export_to):
             contents = slurp(f"{document_url}/download", session_id)
             with open(Path(target_dir, f"{document_id}.md"), mode="wb") as stream:
                 stream.write(contents)
-            with open(Path(target_dir, f"map.map"), mode="w") as stream:
+            with open(Path(target_dir, f"history.json"), mode="w") as stream:
                 json.dump(data, stream)
             num_ok += 1
         except HTTPError as error:
diff --git a/hedgedoc_import.py b/hedgedoc_import.py
index 95d1b59..c539dfb 100644
--- a/hedgedoc_import.py
+++ b/hedgedoc_import.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 import json
-
+from urllib.error import HTTPError
 from common import get_sessionid
 from pathlib import Path
 import urllib.parse
@@ -10,24 +10,34 @@ import subprocess
 from zipfile import ZipFile
 
 
+def check_accessibility(instance_url, session_id):
+    request_url = instance_url + '/me/'
+    headers = {"Cookie": f"connect.hedgeDoc.sid={session_id}"}
+
+    req = urllib.request.Request(request_url, headers=headers)
+    with urllib.request.urlopen(req) as response:
+        response_json = json.load(response)
+        if response_json["status"] != "ok":
+            raise SystemExit(f"Could not access protected resources at {request_url}. Make sure that the specified "
+                             f"cookie is correct. Aborting...")
+
+
 def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
     sanitized_free_url = hedgedoc_free_url.replace(" ", "%20")
     request_url = instance_url + '/new/' + sanitized_free_url
-    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
-    headers = {'User-Agent': user_agent, "Cookie": f"connect.hedgeDoc.sid={session_id}",
-               "Content-Type": "text/markdown"}
+    headers = {"Cookie": f"connect.hedgeDoc.sid={session_id}", "Content-Type": "text/markdown"}
 
-    document_contents = str.encode(content)
-    req = urllib.request.Request(request_url, data=document_contents, method='POST', headers=headers)
+    req = urllib.request.Request(request_url, data=str.encode(content), method='POST', headers=headers)
     with urllib.request.urlopen(req) as response:
         return response.url
 
 
 def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
-    folder = os.path.join(os.getcwd(), export_folder);
+    check_accessibility(instance_url, session_id)
+    browser = select_browser()
 
     # get exported history map
-    with open(os.path.join(export_folder, "map.json")) as map_file:
+    with open(os.path.join(export_folder, "history.json")) as map_file:
         history_dictionary = json.load(map_file)
 
     # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
@@ -40,44 +50,63 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
 
     # iterate over files in archive
     with ZipFile(archive_file) as zf:
+        print("Now scanning your provided archive file containing the documents you are the owner of")
+        print("If you visited your own document via a, e.g., \"codi-instance.tld/my_own_path\") " +
+              "this script tries to migrate it to the HedgeDoc instance at \"hedgedoc-instance.tld/my_own_path\"")
+        print("If this is not possible, a new random URL for the document will be created")
         for file in zf.namelist():
             document_title = Path(file).stem
             # check for every file if there is a match between its name (= title in history json) and an existing path
             # for a document
-            document_content = None
+            with zf.open(file) as f:
+                document_content = f.read().decode("UTF-8")
             if document_title in lookup_map:
-                print("filename: ", document_title, ", in lookupMap: ",
-                      lookup_map[document_title])
-                urls_to_visit.append(
-                    import_single_document(instance_url, lookup_map[document_title], document_content, session_id))
+                print(
+                    f"\tYou visited your own document \"{document_title}\".md) via the path " +
+                    f"\"{lookup_map[document_title]}\"")
+                print(f"\tTrying to migrate this document and make it available under the already visited path")
+                try:
+                    new_url = import_single_document(instance_url, lookup_map[document_title], document_content,
+                                                     session_id)
+                    urls_to_visit.append(new_url)
+                except HTTPError as error:
+                    if error.status == 409:
+                        print("\tHTTP 409. Uploading anyways (new path, random ID)")
+                        new_url = import_single_document(instance_url, "", document_content, session_id)
+                        urls_to_visit.append(new_url)
             else:
                 print("no mapping found for ", document_title, ", uploading anyway")
                 # empty string implies HedgeDoc should create a new ID
                 urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id))
-
-    browser = "firefox"
+    print("Your specified browser now needs to visit every newly created document to ensure that it's available in "
+          "your history in HedgeDoc")
     subprocess.run([browser] + urls_to_visit)
 
 
 def select_browser():
-    options = ['chrome', 'firefox', 'opera']
+    print("Once you've uploaded all your documents, they unfortunately do not appear in your HedgeDoc history.")
+    print("To make sure that they are available to you, this script automatically visits all your newly uploaded "
+          "documents in your browser.")
+    print("Therefore, you need to specify your browser. It needs to be on your path with the same name as "
+          "specified here")
+    print("ATTENTION - this needs to be a browser where you have an active and logged-in HedgeDoc session")
+    supported_browsers = ["firefox", "opera", "safari", "google-chrome", "chromium"]
     user_input = ''
 
-    input_message = "Pick an option:\n"
+    input_message = "Specify a browser which holds an active and logged-in HedgeDoc session:\n"
 
-    for index, item in enumerate(options):
-        input_message += f'{index + 1}) {item}\n'
+    for i, browser_suggestion in enumerate(supported_browsers):
+        input_message += f'{i + 1}) {browser_suggestion}\n'
 
-    input_message += 'Your choice: '
+    # input_message += 'Your choice: '
 
-    while user_input not in map(str, range(1, len(options) + 1)):
+    while user_input not in map(str, range(1, len(supported_browsers) + 1)):
         user_input = input(input_message)
 
-    print('You picked: ' + options[int(user_input) - 1])
+    print('You chose: ' + supported_browsers[int(user_input) - 1])
+    return supported_browsers[int(user_input) - 1]
 
 
 if __name__ == "__main__":
-    # select_browser()
-    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", 123, "codimd-documents", "archive.zip")
-
-    # import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents")
+    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"),
+                         "codimd-documents", "archive.zip")
-- 
GitLab