From 18127bebb27c6feefdeb919f0d904d1fcebd91b3 Mon Sep 17 00:00:00 2001 From: Andreas Domanowski <andreas@domanowski.net> Date: Wed, 1 Mar 2023 12:38:22 +0100 Subject: [PATCH] Refactor HedgeDoc importer, Export urls to visit to file --- .gitignore | 3 ++- hedgedoc_import.py | 35 ++++++++++++++++++++++++++++++----- md-import-export.py | 4 ++-- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index ae80c39..0283d2c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ codimd-documents/ .idea -*.zip \ No newline at end of file +*.zip +hedgedoc_documents_to_visit.url \ No newline at end of file diff --git a/hedgedoc_import.py b/hedgedoc_import.py index c539dfb..78936ae 100644 --- a/hedgedoc_import.py +++ b/hedgedoc_import.py @@ -35,6 +35,7 @@ def import_single_document(instance_url, hedgedoc_free_url, content, session_id) def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): check_accessibility(instance_url, session_id) + # let user select browser browser = select_browser() # get exported history map with open(os.path.join(export_folder, "history.json")) as map_file: @@ -48,6 +49,12 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): # URLs to visit to make the new document available in the history urls_to_visit = [] + iterate_over_archive(archive_file, instance_url, lookup_map, session_id, urls_to_visit) + visit_urls_in_browser(browser, urls_to_visit) + create_urls_to_visit_file("hedgedoc_documents_to_visit.url", urls_to_visit) + + +def iterate_over_archive(archive_file, instance_url, lookup_map, session_id, urls_to_visit): # iterate over files in archive with ZipFile(archive_file) as zf: print("Now scanning your provided archive file containing the documents you are the owner of") @@ -63,27 +70,45 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): if document_title in lookup_map: print( - f"\tYou visited your own document \"{document_title}\".md) via the path " + + f"\tYou visited your own document \"{document_title}\".md) via the identifier/path " + f"\"{lookup_map[document_title]}\"") print(f"\tTrying to migrate this document and make it available under the already visited path") try: new_url = import_single_document(instance_url, lookup_map[document_title], document_content, session_id) urls_to_visit.append(new_url) + print(f"\tMigration was possible. New URL: {instance_url}/{lookup_map[document_title]}") except HTTPError as error: if error.status == 409: - print("\tHTTP 409. Uploading anyways (new path, random ID)") + print("\tATTENTION: Could not migrate document with the same path. Uploading anyways and " + "creating a new, random path") new_url = import_single_document(instance_url, "", document_content, session_id) + print(f"New URL after document migration without migrating the URL/subpath: {new_url}") urls_to_visit.append(new_url) else: - print("no mapping found for ", document_title, ", uploading anyway") + print(f"According to your history, you did not visit \"{document_title}.md\" in the CodiMD " + "instance recently. Migrating the document and generating a new, random URL/path for it") # empty string implies HedgeDoc should create a new ID - urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id)) + generated_url = import_single_document(instance_url, "", document_content, session_id) + print(f"New URL after document migration with new, random URL/subpath: " + f"{generated_url}") + urls_to_visit.append(generated_url) + + +def visit_urls_in_browser(browser, urls_to_visit): print("Your specified browser now needs to visit every newly created document to ensure that it's available in" "your history in HedgeDoc") subprocess.run([browser] + urls_to_visit) +def create_urls_to_visit_file(filename, urls_to_visit): + with open(filename, 'w') as f: + for url in urls_to_visit: + f.write(url + "\n") + print( + "In preparation for the case that this did not work, all URLs are saved in the file \"hedgedoc_documents_to_visit.url\"") + + def select_browser(): print("Once you've uploaded all your documents, they unfortunately do not appear in your HedgeDoc history.") print("To make sure that they are available to you, this script automatically visits all your newly uploaded " @@ -109,4 +134,4 @@ def select_browser(): if __name__ == "__main__": import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), - "codimd-documents") + "codimd-documents", "archive.zip") diff --git a/md-import-export.py b/md-import-export.py index 90508a7..0467a36 100644 --- a/md-import-export.py +++ b/md-import-export.py @@ -4,5 +4,5 @@ from hedgedoc_import import import_into_hedgedoc if __name__ == "__main__": export_folder = "codimd-documents" - export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), export_folder) - import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), export_folder, "archive.zip") \ No newline at end of file + export_from_codimd("http://localhost:3001", get_sessionid("connect.sid"), export_folder) + import_into_hedgedoc("http://hedgedoc:3000", get_sessionid("connect.hedgeDoc.sid"), export_folder, "archive.zip") -- GitLab