From 8ba97220a33f220bafaeeb7aaaf26a57531a8785 Mon Sep 17 00:00:00 2001 From: Andreas Domanowski <andreas@domanowski.net> Date: Tue, 28 Feb 2023 15:37:19 +0100 Subject: [PATCH] Add import from archive file --- .gitignore | 4 ++- hedgedoc_import.py | 66 ++++++++++++++++++++++++++++----------------- md-import-export.py | 5 ++-- 3 files changed, 48 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 4296ae1..ae80c39 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ __pycache__/ codimd-documents/ -.idea \ No newline at end of file +.idea + +*.zip \ No newline at end of file diff --git a/hedgedoc_import.py b/hedgedoc_import.py index 0aeddc5..95d1b59 100644 --- a/hedgedoc_import.py +++ b/hedgedoc_import.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import json from common import get_sessionid from pathlib import Path @@ -11,36 +12,52 @@ from zipfile import ZipFile def import_single_document(instance_url, hedgedoc_free_url, content, session_id): sanitized_free_url = hedgedoc_free_url.replace(" ", "%20") - user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' + request_url = instance_url + '/new/' + sanitized_free_url - url = instance_url + '/new/' + sanitized_free_url + user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' headers = {'User-Agent': user_agent, "Cookie": f"connect.hedgeDoc.sid={session_id}", "Content-Type": "text/markdown"} - pkt = str.encode(content) - req = urllib.request.Request(url, data=pkt, method='POST', headers=headers) + document_contents = str.encode(content) + req = urllib.request.Request(request_url, data=document_contents, method='POST', headers=headers) with urllib.request.urlopen(req) as response: - print("Go visit " + response.url + " with your browser in a logged-in session.") + return response.url def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): folder = os.path.join(os.getcwd(), export_folder); - print("Trying to upload all files with extension \".md\" in folder \"" + folder + "\""); - - #with ZipFile('images.zip') as zf: - # for file in zf.namelist(): - # if not file.endswith('.png'): # optional filtering by filetype - # continue - # with zf.open(file) as f: - # image = pygame.image.load(f, namehint=file) - - for relative_filename in os.listdir(folder): - if relative_filename.endswith(".md"): - addressable_filename = os.path.join(folder, relative_filename) - print("Trying to upload: " + addressable_filename) - markdown_content = Path(addressable_filename).read_text() - free_url = Path(relative_filename).stem - import_single_document(instance_url, free_url, markdown_content, session_id) + # get exported history map + with open(os.path.join(export_folder, "map.json")) as map_file: + history_dictionary = json.load(map_file) + + # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD) + lookup_map = {} + for entry in history_dictionary["history"]: + lookup_map[entry["text"]] = entry["id"] + + # URLs to visit to make the new document available in the history + urls_to_visit = [] + + # iterate over files in archive + with ZipFile(archive_file) as zf: + for file in zf.namelist(): + document_title = Path(file).stem + # check for every file if there is a match between its name (= title in history json) and an existing path + # for a document + document_content = None + + if document_title in lookup_map: + print("filename: ", document_title, ", in lookupMap: ", + lookup_map[document_title]) + urls_to_visit.append( + import_single_document(instance_url, lookup_map[document_title], document_content, session_id)) + else: + print("no mapping found for ", document_title, ", uploading anyway") + # empty string implies HedgeDoc should create a new ID + urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id)) + + browser = "firefox" + subprocess.run([browser] + urls_to_visit) def select_browser(): @@ -60,6 +77,7 @@ def select_browser(): if __name__ == "__main__": - select_browser() - #subprocess.run(["firefox", "www.google.de"]) - #import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents") + # select_browser() + import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", 123, "codimd-documents", "archive.zip") + + # import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents") diff --git a/md-import-export.py b/md-import-export.py index 529a792..90508a7 100644 --- a/md-import-export.py +++ b/md-import-export.py @@ -3,5 +3,6 @@ from common import get_sessionid from hedgedoc_import import import_into_hedgedoc if __name__ == "__main__": - export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), "codimd-documents") - import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents", "archive.zip") \ No newline at end of file + export_folder = "codimd-documents" + export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), export_folder) + import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), export_folder, "archive.zip") \ No newline at end of file -- GitLab