diff --git a/.gitignore b/.gitignore index 4296ae1e4e7b8edf995c2a7bb5bb94149503020e..ae80c3946c59f8b027967fc668791830e6830e72 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ __pycache__/ codimd-documents/ -.idea \ No newline at end of file +.idea + +*.zip \ No newline at end of file diff --git a/hedgedoc_import.py b/hedgedoc_import.py index 0aeddc5e0365acaabfefea39fac8bc2df820ae05..95d1b59e19d19ac5ac662c5fa473f230748d7b64 100644 --- a/hedgedoc_import.py +++ b/hedgedoc_import.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import json from common import get_sessionid from pathlib import Path @@ -11,36 +12,52 @@ from zipfile import ZipFile def import_single_document(instance_url, hedgedoc_free_url, content, session_id): sanitized_free_url = hedgedoc_free_url.replace(" ", "%20") - user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' + request_url = instance_url + '/new/' + sanitized_free_url - url = instance_url + '/new/' + sanitized_free_url + user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' headers = {'User-Agent': user_agent, "Cookie": f"connect.hedgeDoc.sid={session_id}", "Content-Type": "text/markdown"} - pkt = str.encode(content) - req = urllib.request.Request(url, data=pkt, method='POST', headers=headers) + document_contents = str.encode(content) + req = urllib.request.Request(request_url, data=document_contents, method='POST', headers=headers) with urllib.request.urlopen(req) as response: - print("Go visit " + response.url + " with your browser in a logged-in session.") + return response.url def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): folder = os.path.join(os.getcwd(), export_folder); - print("Trying to upload all files with extension \".md\" in folder \"" + folder + "\""); - - #with ZipFile('images.zip') as zf: - # for file in zf.namelist(): - # if not file.endswith('.png'): # optional filtering by filetype - # continue - # with zf.open(file) as f: - # image = pygame.image.load(f, namehint=file) - - for relative_filename in os.listdir(folder): - if relative_filename.endswith(".md"): - addressable_filename = os.path.join(folder, relative_filename) - print("Trying to upload: " + addressable_filename) - markdown_content = Path(addressable_filename).read_text() - free_url = Path(relative_filename).stem - import_single_document(instance_url, free_url, markdown_content, session_id) + # get exported history map + with open(os.path.join(export_folder, "map.json")) as map_file: + history_dictionary = json.load(map_file) + + # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD) + lookup_map = {} + for entry in history_dictionary["history"]: + lookup_map[entry["text"]] = entry["id"] + + # URLs to visit to make the new document available in the history + urls_to_visit = [] + + # iterate over files in archive + with ZipFile(archive_file) as zf: + for file in zf.namelist(): + document_title = Path(file).stem + # check for every file if there is a match between its name (= title in history json) and an existing path + # for a document + document_content = None + + if document_title in lookup_map: + print("filename: ", document_title, ", in lookupMap: ", + lookup_map[document_title]) + urls_to_visit.append( + import_single_document(instance_url, lookup_map[document_title], document_content, session_id)) + else: + print("no mapping found for ", document_title, ", uploading anyway") + # empty string implies HedgeDoc should create a new ID + urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id)) + + browser = "firefox" + subprocess.run([browser] + urls_to_visit) def select_browser(): @@ -60,6 +77,7 @@ def select_browser(): if __name__ == "__main__": - select_browser() - #subprocess.run(["firefox", "www.google.de"]) - #import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents") + # select_browser() + import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", 123, "codimd-documents", "archive.zip") + + # import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents") diff --git a/md-import-export.py b/md-import-export.py index 529a7925f5df5fdaa83630046d270a7bb95d3add..90508a78ee83ecbdf09311a5cf88efa66ad2d7f8 100644 --- a/md-import-export.py +++ b/md-import-export.py @@ -3,5 +3,6 @@ from common import get_sessionid from hedgedoc_import import import_into_hedgedoc if __name__ == "__main__": - export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), "codimd-documents") - import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents", "archive.zip") \ No newline at end of file + export_folder = "codimd-documents" + export_from_codimd("https://md.inf.tu-dresden.de", get_sessionid("connect.sid"), export_folder) + import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), export_folder, "archive.zip") \ No newline at end of file