Skip to content
Snippets Groups Projects

Hedgedoc import

Merged Andreas Domanowski requested to merge hedgedoc-import into main
1 file
+ 2
0
Compare changes
  • Side-by-side
  • Inline
+ 128
0
#!/usr/bin/env python3
import json
from urllib.error import HTTPError
from common import get_sessionid, print_block_heading
from pathlib import Path
import urllib.parse
import urllib.request
import os
from zipfile import ZipFile
def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
    """Upload a single markdown document to a HedgeDoc instance.

    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param hedgedoc_free_url: desired path/alias for the note; an empty string
        makes HedgeDoc generate a new random ID
    :param content: markdown content of the document
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :return: URL of the created note (the URL the server redirected to)
    :raises SystemExit: if the server redirects back to the instance root,
        which indicates the session cookie was not accepted
    :raises urllib.error.HTTPError: e.g. HTTP 409 when the free URL is taken
    """
    sanitized_free_url = urllib.parse.quote(hedgedoc_free_url)
    request_url = instance_url + '/new/' + sanitized_free_url
    headers = {"Cookie": f"connect.hedgeDoc.sid={session_id}", "Content-Type": "text/markdown"}
    req = urllib.request.Request(request_url, data=str.encode(content), method='POST', headers=headers)
    # unfortunately, no error is thrown if a document is not created when session cookie is invalid
    # HTTP 409 is ignored for the sake of simplicity. Handled in import_into_hedgedoc(..)
    # Not optimal, but nobody ain't got time for that
    with urllib.request.urlopen(req) as response:
        # A rejected session silently redirects to the instance root instead of erroring
        if response.url == instance_url + "/":
            raise SystemExit("Could not import document. Please check your HedgeDoc session cookie. Aborting...")
        return response.url
def check_file_exists(file):
    """Abort the whole script if *file* does not exist on disk.

    :param file: path of the required file
    :raises SystemExit: if the path does not exist
    """
    if not os.path.exists(file):
        raise SystemExit(
            f"ERROR: File {file} does not exist. Export your data from CodiMD and re-execute this script again!"
            f" Aborting...")
    print(f"Required file {file} exists. Proceeding...")
def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
    """Migrate an exported CodiMD archive into a HedgeDoc instance.

    Reads the (optional) exported ``history.json`` from *export_folder* to map
    document titles to the paths the user visited in CodiMD, uploads every
    document found in *archive_file*, and writes a helper file with the URLs
    the user must visit so the notes show up in the HedgeDoc history.

    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :param export_folder: folder that may contain the exported ``history.json``
    :param archive_file: path to the CodiMD export zip archive
    :raises SystemExit: if *archive_file* is missing or the session is invalid
    """
    print_block_heading(
        f"Checking existence of archive file ({archive_file})")
    check_file_exists(archive_file)
    # get exported history map
    history_json_filename = "history.json"
    history_dictionary = {}
    try:
        with open(os.path.join(export_folder, history_json_filename)) as map_file:
            history_dictionary = json.load(map_file)
    except FileNotFoundError:
        # history.json is optional — without it every note gets a random path
        print_block_heading(
            f"INFO: could not find file {history_json_filename}. Continuing anyways with random generated paths for "
            f"documents")
    # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
    lookup_map = {}
    if "history" in history_dictionary:
        for entry in history_dictionary["history"]:
            lookup_map[entry["text"]] = entry["id"]
    # URLs to visit to make the new document available in the history
    urls_to_visit = []
    process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit)
    create_urls_to_visit_file("history_scripts/hedgedoc_documents_to_visit.url", urls_to_visit)
def process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit):
    """Upload every document contained in the export archive.

    For each file in the zip archive, tries to keep the CodiMD path recorded
    in *lookup_map* (title -> path); otherwise lets HedgeDoc assign a random
    path. Appends every resulting note URL to *urls_to_visit* (mutated in
    place).

    :param archive_file: path to the CodiMD export zip archive
    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param lookup_map: mapping of document title to its CodiMD path/id
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :param urls_to_visit: list collecting the URLs of all migrated notes
    """
    # iterate over files in archive
    with ZipFile(archive_file) as zf:
        print("Now scanning your provided archive file containing the documents you are the owner of")
        print("If you visited your own document via a, e.g., \"codi-instance.tld/my_specified_free_url\" " +
              "this script tries to migrate it to the HedgeDoc instance at \"hedgedoc-instance.tld/my_own_path\"")
        print("If this is not possible, a new random URL for the document will be created")
        print_block_heading("Iterating over files in archive and trying to upload them")
        for file in zf.namelist():
            # the archive member's stem is the document title used in history.json
            document_title = Path(file).stem
            # check for every file if there is a match between its name (= title in history json) and an existing path
            # for a document
            with zf.open(file) as f:
                document_content = f.read().decode("UTF-8")
            if document_title in lookup_map:
                try_generate_free_url_document(document_content, document_title, instance_url, lookup_map, session_id,
                                               urls_to_visit)
            else:
                print(
                    f"According to your history (or lack thereof) , you did not visit \"{document_title}.md\" in the "
                    f"CodiMD instance recently. Migrating the document and generating a new, random URL/path for it")
                # empty string implies HedgeDoc should create a new ID
                generated_url = import_single_document(instance_url, "", document_content, session_id)
                print(f"New URL after document migration with new, random URL/subpath: "
                      f"{generated_url}")
                urls_to_visit.append(generated_url)
            print()
def try_generate_free_url_document(document_content, document_title, instance_url, lookup_map, session_id,
                                   urls_to_visit):
    """Upload a document, trying to keep its CodiMD path on the new instance.

    Falls back to a random path when the wanted path already exists on the
    HedgeDoc instance (HTTP 409). Appends the resulting note URL to
    *urls_to_visit* (mutated in place).

    :param document_content: markdown content of the document
    :param document_title: title of the document (key into *lookup_map*)
    :param instance_url: base URL of the HedgeDoc instance (no trailing slash)
    :param lookup_map: mapping of document title to its CodiMD path/id
    :param session_id: value of the ``connect.hedgeDoc.sid`` session cookie
    :param urls_to_visit: list collecting the URLs of all migrated notes
    :raises SystemExit: on any HTTP error other than 409
    """
    print(
        f"You visited your own document \"{document_title}.md\" via the identifier/path " +
        f"\"{lookup_map[document_title]}\"")
    print(f"Trying to migrate this document and make it available under the already visited path")
    try:
        new_url = import_single_document(instance_url, lookup_map[document_title], document_content,
                                         session_id)
        print(f"Migration was possible. New URL: {instance_url}/{lookup_map[document_title]}")
    except HTTPError as error:
        # .code is the portable attribute (.status only exists since Python 3.9)
        if error.code == 409:
            print("ATTENTION: Could not migrate document with the same path. Uploading anyways and "
                  "creating a new, random path")
            new_url = import_single_document(instance_url, "", document_content, session_id)
            print(f"New URL after document migration without migrating the URL/subpath: {new_url}")
        else:
            raise SystemExit("Could not create document. Please check your session cookie. Aborting...")
    urls_to_visit.append(new_url)
def create_urls_to_visit_file(filename, urls_to_visit):
    """Write all migrated-note URLs to *filename*, one URL per line.

    The user (or the helper scripts in ``history_scripts``) must open these
    URLs so the migrated notes appear in the HedgeDoc history.

    :param filename: path of the URL list file to create (overwritten)
    :param urls_to_visit: list of note URLs to record
    """
    print_block_heading("Creating file containing the URLs to visit")
    # original message printed a broken placeholder "(unknown)"; report the actual filename
    print(f"A new file ({filename}) will be created. It contains all URLs you need to visit in order to make the "
          f"migrated documents appear in your HedgeDoc history.")
    print("This can be automated by running the scripts in the directory \"history_scripts\"")
    print("BE AWARE: Opening a lot of tabs might be quite resource-intensive.")
    with open(filename, 'w') as f:
        for url in urls_to_visit:
            f.write(url + "\n")
if __name__ == "__main__":
    # Entry point: migrate the CodiMD export in ./codimd-documents plus ../archive.zip
    # into the configured HedgeDoc instance, using the session cookie from common.get_sessionid
    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("HedgeDoc", "connect.hedgeDoc.sid"),
                         "codimd-documents", "../archive.zip")
Loading