Skip to content
Snippets Groups Projects
Commit 13f564f9 authored by Andreas Domanowski
Browse files

WIP: Implement HedgeDoc import with visiting URLs after upload, Add accessibility check

parent 8ba97220
No related branches found
No related tags found
1 merge request: !1 "Hedgedoc import"
......@@ -46,7 +46,7 @@ def export_from_codimd(instance_url, session_id, export_to):
contents = slurp(f"{document_url}/download", session_id)
with open(Path(target_dir, f"{document_id}.md"), mode="wb") as stream:
stream.write(contents)
with open(Path(target_dir, f"map.map"), mode="w") as stream:
with open(Path(target_dir, f"history.json"), mode="w") as stream:
json.dump(data, stream)
num_ok += 1
except HTTPError as error:
......
#!/usr/bin/env python3
import json
from urllib.error import HTTPError
from common import get_sessionid
from pathlib import Path
import urllib.parse
......@@ -10,24 +10,34 @@ import subprocess
from zipfile import ZipFile
def check_accessibility(instance_url, session_id):
    """Abort the script unless the session cookie grants access to the instance.

    Sends a GET request to ``<instance_url>/me/`` carrying the
    ``connect.hedgeDoc.sid`` cookie and expects a JSON object whose
    ``"status"`` entry equals ``"ok"``.

    Args:
        instance_url: Base URL of the instance; ``/me/`` is appended verbatim.
        session_id: Value sent as the ``connect.hedgeDoc.sid`` cookie.

    Raises:
        SystemExit: If the request fails, the reply is not valid JSON, or the
            reported status is anything other than ``"ok"``.
    """
    # Imported locally so the function does not depend on the file-level
    # imports providing the ``urllib.request`` / ``urllib.error`` submodules.
    import urllib.error
    import urllib.request

    request_url = instance_url + '/me/'
    failure_message = (f"Could not access protected resources at {request_url}. Make sure that the specified "
                       f"cookie is correct. Aborting...")
    headers = {"Cookie": f"connect.hedgeDoc.sid={session_id}"}
    try:
        req = urllib.request.Request(request_url, headers=headers)
        with urllib.request.urlopen(req) as response:
            response_json = json.load(response)
    except (urllib.error.URLError, ValueError) as error:
        # URLError also covers HTTPError (e.g. 401/403); ValueError covers a
        # malformed URL and a body that is not JSON (such as an HTML login page).
        # Either way the protected resource was not reachable with this cookie.
        raise SystemExit(failure_message) from error
    # .get() so that a JSON object without a "status" entry is treated as a
    # failed check instead of raising KeyError.
    if not isinstance(response_json, dict) or response_json.get("status") != "ok":
        raise SystemExit(failure_message)
def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
    """Upload one markdown document via POST and return the URL of the response.

    Args:
        instance_url: Base URL of the target instance; ``/new/<alias>`` is
            appended to it.
        hedgedoc_free_url: Alias appended after ``/new/``. An empty string
            posts to ``<instance_url>/new/``, which the caller in this file
            uses to request a newly generated id.
        content: Markdown text of the document; sent UTF-8 encoded as the
            request body with ``Content-Type: text/markdown``.
        session_id: Value sent as the ``connect.hedgeDoc.sid`` cookie.

    Returns:
        The ``url`` attribute of the HTTP response.

    Raises:
        urllib.error.HTTPError: Propagated unchanged; the caller inspects the
            status (for example 409) to decide on a fallback.
    """
    # Imported locally so the function does not depend on the file-level
    # imports providing these ``urllib`` submodules.
    import urllib.parse
    import urllib.request

    # Percent-encode the whole alias instead of only replacing spaces, so that
    # characters such as '#', '?' or umlauts cannot truncate or break the URL.
    # '%' stays untouched so an alias that is already escaped is not encoded twice.
    sanitized_free_url = urllib.parse.quote(hedgedoc_free_url, safe="/%")
    request_url = instance_url + '/new/' + sanitized_free_url
    headers = {"Cookie": f"connect.hedgeDoc.sid={session_id}", "Content-Type": "text/markdown"}
    req = urllib.request.Request(request_url, data=content.encode("utf-8"), method='POST', headers=headers)
    with urllib.request.urlopen(req) as response:
        return response.url
def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
folder = os.path.join(os.getcwd(), export_folder);
check_accessibility(instance_url, session_id)
browser = select_browser()
# get exported history map
with open(os.path.join(export_folder, "map.json")) as map_file:
with open(os.path.join(export_folder, "history.json")) as map_file:
history_dictionary = json.load(map_file)
# mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
......@@ -40,44 +50,63 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
# iterate over files in archive
with ZipFile(archive_file) as zf:
print("Now scanning your provided archive file containing the documents you are the owner of")
print("If you visited your own document via a, e.g., \"codi-instance.tld/my_own_path\") " +
"this script tries to migrate it to the HedgeDoc instance at \"hedgedoc-instance.tld/my_own_path\"")
print("If this is not possible, a new random URL for the document will be created")
for file in zf.namelist():
document_title = Path(file).stem
# check for every file if there is a match between its name (= title in history json) and an existing path
# for a document
document_content = None
with zf.open(file) as f:
document_content = f.read().decode("UTF-8")
if document_title in lookup_map:
print("filename: ", document_title, ", in lookupMap: ",
lookup_map[document_title])
urls_to_visit.append(
import_single_document(instance_url, lookup_map[document_title], document_content, session_id))
print(
f"\tYou visited your own document \"{document_title}\".md) via the path " +
f"\"{lookup_map[document_title]}\"")
print(f"\tTrying to migrate this document and make it available under the already visited path")
try:
new_url = import_single_document(instance_url, lookup_map[document_title], document_content,
session_id)
urls_to_visit.append(new_url)
except HTTPError as error:
if error.status == 409:
print("\tHTTP 409. Uploading anyways (new path, random ID)")
new_url = import_single_document(instance_url, "", document_content, session_id)
urls_to_visit.append(new_url)
else:
print("no mapping found for ", document_title, ", uploading anyway")
# empty string implies HedgeDoc should create a new ID
urls_to_visit.append(import_single_document(instance_url, "", document_content, session_id))
browser = "firefox"
print("Your specified browser now needs to visit every newly created document to ensure that it's available in"
"your history in HedgeDoc")
subprocess.run([browser] + urls_to_visit)
def select_browser():
    """Ask the user on stdin which browser should open the uploaded documents.

    Prints an explanation, then repeats a numbered menu until the answer is
    one of the listed numbers. Whitespace around the answer is ignored.

    Returns:
        The chosen entry of the supported-browser list, e.g. ``"firefox"``.
    """
    print("Once you've uploaded all your documents, they unfortunately do not appear in your HedgeDoc history.")
    print("To make sure that they are available to you, this script automatically visits all your newly uploaded "
          "documents in your browser.")
    print("Therefore, you need to specify your browser. It needs to be on your path with the same name as "
          "specified here")
    print("ATTENTION - this needs to be a browser where you have an active and logged-in HedgeDoc session")

    supported_browsers = ["firefox", "opera", "safari", "google-chrome", "chromium"]

    # Menu entries are numbered from 1; the answer is mapped back to an index below.
    menu = "".join(f'{number}) {name}\n' for number, name in enumerate(supported_browsers, start=1))
    input_message = "Specify a browser which holds an active and logged-in HedgeDoc session:\n" + menu
    valid_answers = {str(number) for number in range(1, len(supported_browsers) + 1)}

    user_input = ''
    while user_input not in valid_answers:
        # strip() so that an answer such as " 2 " is not rejected.
        user_input = input(input_message).strip()

    chosen_browser = supported_browsers[int(user_input) - 1]
    print('You chose: ' + chosen_browser)
    return chosen_browser
if __name__ == "__main__":
    # Script entry point: read the session id via get_sessionid() and import the
    # exported documents. import_into_hedgedoc() takes four arguments, so the
    # archive file has to be passed explicitly; "archive.zip" is the name used
    # by the earlier hard-coded call.
    import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"),
                         "codimd-documents", "archive.zip")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment