Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • main
1 result

Target

Select target project
  • stgroup/misc/codimd-export-script
1 result
Select Git revision
  • main
1 result
Show changes
Commits on Source (2)
import json
import sys import sys
import urllib.request
if not sys.platform.startswith("win"): if not sys.platform.startswith("win"):
from getpass import getpass from getpass import getpass
...@@ -21,3 +23,23 @@ def get_sessionid(service_name, cookie_key): ...@@ -21,3 +23,23 @@ def get_sessionid(service_name, cookie_key):
if sid.startswith("s%3A"): if sid.startswith("s%3A"):
return sid return sid
raise SystemExit(f"error: the supplied session id seems to be malformed") raise SystemExit(f"error: the supplied session id seems to be malformed")
def print_block_heading(message):
    """Print ``message`` framed by a separator line above and below it.

    Used to visually split the console output of the export/import run
    into sections.

    Args:
        message: Text of the heading; printed on its own line.
    """
    separator = "======================================================================================================"
    # One call with a newline separator yields exactly the three lines
    # (separator, message, separator) and keeps the banner together.
    print(separator, message, separator, sep="\n")
def check_accessibility(instance_url, session_id, cookie_key):
    """Verify that the session cookie grants access to ``<instance_url>/me/``.

    Sends a request carrying ``cookie_key=session_id`` in the ``Cookie``
    header and expects a JSON object whose ``"status"`` entry equals
    ``"ok"``. On success a confirmation line is printed.

    Args:
        instance_url: Base URL of the instance, without a trailing slash.
        session_id: Value of the session cookie.
        cookie_key: Name of the session cookie (e.g. ``"connect.sid"``).

    Raises:
        SystemExit: If the endpoint cannot be reached, the reply is not
            valid JSON, or the reported status is not ``"ok"``.
    """
    request_url = instance_url + '/me/'
    headers = {"Cookie": f"{cookie_key}={session_id}"}
    req = urllib.request.Request(request_url, headers=headers)
    try:
        with urllib.request.urlopen(req) as response:
            response_json = json.load(response)
    except OSError as error:
        # URLError/HTTPError derive from OSError; report them the same way
        # the /history fetch does instead of dumping a traceback.
        raise SystemExit(f"error: couldn't access {request_url}: {error}")
    except ValueError as error:
        # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        raise SystemExit(f"error: received malformed JSON from {request_url}: {error}")
    # A reply that is not an object, or lacks "status", counts as a refusal
    # rather than crashing with TypeError/KeyError.
    if not isinstance(response_json, dict) or response_json.get("status") != "ok":
        raise SystemExit(f"Could not access protected resources at {request_url}. Make sure that the specified "
                         f"cookie is correct. Aborting...")
    print(f"Could access protected resources at {instance_url}. Proceeding...")
...@@ -10,7 +10,7 @@ from urllib.parse import quote ...@@ -10,7 +10,7 @@ from urllib.parse import quote
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
import sys import sys
from common import get_sessionid from common import get_sessionid, check_accessibility, print_block_heading
def slurp(url, session_id): def slurp(url, session_id):
...@@ -31,15 +31,19 @@ def prepare_target_dir(pathname): ...@@ -31,15 +31,19 @@ def prepare_target_dir(pathname):
def export_from_codimd(instance_url, session_id, export_to): def export_from_codimd(instance_url, session_id, export_to):
check_accessibility(instance_url, session_id, "connect.sid")
"""Retrieve CodiMD document history and try to download each document.""" """Retrieve CodiMD document history and try to download each document."""
print_block_heading(f"Trying to fetch history ({instance_url})")
try: try:
data = json.loads(slurp(f"{instance_url}/history", session_id)) data = json.loads(slurp(f"{instance_url}/history", session_id))
except OSError as error: except OSError as error:
raise SystemExit(f"error: couldn't access the /history endpoint: {error}") raise SystemExit(f"error: couldn't access the /history endpoint: {error}")
except json.JSONDecodeError as error: except json.JSONDecodeError as error:
raise SystemExit(f"error: received malformed JSON: {error}") raise SystemExit(f"error: received malformed JSON: {error}")
print_block_heading(f"Preparing target directory ({export_to})")
target_dir = prepare_target_dir(export_to) target_dir = prepare_target_dir(export_to)
num_ok = num_fail = 0 num_ok = num_fail = 0
print_block_heading(f"Accessing history and trying to fetch each document")
for row in data["history"]: for row in data["history"]:
document_id = row["id"] document_id = row["id"]
document_url = f"{instance_url}/{quote(document_id)}" document_url = f"{instance_url}/{quote(document_id)}"
......
#!/usr/bin/env python3 #!/usr/bin/env python3
import json import json
from urllib.error import HTTPError from urllib.error import HTTPError
from common import get_sessionid from common import get_sessionid, check_accessibility, print_block_heading
from pathlib import Path from pathlib import Path
import urllib.parse import urllib.parse
import urllib.request import urllib.request
import os import os
import subprocess
from zipfile import ZipFile from zipfile import ZipFile
def check_accessibility(instance_url, session_id, cookie_key="connect.hedgeDoc.sid"):
    """Verify that the session cookie grants access to ``<instance_url>/me/``.

    Sends a request carrying ``cookie_key=session_id`` in the ``Cookie``
    header and expects a JSON object whose ``"status"`` entry equals
    ``"ok"``. Returns silently on success.

    Args:
        instance_url: Base URL of the instance, without a trailing slash.
        session_id: Value of the session cookie.
        cookie_key: Name of the session cookie. Defaults to the name this
            function previously hard-coded, so two-argument calls behave
            as before.

    Raises:
        SystemExit: If the endpoint cannot be reached, the reply is not
            valid JSON, or the reported status is not ``"ok"``.
    """
    request_url = instance_url + '/me/'
    headers = {"Cookie": f"{cookie_key}={session_id}"}
    req = urllib.request.Request(request_url, headers=headers)
    try:
        with urllib.request.urlopen(req) as response:
            response_json = json.load(response)
    except OSError as error:
        # URLError/HTTPError derive from OSError; exit with a message
        # instead of a traceback.
        raise SystemExit(f"error: couldn't access {request_url}: {error}")
    except ValueError as error:
        # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        raise SystemExit(f"error: received malformed JSON from {request_url}: {error}")
    # A reply that is not an object, or lacks "status", counts as a refusal
    # rather than crashing with TypeError/KeyError.
    if not isinstance(response_json, dict) or response_json.get("status") != "ok":
        raise SystemExit(f"Could not access protected resources at {request_url}. Make sure that the specified "
                         f"cookie is correct. Aborting...")
def import_single_document(instance_url, hedgedoc_free_url, content, session_id): def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
sanitized_free_url = hedgedoc_free_url.replace(" ", "%20") sanitized_free_url = hedgedoc_free_url.replace(" ", "%20")
request_url = instance_url + '/new/' + sanitized_free_url request_url = instance_url + '/new/' + sanitized_free_url
...@@ -33,17 +20,19 @@ def import_single_document(instance_url, hedgedoc_free_url, content, session_id) ...@@ -33,17 +20,19 @@ def import_single_document(instance_url, hedgedoc_free_url, content, session_id)
return response.url return response.url
def check_archive_exists(archive_file): def check_file_exists(file):
if not os.path.exists(archive_file): if not os.path.exists(file):
raise SystemExit( raise SystemExit(
f"ERROR: File {archive_file} does not exist. Export your data from CodiMD and re-execute this script again!" f"ERROR: File {file} does not exist. Export your data from CodiMD and re-execute this script again!"
f" Aborting...") f" Aborting...")
print(f"Required file {file} exists. Proceeding...")
def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
print("Beginning import_md") print_block_heading(
check_archive_exists(archive_file) f"Checking existence of archive file ({archive_file}) exists and authorization at {instance_url}")
check_accessibility(instance_url, session_id) check_file_exists(archive_file)
check_accessibility(instance_url, session_id, "connect.hedgeDoc.sid")
# get exported history map # get exported history map
with open(os.path.join(export_folder, "history.json")) as map_file: with open(os.path.join(export_folder, "history.json")) as map_file:
history_dictionary = json.load(map_file) history_dictionary = json.load(map_file)
...@@ -56,11 +45,11 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file): ...@@ -56,11 +45,11 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
# URLs to visit to make the new document available in the history # URLs to visit to make the new document available in the history
urls_to_visit = [] urls_to_visit = []
iterate_over_archive(archive_file, instance_url, lookup_map, session_id, urls_to_visit) process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit)
create_urls_to_visit_file("../hedgedoc_documents_to_visit.url", urls_to_visit) create_urls_to_visit_file("../hedgedoc_documents_to_visit.url", urls_to_visit)
def iterate_over_archive(archive_file, instance_url, lookup_map, session_id, urls_to_visit): def process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit):
# iterate over files in archive # iterate over files in archive
with ZipFile(archive_file) as zf: with ZipFile(archive_file) as zf:
print("Now scanning your provided archive file containing the documents you are the owner of") print("Now scanning your provided archive file containing the documents you are the owner of")
...@@ -76,21 +65,22 @@ def iterate_over_archive(archive_file, instance_url, lookup_map, session_id, url ...@@ -76,21 +65,22 @@ def iterate_over_archive(archive_file, instance_url, lookup_map, session_id, url
if document_title in lookup_map: if document_title in lookup_map:
print( print(
f"\tYou visited your own document \"{document_title}\".md) via the identifier/path " + f"You visited your own document \"{document_title}\".md) via the identifier/path " +
f"\"{lookup_map[document_title]}\"") f"\"{lookup_map[document_title]}\"")
print(f"\tTrying to migrate this document and make it available under the already visited path") print(f"Trying to migrate this document and make it available under the already visited path")
try: try:
new_url = import_single_document(instance_url, lookup_map[document_title], document_content, new_url = import_single_document(instance_url, lookup_map[document_title], document_content,
session_id) session_id)
urls_to_visit.append(new_url) urls_to_visit.append(new_url)
print(f"\tMigration was possible. New URL: {instance_url}/{lookup_map[document_title]}") print(f"Migration was possible. New URL: {instance_url}/{lookup_map[document_title]}")
except HTTPError as error: except HTTPError as error:
if error.status == 409: if error.status == 409:
print("\tATTENTION: Could not migrate document with the same path. Uploading anyways and " print("ATTENTION: Could not migrate document with the same path. Uploading anyways and "
"creating a new, random path") "creating a new, random path")
new_url = import_single_document(instance_url, "", document_content, session_id) new_url = import_single_document(instance_url, "", document_content, session_id)
print(f"New URL after document migration without migrating the URL/subpath: {new_url}") print(f"New URL after document migration without migrating the URL/subpath: {new_url}")
urls_to_visit.append(new_url) urls_to_visit.append(new_url)
print()
else: else:
print(f"According to your history, you did not visit \"{document_title}.md\" in the CodiMD " print(f"According to your history, you did not visit \"{document_title}.md\" in the CodiMD "
"instance recently. Migrating the document and generating a new, random URL/path for it") "instance recently. Migrating the document and generating a new, random URL/path for it")
...@@ -99,9 +89,11 @@ def iterate_over_archive(archive_file, instance_url, lookup_map, session_id, url ...@@ -99,9 +89,11 @@ def iterate_over_archive(archive_file, instance_url, lookup_map, session_id, url
print(f"New URL after document migration with new, random URL/subpath: " print(f"New URL after document migration with new, random URL/subpath: "
f"{generated_url}") f"{generated_url}")
urls_to_visit.append(generated_url) urls_to_visit.append(generated_url)
print()
def create_urls_to_visit_file(filename, urls_to_visit): def create_urls_to_visit_file(filename, urls_to_visit):
print_block_heading("Creating file containg the URLs to visit")
print(f"A new file {filename} will be created. It contains all URLs you need to visit in order to make the" print(f"A new file {filename} will be created. It contains all URLs you need to visit in order to make the"
f"migrated documents appear in your HedgeDoc history.") f"migrated documents appear in your HedgeDoc history.")
print("This can be automated by running the scripts in the directory \"history_scripts\"") print("This can be automated by running the scripts in the directory \"history_scripts\"")
......
from export_md.codimd_export import export_from_codimd from export_md.codimd_export import export_from_codimd
from import_md.hedgedoc_import import import_into_hedgedoc from import_md.hedgedoc_import import import_into_hedgedoc
from common import get_sessionid from common import get_sessionid, print_block_heading
if __name__ == "__main__": if __name__ == "__main__":
export_folder = "codimd-documents" export_folder = "codimd-documents"
export_archive = "archive.zip" export_archive = "archive.zip"
print_block_heading("Beginning export from CodiMD...")
export_from_codimd("http://localhost:3001", get_sessionid("CodiMD", "connect.sid"), export_folder) export_from_codimd("http://localhost:3001", get_sessionid("CodiMD", "connect.sid"), export_folder)
print_block_heading("Beginning import to HedgeDoc...")
import_into_hedgedoc("http://hedgedoc:3000", get_sessionid("HedgeDoc", "connect.hedgeDoc.sid"), import_into_hedgedoc("http://hedgedoc:3000", get_sessionid("HedgeDoc", "connect.hedgeDoc.sid"),
export_folder, export_archive) export_folder, export_archive)