Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
CodiMD export script
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
stgroup
misc
CodiMD export script
Merge requests
!1
Hedgedoc import
Code
Review changes
Check out branch
Download
Patches
Plain diff
Expand sidebar
Merged
Hedgedoc import
hedgedoc-import
into
main
Overview
2
Commits
38
Pipelines
0
Changes
1
Merged
Andreas Domanowski
requested to merge
hedgedoc-import
into
main
2 years ago
Overview
2
Commits
38
Pipelines
0
Changes
1
0
0
Merge request reports
Viewing commit
9634364b
Prev
Next
Show latest version
1 file
+
2
−
0
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
9634364b
Add rudimentary error handling to POST requests
· 9634364b
Andreas Domanowski
authored
2 years ago
import_md/hedgedoc_import.py
0 → 100644
+
128
−
0
View file @ 9f8b5719
Edit in single-file editor
Open in Web IDE
#!/usr/bin/env python3
import
json
from
urllib.error
import
HTTPError
from
common
import
get_sessionid
,
print_block_heading
from
pathlib
import
Path
import
urllib.parse
import
urllib.request
import
os
from
zipfile
import
ZipFile
def import_single_document(instance_url, hedgedoc_free_url, content, session_id):
    """Upload one markdown document to HedgeDoc and return the resulting URL.

    The document is sent via HTTP POST to ``<instance_url>/new/<hedgedoc_free_url>``
    together with the ``connect.hedgeDoc.sid`` session cookie.

    Args:
        instance_url: Base URL of the HedgeDoc instance. A trailing slash is
            tolerated and ignored.
        hedgedoc_free_url: Requested path of the new note. It is percent-quoted
            before being appended to the request URL. Callers in this file
            pass an empty string to request a randomly generated path.
        content: Markdown text of the document (encoded as UTF-8 for upload).
        session_id: Value of the ``connect.hedgeDoc.sid`` cookie.

    Returns:
        The URL reported by the response object (``response.url``).

    Raises:
        SystemExit: If the response URL equals the instance root, which is
            treated as "document was not created".
        urllib.error.HTTPError: Propagated unchanged from ``urlopen`` (HTTP 409
            is deliberately not handled here; callers deal with it).
    """
    # Normalise the base URL so that "https://host/notes/" and
    # "https://host/notes" behave identically. Without this a trailing slash
    # would yield ".../notes//new/..." and defeat the root-redirect check below.
    base_url = instance_url.rstrip("/")
    sanitized_free_url = urllib.parse.quote(hedgedoc_free_url)
    request_url = base_url + '/new/' + sanitized_free_url

    headers = {
        "Cookie": f"connect.hedgeDoc.sid={session_id}",
        "Content-Type": "text/markdown",
    }
    req = urllib.request.Request(
        request_url,
        data=content.encode("utf-8"),
        method='POST',
        headers=headers,
    )

    # Unfortunately, no error is raised if a document is not created because
    # the session cookie is invalid; the only visible symptom is that the
    # response URL is the instance root.
    # HTTP 409 is intentionally not handled here for the sake of simplicity;
    # it surfaces as an HTTPError and is handled by the caller.
    with urllib.request.urlopen(req) as response:
        if response.url == base_url + "/":
            raise SystemExit("Could not import document. Please check your HedgeDoc session cookie. Aborting...")
        return response.url
def check_file_exists(file):
    """Abort the script unless ``file`` exists.

    Args:
        file: Path to check with ``os.path.exists``.

    Raises:
        SystemExit: If the path does not exist. The message tells the user to
            export the data from CodiMD first.
    """
    if os.path.exists(file):
        # Happy path: report and hand control back to the caller.
        print(f"Required file {file} exists. Proceeding...")
        return

    abort_message = (
        f"ERROR: File {file} does not exist. Export your data from CodiMD and re-execute this script again!"
        f" Aborting..."
    )
    raise SystemExit(abort_message)
def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
    """Import all documents of an exported archive into a HedgeDoc instance.

    Steps:
      1. Verify that ``archive_file`` exists (aborts otherwise).
      2. Load ``history.json`` from ``export_folder`` if present and build a
         mapping from document title to the id stored in the history.
      3. Upload every document of the archive via ``process_archive_export``.
      4. Write all resulting URLs to
         ``history_scripts/hedgedoc_documents_to_visit.url``.

    Args:
        instance_url: Base URL of the target HedgeDoc instance.
        session_id: HedgeDoc session cookie value used for the uploads.
        export_folder: Directory that may contain ``history.json``.
        archive_file: Path of the zip archive holding the documents.

    Raises:
        SystemExit: If the archive file does not exist (raised by
            ``check_file_exists``) or an upload fails fatally.
    """
    print_block_heading(f"Checking existence of archive file ({archive_file})")
    check_file_exists(archive_file)

    # get exported history map
    history_json_filename = "history.json"
    history_dictionary = {}
    try:
        # JSON is UTF-8 encoded; do not depend on the platform's locale
        # encoding, which would break on non-ASCII document titles.
        history_path = os.path.join(export_folder, history_json_filename)
        with open(history_path, encoding="utf-8") as map_file:
            history_dictionary = json.load(map_file)
    except FileNotFoundError:
        # A missing history is not fatal: documents simply get random paths.
        print_block_heading(
            f"INFO: could not find file {history_json_filename}. Continuing anyways with random generated paths for "
            f"documents")

    # mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
    lookup_map = {}
    if "history" in history_dictionary:
        lookup_map = {entry["text"]: entry["id"] for entry in history_dictionary["history"]}

    # URLs to visit to make the new document available in the history
    urls_to_visit = []
    process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit)

    create_urls_to_visit_file("history_scripts/hedgedoc_documents_to_visit.url", urls_to_visit)
def process_archive_export(archive_file, instance_url, lookup_map, session_id, urls_to_visit):
    """Upload every document contained in the zip archive to HedgeDoc.

    For each file in the archive the title is the file name without its
    extension. If the title is a key of ``lookup_map`` the document is handed to
    ``try_generate_free_url_document`` so it can keep its known path; otherwise
    it is uploaded with an empty path so that a random one is generated.

    Args:
        archive_file: Path of the zip archive with the exported documents.
        instance_url: Base URL of the target HedgeDoc instance.
        lookup_map: Mapping from document title to the id/path taken from the
            exported history.
        session_id: HedgeDoc session cookie value used for the uploads.
        urls_to_visit: List that is extended in place with the URL of every
            uploaded document.
    """
    # iterate over files in archive
    with ZipFile(archive_file) as zf:
        print("Now scanning your provided archive file containing the documents you are the owner of")
        print("If you visited your own document via a, e.g., \"codi-instance.tld/my_specified_free_url\" " +
              "this script tries to migrate it to the HedgeDoc instance at \"hedgedoc-instance.tld/my_own_path\"")
        print("If this is not possible, a new random URL for the document will be created")

        print_block_heading("Iterating over files in archive and trying to upload them")
        for member in zf.infolist():
            # Directory entries carry no document content; uploading them
            # would only create empty notes.
            if member.is_dir():
                continue

            file = member.filename
            document_title = Path(file).stem

            # Read the whole document first so the archive member is closed
            # again before the (potentially slow) network request starts.
            with zf.open(member) as f:
                document_content = f.read().decode("UTF-8")

            # check for every file if there is a match between its name (= title in history json) and an existing path
            # for a document
            if document_title in lookup_map:
                try_generate_free_url_document(document_content, document_title, instance_url,
                                               lookup_map, session_id, urls_to_visit)
            else:
                print(f"According to your history (or lack thereof) , you did not visit \"{document_title}.md\" in the "
                      f"CodiMD instance recently. Migrating the document and generating a new, random URL/path for it")
                # empty string implies HedgeDoc should create a new ID
                generated_url = import_single_document(instance_url, "", document_content, session_id)
                print(f"New URL after document migration with new, random URL/subpath: "
                      f"{generated_url}")
                urls_to_visit.append(generated_url)
            # blank line to visually separate the output of each document
            print()
def try_generate_free_url_document(document_content, document_title, instance_url, lookup_map, session_id,
                                   urls_to_visit):
    """Upload a document and try to keep the path it had according to the history.

    The path stored in ``lookup_map[document_title]`` is requested first. If
    the server answers with HTTP 409 the document is uploaded again with an
    empty path so that a random one is generated. Any other HTTP error aborts
    the script.

    Args:
        document_content: Markdown text of the document.
        document_title: Title of the document; must be a key of ``lookup_map``.
        instance_url: Base URL of the target HedgeDoc instance.
        lookup_map: Mapping from document title to the previously used id/path.
        session_id: HedgeDoc session cookie value used for the upload.
        urls_to_visit: List that is extended in place with the final URL.

    Raises:
        SystemExit: If the upload fails with an HTTP error other than 409.
    """
    previous_path = lookup_map[document_title]
    print(f"You visited your own document \"{document_title}.md\" via the identifier/path " +
          f"\"{previous_path}\"")
    print("Trying to migrate this document and make it available under the already visited path")
    try:
        new_url = import_single_document(instance_url, previous_path, document_content, session_id)
        print(f"Migration was possible. New URL: {instance_url}/{previous_path}")
    except HTTPError as error:
        # ``code`` is the documented HTTPError attribute and works on every
        # Python 3 version; ``status`` only exists on newer interpreters.
        if error.code == 409:
            print("ATTENTION: Could not migrate document with the same path. Uploading anyways and "
                  "creating a new, random path")
            new_url = import_single_document(instance_url, "", document_content, session_id)
            print(f"New URL after document migration without migrating the URL/subpath: {new_url}")
        else:
            # Keep the original HTTP error attached as the cause for debugging.
            raise SystemExit("Could not create document. Please check your session cookie. Aborting...") from error
    urls_to_visit.append(new_url)
def create_urls_to_visit_file(filename, urls_to_visit):
    """Write the URLs of all migrated documents to ``filename``, one per line.

    Args:
        filename: Path of the file to create; an existing file is overwritten.
            A missing parent directory is created.
        urls_to_visit: Iterable of URL strings to write.
    """
    print_block_heading("Creating file containing the URLs to visit")
    print(f"A new file {filename} will be created. It contains all URLs you need to visit in order to make the "
          f"migrated documents appear in your HedgeDoc history.")
    print("This can be automated by running the scripts in the directory \"history_scripts\"")
    print("BE AWARE: Opening a lot of tabs might be quite resource-intensive.")

    # This runs after all uploads are done. Make sure the target directory
    # exists so the collected URLs are not lost to a FileNotFoundError.
    target_directory = os.path.dirname(filename)
    if target_directory:
        os.makedirs(target_directory, exist_ok=True)

    with open(filename, 'w', encoding="utf-8") as f:
        f.writelines(url + "\n" for url in urls_to_visit)
if __name__ == "__main__":
    # Script entry point: migrate the exported documents into the HedgeDoc
    # instance below. The session id comes from common.get_sessionid
    # (presumably prompts for / looks up the cookie value - see that helper).
    import_into_hedgedoc(
        instance_url="https://md.inf.tu-dresden.de/notes",
        session_id=get_sessionid("HedgeDoc", "connect.hedgeDoc.sid"),
        export_folder="codimd-documents",
        archive_file="../archive.zip",
    )
Loading