Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
CodiMD export script
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
stgroup
misc
CodiMD export script
Commits
13f564f9
Commit
13f564f9
authored
2 years ago
by
Andreas Domanowski
Browse files
Options
Downloads
Patches
Plain Diff
WIP: Implement HedgeDoc import with visiting URLs after upload, Add accessibility check
parent
8ba97220
No related branches found
No related tags found
1 merge request
!1
Hedgedoc import
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
codimd_export.py
+1
-1
1 addition, 1 deletion
codimd_export.py
hedgedoc_import.py
+55
-26
55 additions, 26 deletions
hedgedoc_import.py
with
56 additions
and
27 deletions
codimd_export.py
+
1
−
1
View file @
13f564f9
...
...
@@ -46,7 +46,7 @@ def export_from_codimd(instance_url, session_id, export_to):
contents
=
slurp
(
f
"
{
document_url
}
/download
"
,
session_id
)
with
open
(
Path
(
target_dir
,
f
"
{
document_id
}
.md
"
),
mode
=
"
wb
"
)
as
stream
:
stream
.
write
(
contents
)
with
open
(
Path
(
target_dir
,
f
"
map.map
"
),
mode
=
"
w
"
)
as
stream
:
with
open
(
Path
(
target_dir
,
f
"
history.json
"
),
mode
=
"
w
"
)
as
stream
:
json
.
dump
(
data
,
stream
)
num_ok
+=
1
except
HTTPError
as
error
:
...
...
This diff is collapsed.
Click to expand it.
hedgedoc_import.py
+
55
−
26
View file @
13f564f9
#!/usr/bin/env python3
import
json
from
urllib.error
import
HTTPError
from
common
import
get_sessionid
from
pathlib
import
Path
import
urllib.parse
...
...
@@ -10,24 +10,34 @@ import subprocess
from
zipfile
import
ZipFile
def
check_accessibility
(
instance_url
,
session_id
):
request_url
=
instance_url
+
'
/me/
'
headers
=
{
"
Cookie
"
:
f
"
connect.hedgeDoc.sid=
{
session_id
}
"
}
req
=
urllib
.
request
.
Request
(
request_url
,
headers
=
headers
)
with
urllib
.
request
.
urlopen
(
req
)
as
response
:
response_json
=
json
.
load
(
response
)
if
response_json
[
"
status
"
]
!=
"
ok
"
:
raise
SystemExit
(
f
"
Could not access protected resources at
{
request_url
}
. Make sure that the specified
"
f
"
cookie is correct. Aborting...
"
)
def
import_single_document
(
instance_url
,
hedgedoc_free_url
,
content
,
session_id
):
sanitized_free_url
=
hedgedoc_free_url
.
replace
(
"
"
,
"
%20
"
)
request_url
=
instance_url
+
'
/new/
'
+
sanitized_free_url
user_agent
=
'
Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)
'
headers
=
{
'
User-Agent
'
:
user_agent
,
"
Cookie
"
:
f
"
connect.hedgeDoc.sid=
{
session_id
}
"
,
"
Content-Type
"
:
"
text/markdown
"
}
headers
=
{
"
Cookie
"
:
f
"
connect.hedgeDoc.sid=
{
session_id
}
"
,
"
Content-Type
"
:
"
text/markdown
"
}
document_contents
=
str
.
encode
(
content
)
req
=
urllib
.
request
.
Request
(
request_url
,
data
=
document_contents
,
method
=
'
POST
'
,
headers
=
headers
)
req
=
urllib
.
request
.
Request
(
request_url
,
data
=
str
.
encode
(
content
),
method
=
'
POST
'
,
headers
=
headers
)
with
urllib
.
request
.
urlopen
(
req
)
as
response
:
return
response
.
url
def
import_into_hedgedoc
(
instance_url
,
session_id
,
export_folder
,
archive_file
):
folder
=
os
.
path
.
join
(
os
.
getcwd
(),
export_folder
);
check_accessibility
(
instance_url
,
session_id
)
browser
=
select_browser
()
# get exported history map
with
open
(
os
.
path
.
join
(
export_folder
,
"
map
.json
"
))
as
map_file
:
with
open
(
os
.
path
.
join
(
export_folder
,
"
history
.json
"
))
as
map_file
:
history_dictionary
=
json
.
load
(
map_file
)
# mapping from title of a document (= filename without md extension in archive) to its id (= note url in CodiMD)
...
...
@@ -40,44 +50,63 @@ def import_into_hedgedoc(instance_url, session_id, export_folder, archive_file):
# iterate over files in archive
with
ZipFile
(
archive_file
)
as
zf
:
print
(
"
Now scanning your provided archive file containing the documents you are the owner of
"
)
print
(
"
If you visited your own document via a, e.g.,
\"
codi-instance.tld/my_own_path
\"
)
"
+
"
this script tries to migrate it to the HedgeDoc instance at
\"
hedgedoc-instance.tld/my_own_path
\"
"
)
print
(
"
If this is not possible, a new random URL for the document will be created
"
)
for
file
in
zf
.
namelist
():
document_title
=
Path
(
file
).
stem
# check for every file if there is a match between its name (= title in history json) and an existing path
# for a document
document_content
=
None
with
zf
.
open
(
file
)
as
f
:
document_content
=
f
.
read
().
decode
(
"
UTF-8
"
)
if
document_title
in
lookup_map
:
print
(
"
filename:
"
,
document_title
,
"
, in lookupMap:
"
,
lookup_map
[
document_title
])
urls_to_visit
.
append
(
import_single_document
(
instance_url
,
lookup_map
[
document_title
],
document_content
,
session_id
))
print
(
f
"
\t
You visited your own document
\"
{
document_title
}
\"
.md) via the path
"
+
f
"
\"
{
lookup_map
[
document_title
]
}
\"
"
)
print
(
f
"
\t
Trying to migrate this document and make it available under the already visited path
"
)
try
:
new_url
=
import_single_document
(
instance_url
,
lookup_map
[
document_title
],
document_content
,
session_id
)
urls_to_visit
.
append
(
new_url
)
except
HTTPError
as
error
:
if
error
.
status
==
409
:
print
(
"
\t
HTTP 409. Uploading anyways (new path, random ID)
"
)
new_url
=
import_single_document
(
instance_url
,
""
,
document_content
,
session_id
)
urls_to_visit
.
append
(
new_url
)
else
:
print
(
"
no mapping found for
"
,
document_title
,
"
, uploading anyway
"
)
# empty string implies HedgeDoc should create a new ID
urls_to_visit
.
append
(
import_single_document
(
instance_url
,
""
,
document_content
,
session_id
))
browser
=
"
firefox
"
print
(
"
Your specified browser now needs to visit every newly created document to ensure that it
'
s available in
"
"
your history in HedgeDoc
"
)
subprocess
.
run
([
browser
]
+
urls_to_visit
)
def
select_browser
():
options
=
[
'
chrome
'
,
'
firefox
'
,
'
opera
'
]
print
(
"
Once you
'
ve uploaded all your documents, they unfortunately do not appear in your HedgeDoc history.
"
)
print
(
"
To make sure that they are available to you, this script automatically visits all your newly uploaded
"
"
documents in your browser.
"
)
print
(
"
Therefore, you need to specify your browser. It needs to be on your path with the same name as
"
"
specified here
"
)
print
(
"
ATTENTION - this needs to be a browser where you have an active and logged-in HedgeDoc session
"
)
supported_browsers
=
[
"
firefox
"
,
"
opera
"
,
"
safari
"
,
"
google-chrome
"
,
"
chromium
"
]
user_input
=
''
input_message
=
"
Pick an opt
ion:
\n
"
input_message
=
"
Specify a browser which holds an active and logged-in HedgeDoc sess
ion:
\n
"
for
i
ndex
,
item
in
enumerate
(
option
s
):
input_message
+=
f
'
{
i
ndex
+
1
}
)
{
item
}
\n
'
for
i
,
browser_suggestion
in
enumerate
(
supported_browser
s
):
input_message
+=
f
'
{
i
+
1
}
)
{
browser_suggestion
}
\n
'
input_message
+=
'
Your choice:
'
#
input_message += 'Your choice: '
while
user_input
not
in
map
(
str
,
range
(
1
,
len
(
option
s
)
+
1
)):
while
user_input
not
in
map
(
str
,
range
(
1
,
len
(
supported_browser
s
)
+
1
)):
user_input
=
input
(
input_message
)
print
(
'
You picked:
'
+
options
[
int
(
user_input
)
-
1
])
print
(
'
You chose:
'
+
supported_browsers
[
int
(
user_input
)
-
1
])
return
supported_browsers
[
int
(
user_input
)
-
1
]
if
__name__
==
"
__main__
"
:
# select_browser()
import_into_hedgedoc
(
"
https://md.inf.tu-dresden.de/notes
"
,
123
,
"
codimd-documents
"
,
"
archive.zip
"
)
# import_into_hedgedoc("https://md.inf.tu-dresden.de/notes", get_sessionid("connect.hedgeDoc.sid"), "codimd-documents")
import_into_hedgedoc
(
"
https://md.inf.tu-dresden.de/notes
"
,
get_sessionid
(
"
connect.hedgeDoc.sid
"
),
"
codimd-documents
"
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment