#!/usr/bin/env python3
"""
# Script ob-bookmarks
## Metadata
**Kind**: #obsidian/ob-script
**Language**: #python
**Parent**:: [[Obsidian Chore Scripts]], [[Executable Markdown File]]
## Synopsis
Export Microsoft Edge (Chromium) bookmarks into Obsidian.
Example:: [[Tools (Bookmarks)]]
## Script
```python
# """
from pathlib import Path
from urllib.parse import unquote
import tempfile
import json
import textwrap
import os
import re
import shutil
import filecmp
import subprocess
# ruff: noqa: E501

# URL prefixes that get special treatment when rendering a bookmark.
BOOKMARKLET_PREFIX = "javascript:"
BOOKMARKLET_PREFIX_LEN = len(BOOKMARKLET_PREFIX)
FOLDER_COMMENT_PREFIX = "edge://favorites/?id="

# prettier is optional: it is looked up on PATH once at start-up, and on
# Windows it is invoked through its ".cmd" launcher.
PRETTIER_EXISTS = shutil.which("prettier") is not None
if os.name == "nt":
    prettier_exe = "prettier.cmd"
else:
    prettier_exe = "prettier"

# "data:" optionally followed by "text/html;" and then "charset=<...>,".
DATA_URL_RE = re.compile(r"data:(text/html;)?(charset=[^,]+,)?")
# A "key:: " inline field at the start of any line.
METADATA_RE = re.compile(r"^([a-zA-Z0-9 _-]+):: ", re.MULTILINE)
# A space, one or more pairs of backslashes, and another space.
NEWLINE_RE = re.compile(r" (\\\\)+ ")

# Destination folder that receives the generated notes.
export_dir = Path.home().joinpath("Dropbox", "Brain", "robot", "Bookmarks Library")
def decode_javascript(encoded_js):
    """Percent-decode a bookmarklet body and pretty-print it when possible.

    Args:
        encoded_js: The bookmarklet source with the "javascript:" prefix
            already removed, still percent-encoded.

    Returns:
        The decoded source with trailing whitespace stripped. When prettier
        was found on PATH the source is first formatted with its "flow"
        parser.

    Raises:
        subprocess.CalledProcessError: If prettier exits with a non-zero
            status.
    """
    source = unquote(encoded_js)
    if not PRETTIER_EXISTS:
        return source.rstrip()
    # No shell=True: with an argument *list* a POSIX shell receives the extra
    # items as its own arguments, so prettier would run without
    # "--parser flow". The text is exchanged as UTF-8 (not the locale codec)
    # so non-ASCII characters survive, matching decode_html.
    formatted = subprocess.check_output(
        [prettier_exe, "--parser", "flow"],
        input=source,
        text=True,
        encoding="utf-8",
    )
    return formatted.rstrip()
def decode_html(encoded_html):
    """Percent-decode an HTML payload and pretty-print it when possible.

    Args:
        encoded_html: Percent-encoded HTML text.

    Returns:
        The decoded HTML with trailing whitespace stripped. When prettier was
        found on PATH the markup is first run through its "html" parser with
        whitespace sensitivity set to "ignore".

    Raises:
        subprocess.CalledProcessError: If prettier exits with a non-zero
            status.
    """
    html_source = unquote(encoded_html)
    if not PRETTIER_EXISTS:
        return html_source.rstrip()

    command = [
        prettier_exe,
        "--parser",
        "html",
        "--html-whitespace-sensitivity",
        "ignore",
    ]
    # Bytes in, bytes out: the UTF-8 round trip is done explicitly here.
    raw_output = subprocess.check_output(command, input=html_source.encode("utf-8"))
    return raw_output.decode("utf-8").rstrip()
def replace_newlines(text):
    """Expand inline backslash markers into real line breaks.

    Every run matched by NEWLINE_RE (a space, one or more pairs of
    backslashes, a space) is replaced by one newline per backslash pair.

    Args:
        text: The string to expand.

    Returns:
        The text with each marker replaced by the corresponding newlines.
    """

    def _to_newlines(match):
        # The pattern's capture group repeats, so group(1) only ever holds
        # the final backslash pair. Count from the whole match instead,
        # leaving out the two flanking spaces.
        backslash_count = len(match.group(0)) - 2
        return "\n" * (backslash_count // 2)

    return NEWLINE_RE.sub(_to_newlines, text)
def format_folder_note(note):
    """Turn a folder-note bookmark title into Markdown metadata lines.

    Args:
        note: The raw bookmark title holding the folder note.

    Returns:
        The formatted note. A note that starts with "# " loses everything up
        to and including its first " \\\\ " separator, and yields "" when it
        has no such separator. Line-break markers are expanded, "key:: "
        fields get a bold key, and a result starting with "#" is prefixed
        with "**Tags**:: ".
    """
    if note.startswith("# "):
        _heading, separator, remainder = note.partition(" \\\\ ")
        if not separator:
            # Only a heading was present; there is nothing left to render.
            return ""
        note = remainder

    expanded = replace_newlines(note)
    formatted = METADATA_RE.sub(r"**\1**:: ", expanded)
    if formatted.startswith("#"):
        formatted = "**Tags**:: " + formatted
    return formatted
# Root folder names as they appear in the bookmarks file, mapped to the
# title used for the generated notes.
TRANSLATIONS = {
    "其他收藏夹": "Other Favorites",
    "收藏夹栏": "Favorites Bar",
    "Other favorites": "Other Favorites",
    "Favorites bar": "Favorites Bar",
}


def translate_name(name):
    """Return the entry for *name* in TRANSLATIONS, or *name* itself if absent."""
    if name in TRANSLATIONS:
        return TRANSLATIONS[name]
    return name
def _bookmarklet_item(title, anchor, syntax, code):
    """Render a "#bookmarklet" list item followed by a fenced code block."""
    # Built by joining lines rather than str.format, so braces in the title
    # or in the code can never be mistaken for replacement fields.
    return "\n".join(
        [
            f"- {title} #bookmarklet ^{anchor}",
            f"```{syntax}",
            textwrap.indent(code, " "),
            "```",
            "",
        ]
    )


def _link_host(url):
    """Return the text between "://" and the next "/", or *url* if it has no "://"."""
    pieces = url.split("://")
    if len(pieces) < 2:
        return url
    return pieces[1].split("/")[0]


def export_bookmarks_folder(dir, folder, parent):
    """Write one Markdown note for *folder* and recurse into its sub-folders.

    The note "<name> (Bookmarks).md" is created inside *dir*. It holds a
    metadata section followed by a "Children" list with one entry per child.

    Args:
        dir: Directory (a path object) that receives the note; it is created
            if missing. Sub-folders are exported into same-named
            sub-directories.
        folder: Bookmark folder dict with "name" and "children" keys (and
            "id" when *parent* is given). Its "name", and the "name" of every
            exported child folder, is replaced in place by its translation.
        parent: The enclosing folder dict, or None for the root.

    Returns:
        None. A folder without children produces no note (its directory is
        still created).

    Notes:
        * A first child that is a bookmark pointing at FOLDER_COMMENT_PREFIX
          is treated as the folder note; "#private" in its title adds
          "publish: false" front matter.
        * Children named "§ Inbox" or "Ω Archive", or whose name contains
          "#private", are skipped.
    """
    dir.mkdir(exist_ok=True)
    folder["name"] = translate_name(folder["name"])
    name = folder["name"]
    children = folder["children"]
    if not children:
        return

    # Explicit UTF-8: the note contains non-ASCII text (the folder emoji and
    # arbitrary bookmark titles) that a locale default codec may not encode.
    with open(
        dir / f"{name} (Bookmarks).md", "w", newline="\n", encoding="utf-8"
    ) as md_file:
        first_child = children[0]
        has_folder_note = first_child["type"] != "folder" and first_child[
            "url"
        ].startswith(FOLDER_COMMENT_PREFIX)

        if has_folder_note and "#private" in first_child["name"]:
            print("---\npublish: false\n---", file=md_file)
        print(f"# {name} (Bookmarks)\n", file=md_file)
        print("## Metadata\n", file=md_file)
        if parent is not None:
            print(
                f'**Parent**:: [[{parent["name"]} (Bookmarks)|{parent["name"]}]]',
                file=md_file,
            )
        print(
            "**Kind**:: #bookmarks-collection\n**Source**:: #from/browser", file=md_file
        )
        print("**Generated by**:: [[ob-bookmarks]]", file=md_file)
        print(
            f'**Chrome URL**:: `edge://favorites/{"" if parent is None else "?id=" + folder["id"]}`',
            file=md_file,
        )
        if has_folder_note:
            folder_note = format_folder_note(first_child["name"])
            if folder_note != "":
                print(folder_note, file=md_file)
        print("", file=md_file)
        print("## Children\n", file=md_file)

        for child in children:
            raw_name = child["name"]
            if raw_name in ("§ Inbox", "Ω Archive") or "#private" in raw_name:
                continue

            if child["type"] == "folder":
                child["name"] = translate_name(raw_name)
                export_bookmarks_folder(dir / child["name"], child, folder)
                print(
                    f'- 📁 [[{child["name"]} (Bookmarks)|{child["name"]}]]',
                    file=md_file,
                )
                continue

            url = child["url"]
            # First line of the title is the label; the rest is a description.
            split_parts = replace_newlines(raw_name).splitlines(keepends=False)
            # An empty title yields no lines at all; fall back to "".
            child_name = split_parts[0] if split_parts else ""
            description = "\n".join(split_parts[1:])
            anchor = child["id"]
            is_folder_comment = url.startswith(FOLDER_COMMENT_PREFIX)

            data_url_match = DATA_URL_RE.match(url)
            if data_url_match:
                data_url_syntax = ""
                data_url_content = url
                # group(1) is None when the optional "text/html;" part is
                # absent, so test for a match instead of searching inside it.
                if data_url_match.group(1) is not None:
                    data_url_syntax = "html"
                    data_url_content = decode_html(url[data_url_match.end() :])
                print(
                    _bookmarklet_item(
                        child_name, anchor, data_url_syntax, data_url_content
                    ),
                    file=md_file,
                )
            elif url.startswith(BOOKMARKLET_PREFIX):
                print(
                    _bookmarklet_item(
                        child_name,
                        anchor,
                        "javascript",
                        decode_javascript(url[BOOKMARKLET_PREFIX_LEN:]),
                    ),
                    file=md_file,
                )
            elif not is_folder_comment:
                print(
                    f"- {child_name} [{_link_host(url)}]({url}) ^{anchor}",
                    file=md_file,
                )

            if description != "":
                if description.startswith("#"):
                    # Put a blank line after a leading tag line.
                    description = description.replace("\n", "\n\n", 1)
                if not is_folder_comment:
                    print("", file=md_file)
                print(
                    textwrap.indent(description, " "),
                    file=md_file,
                )
                print("", file=md_file)
# Location of the browser's bookmarks database for the current platform.
db_file = (
    Path.home() / "AppData/Local/Microsoft/Edge/User Data/Default/Bookmarks"
    if os.name == "nt"
    else Path.home() / "Library/Application Support/Microsoft Edge/Default/Bookmarks"
)
# Read the JSON explicitly as UTF-8; the locale default codec can fail on or
# corrupt non-ASCII bookmark titles.
with db_file.open(encoding="utf-8") as fd:
    roots = json.load(fd)["roots"]

# Synthetic top-level folder so both roots are exported under one index note.
roots_folder = {
    "name": "Roots",
    "type": "folder",
    "children": [roots["bookmark_bar"], roots["other"]],
}

# Export into a scratch directory first, then move over only the notes whose
# content actually changed.
tmp_dir = Path(tempfile.mkdtemp())
try:
    export_bookmarks_folder(tmp_dir, roots_folder, None)
    for root, _dirs, files in os.walk(tmp_dir):
        for f in files:
            src_file = Path(root) / f
            relative_path = src_file.relative_to(tmp_dir)
            target_file = export_dir / relative_path
            # parents=True so a missing export tree is created in one go.
            target_file.parent.mkdir(parents=True, exist_ok=True)
            is_unchanged = target_file.exists() and filecmp.cmp(
                src_file, target_file, shallow=False
            )
            if not is_unchanged:
                print("NEW:", relative_path)
                os.replace(src_file, target_file)
finally:
    # Remove the scratch directory even when the export fails part-way.
    shutil.rmtree(tmp_dir)
"""
# vim: ft=python
```
"""