ob-bookmarks - ~iany/kb

#!/usr/bin/env python3
"""
# Script ob-bookmarks

## Metadata

**Kind**: #obsidian/ob-script
**Language**: #python
**Parent**:: [[Obsidian Chore Scripts]], [[Executable Markdown File]]

## Synopsis

Export Microsoft Edge (Chromium) bookmarks into Obsidian.

Example:: [[Tools (Bookmarks)]]

## Script

```python
#"""
from pathlib import Path
from urllib.parse import unquote
import tempfile
import json
import textwrap
import os
import re
import shutil
import filecmp
import subprocess

# ruff: noqa: E501

BOOKMARKLET_PREFIX = "javascript:"
BOOKMARKLET_PREFIX_LEN = len(BOOKMARKLET_PREFIX)
FOLDER_COMMENT_PREFIX = "edge://favorites/?id="
PRETTIER_EXISTS = shutil.which("prettier") is not None
DATA_URL_RE = re.compile("data:(text/html;)?(charset=[^,]+,)?")
METADATA_RE = re.compile(r"^([a-zA-Z0-9 _-]+):: ", re.M)
# Group 1 must span the WHOLE run of backslash pairs: a repeated capture group
# such as `(\\\\)+` only remembers its last repetition, which would make every
# run count as a single newline.
NEWLINE_RE = re.compile(r" ((?:\\\\)+) ")

# Continuation lines of a "- " list item must be indented to the item's content
# column (two spaces) to stay inside the item.
# NOTE(review): confirm the width against previously generated notes.
LIST_INDENT = "  "

# Kept as a single-line literal on purpose: this script lives inside a Markdown
# code fence, so no source line may begin with a backtick fence of its own.
_CODE_ITEM_TEMPLATE = "- {name} #bookmarklet ^{anchor}\n  ```{syntax}\n{code}\n  ```\n"

export_dir = Path.home() / "Dropbox" / "Brain" / "robot" / "Bookmarks Library"
prettier_exe = "prettier.cmd" if os.name == "nt" else "prettier"


def _prettify(source, *prettier_args):
    """Format *source* with prettier when it is installed, else return it as is.

    The command is passed as an argument list WITHOUT ``shell=True`` (on POSIX a
    list combined with ``shell=True`` hands only the first element to the shell,
    silently dropping the options) and text is exchanged as UTF-8 bytes so the
    result does not depend on the locale encoding.

    Raises:
        subprocess.CalledProcessError: if prettier exits with a non-zero status.
    """
    if not PRETTIER_EXISTS:
        return source.rstrip()
    formatted = subprocess.check_output(
        [prettier_exe, *prettier_args],
        input=source.encode("utf-8"),
    )
    return formatted.decode("utf-8").rstrip()


def decode_javascript(encoded_js):
    """Percent-decode a bookmarklet body and pretty-print it as JavaScript."""
    return _prettify(unquote(encoded_js), "--parser", "flow")


def decode_html(encoded_html):
    """Percent-decode the payload of an HTML data URL and pretty-print it."""
    return _prettify(
        unquote(encoded_html),
        "--parser",
        "html",
        "--html-whitespace-sensitivity",
        "ignore",
    )


def replace_newlines(text):
    """Expand the in-name newline markers of a bookmark title.

    A marker is a space, one or more pairs of backslashes, and a space; each
    pair becomes one newline (one pair -> line break, two pairs -> blank line).
    """
    return NEWLINE_RE.sub(lambda m: "\n" * (len(m.group(1)) // 2), text)


def format_folder_note(note):
    """Turn the name of a folder-comment bookmark into metadata lines.

    A leading "# ..." heading segment is dropped (returning "" when nothing
    follows it), ``key:: `` prefixes are made bold, and a note that starts with
    a tag is emitted as a ``**Tags**::`` field.
    """
    if note.startswith("# "):
        try:
            note = note.split(" \\\\ ", maxsplit=1)[1]
        except IndexError:
            return ""
    note = METADATA_RE.sub(r"**\1**:: ", replace_newlines(note))
    if note.startswith("#"):
        return "**Tags**:: " + note
    return note


TRANSLATIONS = {
    "其他收藏夹": "Other Favorites",
    "收藏夹栏": "Favorites Bar",
    "Other favorites": "Other Favorites",
    "Favorites bar": "Favorites Bar",
}


def translate_name(name):
    """Map a localized or lower-cased root folder name to its canonical title."""
    return TRANSLATIONS.get(name, name)


def _link_label(url):
    """Return the host part of *url*, or the URL itself when it has no "://"."""
    _scheme, separator, rest = url.partition("://")
    if not separator:
        # e.g. "about:blank" or "mailto:..." — there is no authority to show.
        return url
    return rest.split("/")[0]


def _write_bookmark(child, md_file):
    """Write one non-folder bookmark (and its description) to *md_file*."""
    url = child["url"]
    name_lines = replace_newlines(child["name"]).splitlines(keepends=False)
    # Bookmarks may have an empty name; splitlines() then yields no lines.
    child_name = name_lines[0] if name_lines else ""
    description = "\n".join(name_lines[1:])
    anchor = child["id"]
    is_folder_comment = url.startswith(FOLDER_COMMENT_PREFIX)

    data_url_match = DATA_URL_RE.match(url)
    if data_url_match:
        # Group 1 is optional and is None for non-HTML data URLs.
        if data_url_match.group(1):
            syntax = "html"
            code = decode_html(url[data_url_match.end() :])
        else:
            syntax = ""
            code = url
        print(
            _CODE_ITEM_TEMPLATE.format(
                name=child_name,
                anchor=anchor,
                syntax=syntax,
                code=textwrap.indent(code, LIST_INDENT),
            ),
            file=md_file,
        )
    elif url.startswith(BOOKMARKLET_PREFIX):
        print(
            _CODE_ITEM_TEMPLATE.format(
                name=child_name,
                anchor=anchor,
                syntax="javascript",
                code=textwrap.indent(
                    decode_javascript(url[BOOKMARKLET_PREFIX_LEN:]), LIST_INDENT
                ),
            ),
            file=md_file,
        )
    elif not is_folder_comment:
        print(
            f"- {child_name} [{_link_label(url)}]({url}) ^{anchor}",
            file=md_file,
        )

    if description != "":
        if description.startswith("#"):
            # Separate a leading tag line from the rest with a blank line.
            description = description.replace("\n", "\n\n", 1)
        if not is_folder_comment:
            print("", file=md_file)
        print(textwrap.indent(description, LIST_INDENT), file=md_file)
        print("", file=md_file)


def export_bookmarks_folder(dir, folder, parent):
    """Write ``<name> (Bookmarks).md`` for *folder* into *dir*, recursively.

    Args:
        dir: target directory (``pathlib.Path``); created if missing. The name
            shadows the builtin but is kept for compatibility with callers.
        folder: bookmark folder node with "name", "children" and, for
            non-root folders, "id". Its "name" (and the names of child
            folders) are replaced in place by their translated form.
        parent: the enclosing folder node, or None for the root.

    Folders without children produce no note. Children named "§ Inbox" or
    "Ω Archive", or whose name contains "#private", are skipped.
    """
    dir.mkdir(exist_ok=True)
    folder["name"] = translate_name(folder["name"])
    name = folder["name"]
    if not folder["children"]:
        return

    # UTF-8 explicitly: the note contains emoji and possibly CJK names, which
    # the default locale encoding on Windows cannot represent.
    with open(
        dir / f"{name} (Bookmarks).md", "w", newline="\n", encoding="utf-8"
    ) as md_file:
        first_child = folder["children"][0]
        has_folder_note = first_child["type"] != "folder" and first_child[
            "url"
        ].startswith(FOLDER_COMMENT_PREFIX)

        if (has_folder_note and "#private" in first_child["name"]) or name == "Work":
            print("---\npublish: false\n---", file=md_file)
        print(f"# {name} (Bookmarks)\n", file=md_file)
        print("## Metadata\n", file=md_file)
        if parent is not None:
            print(
                f"**Parent**:: [[{parent['name']} (Bookmarks)|{parent['name']}]]",
                file=md_file,
            )
        print(
            "**Kind**:: #bookmarks-collection\n**Source**:: #from/browser", file=md_file
        )
        print("**Generated by**:: [[ob-bookmarks]]", file=md_file)
        id_query = "" if parent is None else "?id=" + folder["id"]
        print(f"**Chrome URL**:: `edge://favorites/{id_query}`", file=md_file)
        if has_folder_note:
            folder_note = format_folder_note(first_child["name"])
            if folder_note != "":
                print(folder_note, file=md_file)
        print("", file=md_file)

        print("## Children\n", file=md_file)
        for child in folder["children"]:
            if (
                child["name"] in ("§ Inbox", "Ω Archive")
                or "#private" in child["name"]
            ):
                continue
            if child["type"] == "folder":
                child["name"] = translate_name(child["name"])
                export_bookmarks_folder(dir / child["name"], child, folder)
                print(
                    f"- 📁 [[{child['name']} (Bookmarks)|{child['name']}]]",
                    file=md_file,
                )
            else:
                _write_bookmark(child, md_file)


def _publish(tmp_dir):
    """Move new or changed files from *tmp_dir* into ``export_dir``."""
    for root, _dirs, files in os.walk(tmp_dir):
        for f in files:
            src_file = Path(root) / f
            relative_path = src_file.relative_to(tmp_dir)
            target_file = export_dir / relative_path
            target_file.parent.mkdir(parents=True, exist_ok=True)
            # Only touch files whose content changed, so unchanged notes keep
            # their modification time.
            if not target_file.exists() or not filecmp.cmp(
                src_file, target_file, shallow=False
            ):
                print("NEW:", relative_path)
                os.replace(src_file, target_file)


def main():
    """Export the Edge bookmark tree into ``export_dir``."""
    db_file = (
        Path.home() / "AppData/Local/Microsoft/Edge/User Data/Default/Bookmarks"
        if os.name == "nt"
        else Path.home()
        / "Library/Application Support/Microsoft Edge/Default/Bookmarks"
    )
    with db_file.open(encoding="utf-8") as fd:
        roots = json.load(fd)["roots"]

    roots_folder = {
        "name": "Roots",
        "type": "folder",
        "children": [roots["bookmark_bar"], roots["other"]],
    }

    # The context manager removes the staging directory even if the export fails.
    with tempfile.TemporaryDirectory() as tmp:
        tmp_dir = Path(tmp)
        export_bookmarks_folder(tmp_dir, roots_folder, None)
        _publish(tmp_dir)


if __name__ == "__main__":
    main()

"""
# vim: ft=python
```
"""