// quartz-research-note/quartz/plugins/transformers/ofm.ts
import { QuartzTransformerPlugin } from "../types"
import { Root, Html, BlockContent, DefinitionContent, Paragraph, Code } from "mdast"
import { Element, Literal, Root as HtmlRoot } from "hast"
import { ReplaceFunction, findAndReplace as mdastFindReplace } from "mdast-util-find-and-replace"
import { slug as slugAnchor } from "github-slugger"
import rehypeRaw from "rehype-raw"
import { SKIP, visit } from "unist-util-visit"
import path from "path"
import { JSResource } from "../../util/resources"
// @ts-ignore
import calloutScript from "../../components/scripts/callout.inline.ts"
import { FilePath, pathToRoot, slugTag, slugifyFilePath } from "../../util/path"
import { toHast } from "mdast-util-to-hast"
import { toHtml } from "hast-util-to-html"
import { PhrasingContent } from "mdast-util-find-and-replace/lib"
import { capitalize } from "../../util/lang"
import { PluggableList } from "unified"

/** Feature switches for the ObsidianFlavoredMarkdown transformer. */
export interface Options {
  /** Strip `%%comment%%` spans from the raw text before parsing. */
  comments: boolean
  /** Turn `==text==` into a `text-highlight` span. */
  highlight: boolean
  /** Resolve `[[wikilinks]]` and `![[embeds]]`. */
  wikilinks: boolean
  /** Turn `> [!type]` blockquotes into callouts and ship the callout script. */
  callouts: boolean
  /** Tag `mermaid` code blocks and ship the mermaid bootstrap script. */
  mermaid: boolean
  /** Turn inline `#tags` into tag links and record them in the frontmatter. */
  parseTags: boolean
  /** Replace ASCII arrows such as `->` with HTML arrow entities. */
  parseArrows: boolean
  /** Turn trailing `^block-id` markers into element ids. */
  parseBlockReferences: boolean
  /** Also run the text replacements inside raw HTML nodes. */
  enableInHtmlEmbed: boolean
  /** Turn images whose source is a YouTube link into an embedded iframe. */
  enableYouTubeEmbed: boolean
  /** Turn images whose URL has a video extension into a `<video>` element. */
  enableVideoEmbed: boolean
}

/** Defaults applied underneath the user-supplied options. */
const defaultOptions: Options = {
  comments: true,
  highlight: true,
  wikilinks: true,
  callouts: true,
  mermaid: true,
  parseTags: true,
  parseArrows: true,
  parseBlockReferences: true,
  enableInHtmlEmbed: false,
  enableYouTubeEmbed: true,
  enableVideoEmbed: true,
}
/** Maps every recognised callout name (including aliases) to its canonical callout type. */
const calloutMapping = {
  note: "note",
  abstract: "abstract",
  summary: "abstract",
  tldr: "abstract",
  info: "info",
  todo: "todo",
  tip: "tip",
  hint: "tip",
  important: "tip",
  success: "success",
  check: "success",
  done: "success",
  question: "question",
  help: "question",
  faq: "question",
  warning: "warning",
  attention: "warning",
  caution: "warning",
  failure: "failure",
  missing: "failure",
  fail: "failure",
  danger: "danger",
  error: "danger",
  bug: "bug",
  example: "example",
  quote: "quote",
  cite: "quote",
} as const

/** Maps each ASCII arrow to the HTML entity that replaces it. */
const arrowMapping: Record<string, string> = {
  "->": "&rarr;",
  "-->": "&rArr;",
  "=>": "&rArr;",
  "==>": "&rArr;",
  "<-": "&larr;",
  "<--": "&lArr;",
  "<=": "&lArr;",
  "<==": "&lArr;",
}

/**
 * Resolves a callout name to its canonical type.
 *
 * @param calloutName - name as written in the `[!name]` directive; matched case-insensitively
 * @returns the canonical type for known names and aliases, otherwise `calloutName` unchanged
 */
function canonicalizeCallout(calloutName: string): keyof typeof calloutMapping {
  const normalizedCallout = calloutName.toLowerCase()
  // Only own keys count: a plain index lookup would also find inherited members such as
  // `constructor` or `__proto__` and return a function/object instead of a callout type.
  if (Object.prototype.hasOwnProperty.call(calloutMapping, normalizedCallout)) {
    return calloutMapping[normalizedCallout as keyof typeof calloutMapping]
  }
  // if callout is not recognized, make it a custom one (the declared return type is kept for
  // compatibility with existing callers, hence the cast)
  return calloutName as keyof typeof calloutMapping
}
/** Matches strings that start with an `http://` or `https://` scheme (case-insensitive). */
export const externalLinkRegex = /^https?:\/\//i

/** Matches the ASCII arrows `->`, `-->`, `=>`, `==>`, `<-`, `<--`, `<=` and `<==`. */
export const arrowRegex = /(-{1,2}>|={1,2}>|<-{1,2}|<={1,2})/g

// !?                 -> optional embedding
// \[\[               -> open brace
// ([^\[\]\|\#]+)?    -> optional: one or more non-special characters ([,],|, or #) (name)
// (#+[^\[\]\|\#]+)?  -> optional: one or more # then one or more non-special characters (heading link)
// (\|[^\[\]\#]+)?    -> optional: | then one or more characters other than [, ] or # (alias)
// \]\]               -> close brace
export const wikilinkRegex = /!?\[\[([^\[\]\|\#]+)?(#+[^\[\]\|\#]+)?(\|[^\[\]\#]+)?\]\]/g

/** Matches `==highlighted==` text; group 1 is the text between the markers. */
const highlightRegex = /==([^=]+)==/g

/** Matches `%%comment%%` spans, including ones that run across lines. */
const commentRegex = /%%[\s\S]*?%%/g

// from https://github.com/escwxyz/remark-obsidian-callout/blob/main/src/index.ts
// group 1 -> callout type, group 2 -> optional fold marker (+ or -)
const calloutRegex = /^\[\!(\w+)\]([+-]?)/

/** Matches a whole blockquote line that opens a callout, e.g. `> [!note]- Title`. */
const calloutLineRegex = /^> *\[\!\w+\][+-]?.*$/gm

// (?:^| )                              -> non-capturing group, tag must be preceded by a space or be at the start of the input
// #(...)                               -> capturing group, tag itself must start with #
// (?:[-_\p{L}\p{Emoji}\d])+            -> non-empty run of Unicode letters, emoji, digits, hyphens and/or underscores
// (?:\/[-_\p{L}\p{Emoji}\d]+)*         -> any number of further tag segments, each introduced by "/"
const tagRegex = /(?:^| )#((?:[-_\p{L}\p{Emoji}\d])+(?:\/[-_\p{L}\p{Emoji}\d]+)*)/gu

/** Matches a trailing `^block-id` marker. */
const blockReferenceRegex = /\^([-_A-Za-z0-9]+)$/g

// group 2 is the candidate video id; the dot in "youtu.be" is escaped so it only matches a literal "."
const ytLinkRegex = /^.*(youtu\.be\/|v\/|u\/\w\/|embed\/|watch\?v=|\&v=)([^#\&\?]*).*/

/** Matches paths that end in a known video file extension (case-sensitive). */
const videoExtensionRegex = /\.(mp4|webm|ogg|avi|mov|flv|wmv|mkv|mpg|mpeg|3gp|m4v)$/

// Splits an image embed alias into optional alt text and an optional `width` or `widthxheight`
// suffix, e.g. "alt text|100x200".
const wikilinkImageEmbedRegex =
  /^(?<alt>(?!^\d*x?\d*$).*?)?(\|?\s*?(?<width>\d+)(x(?<height>\d+))?)?$/
/**
 * Transformer plugin that adds Obsidian-flavoured Markdown features: comments, highlights,
 * wikilinks and embeds, callouts, mermaid blocks, tags, arrows, block references and
 * video/YouTube embeds. Each feature is toggled through {@link Options}.
 *
 * @param userOpts - partial options; anything omitted falls back to `defaultOptions`
 * @returns the plugin object with its text, markdown and HTML passes plus the scripts it needs
 */
export const ObsidianFlavoredMarkdown: QuartzTransformerPlugin<Partial<Options> | undefined> = (
  userOpts,
) => {
  const opts = { ...defaultOptions, ...userOpts }

  // Serialises an mdast node to an HTML string, letting raw HTML through unchanged.
  const mdastToHtml = (ast: PhrasingContent | Paragraph) => {
    const hast = toHast(ast, { allowDangerousHtml: true })!
    return toHtml(hast, { allowDangerousHtml: true })
  }

  return {
    name: "ObsidianFlavoredMarkdown",
    textTransform(_ctx, src) {
      // With every text-level pass disabled the input is returned as-is (possibly still a Buffer).
      if (!opts.comments && !opts.callouts && !opts.wikilinks) {
        return src
      }

      // All passes below operate on strings, so decode a Buffer exactly once.
      let text = src instanceof Buffer ? src.toString() : src

      // do comments at text level
      if (opts.comments) {
        text = text.replace(commentRegex, "")
      }

      // pre-transform blockquotes
      if (opts.callouts) {
        text = text.replace(calloutLineRegex, (value) => {
          // force newline after title of callout
          return value + "\n> "
        })
      }

      // pre-transform wikilinks (fix anchors to things that may contain illegal syntax e.g. codeblocks, latex)
      if (opts.wikilinks) {
        text = text.replace(wikilinkRegex, (value, ...capture) => {
          const [rawFp, rawHeader, rawAlias]: (string | undefined)[] = capture

          const fp = rawFp ?? ""
          const anchor = rawHeader?.trim().replace(/^#+/, "")
          // block references keep their leading ^ in front of the slugged anchor
          const blockRef = anchor?.startsWith("^") ? "^" : ""
          const displayAnchor = anchor ? `#${blockRef}${slugAnchor(anchor)}` : ""
          // without an explicit alias, the raw heading (with its first # turned into |) is the alias
          const displayAlias = rawAlias ?? rawHeader?.replace("#", "|") ?? ""
          const embedDisplay = value.startsWith("!") ? "!" : ""

          // external targets become regular markdown links/images
          if (rawFp?.match(externalLinkRegex)) {
            return `${embedDisplay}[${displayAlias.replace(/^\|/, "")}](${rawFp})`
          }

          return `${embedDisplay}[[${fp}${displayAnchor}${displayAlias}]]`
        })
      }

      return text
    },
    markdownPlugins() {
      const plugins: PluggableList = []

      // regex replacements
      plugins.push(() => {
        return (tree: Root, file) => {
          const replacements: [RegExp, string | ReplaceFunction][] = []
          const base = pathToRoot(file.data.slug!)

          if (opts.wikilinks) {
            replacements.push([
              wikilinkRegex,
              (value: string, ...capture: string[]) => {
                let [rawFp, rawHeader, rawAlias] = capture
                const fp = rawFp?.trim() ?? ""
                const anchor = rawHeader?.trim() ?? ""
                const alias = rawAlias?.slice(1).trim()

                // embed cases
                if (value.startsWith("!")) {
                  const ext: string = path.extname(fp).toLowerCase()
                  const url = slugifyFilePath(fp as FilePath)
                  if ([".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp"].includes(ext)) {
                    // the alias of an image embed carries alt text and/or dimensions
                    const match = wikilinkImageEmbedRegex.exec(alias ?? "")
                    const alt = match?.groups?.alt ?? ""
                    const width = match?.groups?.width ?? "auto"
                    const height = match?.groups?.height ?? "auto"
                    return {
                      type: "image",
                      url,
                      data: {
                        hProperties: {
                          width,
                          height,
                          alt,
                        },
                      },
                    }
                  } else if ([".mp4", ".webm", ".ogv", ".mov", ".mkv"].includes(ext)) {
                    return {
                      type: "html",
                      value: `<video src="${url}" controls></video>`,
                    }
                  } else if (
                    // note: ".webm" never reaches this branch, the video branch above takes it
                    [".mp3", ".webm", ".wav", ".m4a", ".ogg", ".3gp", ".flac"].includes(ext)
                  ) {
                    return {
                      type: "html",
                      value: `<audio src="${url}" controls></audio>`,
                    }
                  } else if ([".pdf"].includes(ext)) {
                    return {
                      type: "html",
                      value: `<iframe src="${url}"></iframe>`,
                    }
                  } else {
                    // anything else is a transclusion of another page (or a block of it)
                    const block = anchor
                    return {
                      type: "html",
                      data: { hProperties: { transclude: true } },
                      value: `<blockquote class="transclude" data-url="${url}" data-block="${block}"><a href="${
                        url + anchor
                      }" class="transclude-inner">Transclude of ${url}${block}</a></blockquote>`,
                    }
                  }
                }

                // internal link
                const url = fp + anchor
                return {
                  type: "link",
                  url,
                  children: [
                    {
                      type: "text",
                      value: alias ?? fp,
                    },
                  ],
                }
              },
            ])
          }

          if (opts.highlight) {
            replacements.push([
              highlightRegex,
              (_value: string, ...capture: string[]) => {
                const [inner] = capture
                return {
                  type: "html",
                  value: `<span class="text-highlight">${inner}</span>`,
                }
              },
            ])
          }

          if (opts.parseArrows) {
            replacements.push([
              arrowRegex,
              (value: string, ..._capture: string[]) => {
                const maybeArrow = arrowMapping[value]
                // `false` means "leave the match untouched". Returning the visitor constant
                // SKIP here would be treated as replacement text, because it is a plain string.
                if (maybeArrow === undefined) return false
                return {
                  type: "html",
                  value: `<span>${maybeArrow}</span>`,
                }
              },
            ])
          }

          if (opts.parseTags) {
            replacements.push([
              tagRegex,
              (_value: string, tag: string) => {
                // Check if the tag only includes numbers
                if (/^\d+$/.test(tag)) {
                  return false
                }

                tag = slugTag(tag)
                // record the inline tag alongside the frontmatter tags, without duplicates
                if (file.data.frontmatter) {
                  const noteTags = file.data.frontmatter.tags ?? []
                  file.data.frontmatter.tags = [...new Set([...noteTags, tag])]
                }

                return {
                  type: "link",
                  url: base + `/tags/${tag}`,
                  data: {
                    hProperties: {
                      className: ["tag-link"],
                    },
                  },
                  children: [
                    {
                      type: "text",
                      value: `#${tag}`,
                    },
                  ],
                }
              },
            ])
          }

          // Raw HTML nodes are opaque to find-and-replace, so apply the same replacements to
          // their source text and serialise any resulting nodes back to HTML.
          if (opts.enableInHtmlEmbed) {
            visit(tree, "html", (node: Html) => {
              for (const [regex, replace] of replacements) {
                if (typeof replace === "string") {
                  node.value = node.value.replace(regex, replace)
                } else {
                  node.value = node.value.replace(regex, (substring: string, ...args) => {
                    const replaceValue = replace(substring, ...args)
                    if (typeof replaceValue === "string") {
                      return replaceValue
                    } else if (Array.isArray(replaceValue)) {
                      return replaceValue.map(mdastToHtml).join("")
                    } else if (typeof replaceValue === "object" && replaceValue !== null) {
                      return mdastToHtml(replaceValue)
                    } else {
                      // no replacement requested: keep the matched text
                      return substring
                    }
                  })
                }
              }
            })
          }

          mdastFindReplace(tree, replacements)
        }
      })

      if (opts.enableVideoEmbed) {
        plugins.push(() => {
          return (tree: Root, _file) => {
            visit(tree, "image", (node, index, parent) => {
              if (parent && index != undefined && videoExtensionRegex.test(node.url)) {
                const newNode: Html = {
                  type: "html",
                  value: `<video controls src="${node.url}"></video>`,
                }

                parent.children.splice(index, 1, newNode)
                return SKIP
              }
            })
          }
        })
      }

      if (opts.callouts) {
        plugins.push(() => {
          return (tree: Root, _file) => {
            visit(tree, "blockquote", (node) => {
              if (node.children.length === 0) {
                return
              }

              // find first line
              const firstChild = node.children[0]
              if (firstChild.type !== "paragraph" || firstChild.children[0]?.type !== "text") {
                return
              }

              const text = firstChild.children[0].value
              // inline nodes that follow the leading text node belong to the title
              const restOfTitle = firstChild.children.slice(1)
              const [firstLine, ...remainingLines] = text.split("\n")
              const remainingText = remainingLines.join("\n")

              const match = firstLine.match(calloutRegex)
              if (match && match.input) {
                const [calloutDirective, typeString, collapseChar] = match
                const calloutType = canonicalizeCallout(typeString.toLowerCase())
                const collapse = collapseChar === "+" || collapseChar === "-"
                const defaultState = collapseChar === "-" ? "collapsed" : "expanded"
                const titleContent = match.input.slice(calloutDirective.length).trim()
                // with no title text at all, the capitalised callout type is the title
                const useDefaultTitle = titleContent === "" && restOfTitle.length === 0
                const titleNode: Paragraph = {
                  type: "paragraph",
                  children: [
                    {
                      type: "text",
                      value: useDefaultTitle ? capitalize(calloutType) : titleContent + " ",
                    },
                    ...restOfTitle,
                  ],
                }
                const title = mdastToHtml(titleNode)

                const toggleIcon = `<div class="fold-callout-icon"></div>`

                const titleHtml: Html = {
                  type: "html",
                  value: `<div
                  class="callout-title"
                >
                  <div class="callout-icon"></div>
                  <div class="callout-title-inner">${title}</div>
                  ${collapse ? toggleIcon : ""}
                </div>`,
                }

                const blockquoteContent: (BlockContent | DefinitionContent)[] = [titleHtml]
                if (remainingText.length > 0) {
                  blockquoteContent.push({
                    type: "paragraph",
                    children: [
                      {
                        type: "text",
                        value: remainingText,
                      },
                    ],
                  })
                }

                // replace first line of blockquote with title and rest of the paragraph text
                node.children.splice(0, 1, ...blockquoteContent)

                // add properties to base blockquote
                node.data = {
                  hProperties: {
                    ...(node.data?.hProperties ?? {}),
                    className: `callout ${calloutType} ${collapse ? "is-collapsible" : ""} ${
                      defaultState === "collapsed" ? "is-collapsed" : ""
                    }`,
                    "data-callout": calloutType,
                    "data-callout-fold": collapse,
                  },
                }
              }
            })
          }
        })
      }

      if (opts.mermaid) {
        plugins.push(() => {
          return (tree: Root, _file) => {
            visit(tree, "code", (node: Code) => {
              if (node.lang === "mermaid") {
                node.data = {
                  hProperties: {
                    className: ["mermaid"],
                  },
                }
              }
            })
          }
        })
      }

      return plugins
    },
    htmlPlugins() {
      const plugins: PluggableList = [rehypeRaw]

      if (opts.parseBlockReferences) {
        plugins.push(() => {
          const inlineTagTypes = new Set(["p", "li"])
          const blockTagTypes = new Set(["blockquote"])
          return (tree: HtmlRoot, file) => {
            const blocks: Record<string, Element> = {}
            file.data.blocks = blocks

            // Gives `node` the block id unless an earlier element already claimed that id.
            const registerBlock = (block: string, node: Element) => {
              if (!Object.prototype.hasOwnProperty.call(blocks, block)) {
                node.properties = {
                  ...node.properties,
                  id: block,
                }
                blocks[block] = node
              }
            }

            visit(tree, "element", (node, index, parent) => {
              if (blockTagTypes.has(node.tagName)) {
                // without a parent and a position there is no following sibling to inspect
                if (!parent || index == null) {
                  return
                }

                // The marker is looked for two siblings ahead (presumably skipping a
                // whitespace text node between the two elements; confirm against real input).
                const nextChild = parent.children.at(index + 2) as Element
                if (nextChild && nextChild.tagName === "p") {
                  const text = nextChild.children.at(0) as Literal
                  if (text && text.value && text.type === "text") {
                    const matches = text.value.match(blockReferenceRegex)
                    if (matches && matches.length >= 1) {
                      // drop the paragraph that held the marker
                      parent.children.splice(index + 2, 1)
                      registerBlock(matches[0].slice(1), node)
                    }
                  }
                }
              } else if (inlineTagTypes.has(node.tagName)) {
                const last = node.children.at(-1) as Literal
                if (last && last.value && typeof last.value === "string") {
                  const matches = last.value.match(blockReferenceRegex)
                  if (matches && matches.length >= 1) {
                    // strip the trailing ^marker from the visible text
                    last.value = last.value.slice(0, -matches[0].length)
                    registerBlock(matches[0].slice(1), node)
                  }
                }
              }
            })

            file.data.htmlAst = tree
          }
        })
      }

      if (opts.enableYouTubeEmbed) {
        plugins.push(() => {
          return (tree: HtmlRoot) => {
            visit(tree, "element", (node) => {
              if (node.tagName === "img" && typeof node.properties.src === "string") {
                const match = node.properties.src.match(ytLinkRegex)
                // only an 11-character capture is accepted as a video id
                const videoId = match && match[2].length === 11 ? match[2] : null
                if (videoId) {
                  node.tagName = "iframe"
                  node.properties = {
                    class: "external-embed",
                    allow: "fullscreen",
                    frameborder: 0,
                    width: "600px",
                    height: "350px",
                    src: `https://www.youtube.com/embed/${videoId}`,
                  }
                }
              }
            })
          }
        })
      }

      return plugins
    },
    externalResources() {
      const js: JSResource[] = []

      if (opts.callouts) {
        js.push({
          script: calloutScript,
          loadTime: "afterDOMReady",
          contentType: "inline",
        })
      }

      if (opts.mermaid) {
        js.push({
          script: `
          import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.esm.min.mjs';
          const darkMode = document.documentElement.getAttribute('saved-theme') === 'dark'
          mermaid.initialize({
            startOnLoad: false,
            securityLevel: 'loose',
            theme: darkMode ? 'dark' : 'default'
          });
          document.addEventListener('nav', async () => {
            await mermaid.run({
              querySelector: '.mermaid'
            })
          });
          `,
          loadTime: "afterDOMReady",
          moduleType: "module",
          contentType: "inline",
        })
      }

      return { js }
    },
  }
}
// Extends vfile's per-file data with the fields the block-reference HTML pass writes.
declare module "vfile" {
  interface DataMap {
    /** Elements that carry a block reference id, keyed by that id. */
    blocks: Record<string, Element>
    /** The HTML tree as it stands after block references have been processed. */
    htmlAst: HtmlRoot
  }
}