From dc6f9f4b933e46ceb59c3ea3a5bb0ba2d43e44b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 08:49:34 +0200 Subject: [PATCH 01/47] feat: build Markdown oages for all pages --- DESCRIPTION | 1 + R/build-mds.R | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++ R/build.R | 1 + 3 files changed, 87 insertions(+) create mode 100644 R/build-mds.R diff --git a/DESCRIPTION b/DESCRIPTION index 0e976e982..3ed224b65 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,6 +55,7 @@ Suggests: knitr (>= 1.50), magick, methods, + pandoc, pkgload (>= 1.0.2), quarto, rsconnect, diff --git a/R/build-mds.R b/R/build-mds.R new file mode 100644 index 000000000..aed64154b --- /dev/null +++ b/R/build-mds.R @@ -0,0 +1,85 @@ +build_mds <- function(pkg = ".") { + pkg <- as_pkgdown(pkg) + + if (refuses_mds(pkg)) { + return() + } + + url <- paste0(config_pluck_string(pkg, "url"), "/") + if (is.null(url)) { + return() + } + + cli::cli_rule("Building Markdowns") + if (pkg$development$in_dev && pkg$bs_version > 3) { + url <- paste0(url, pkg$prefix) + } + + paths <- get_site_paths(pkg) + + purrr::walk(paths, build_md, pkg = pkg) + + invisible() +} + +refuses_mds <- function(pkg) { + !config_pluck_bool(pkg, "template.build-md", default = TRUE) +} + +build_md <- function(path, pkg) { + rlang::check_installed("pandoc") + path <- file.path(pkg[["dst_path"]], path) + + main_html <- xml2::read_html(path) |> + xml2::xml_find_first(".//main") + + # uninformative image (logo) + source link + title <- xml2::xml_find_first(main_html, ".//h1") + xml2::xml_remove( + xml2::xml_find_first(main_html, ".//div[@class='page-header']") + ) + + xml2::xml_add_child( + main_html, + title, + .where = 0 + ) + + # clean links + xml2::xml_remove( + xml2::xml_find_all(main_html, ".//a[@class='anchor']") + ) + + internal_links <- xml2::xml_find_all( + main_html, + ".//a[not(@class='external-link')]" + ) + purrr::walk(internal_links, add_website_url, pkg = pkg) + + xml2::xml_set_attr( + 
xml2::xml_find_all(main_html, ".//a[@class='external-link']"), + attr = "class", + value = NULL + ) + + pandoc::pandoc_convert( + text = main_html, + from = "html", + to = "markdown_strict", + output = sprintf("%s.md", path) + ) +} + +add_website_url <- function(node, pkg) { + url <- paste0(config_pluck_string(pkg, "url"), "/") + + if (pkg$development$in_dev && pkg$bs_version > 3) { + url <- paste0(url, pkg$prefix) + } + + xml2::xml_set_attr( + node, + attr = "href", + value = sprintf("%s%s.md", url, xml2::xml_attr(node, "href")) + ) +} diff --git a/R/build.R b/R/build.R index 1b7bdaa14..35cd46b5e 100644 --- a/R/build.R +++ b/R/build.R @@ -467,6 +467,7 @@ build_site_local <- function( build_tutorials(pkg, override = override, preview = FALSE) build_news(pkg, override = override, preview = FALSE) build_sitemap(pkg) + build_mds(pkg) build_redirects(pkg, override = override) if (pkg$bs_version == 3) { build_docsearch_json(pkg) From b14b8a01a4b017dfc4ee87195621db7fc282eaad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:03:13 +0200 Subject: [PATCH 02/47] feat: add llms.txt --- R/build-mds.R | 30 +++++++++++++++++++++++++++++ R/build.R | 4 +++- inst/BS5/templates/content-llms.txt | 5 +++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 inst/BS5/templates/content-llms.txt diff --git a/R/build-mds.R b/R/build-mds.R index aed64154b..279c782be 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -68,6 +68,36 @@ build_md <- function(path, pkg) { to = "markdown_strict", output = sprintf("%s.md", path) ) + + readme <- brio::read_lines(file.path(pkg[["dst_path"]], "index.html.md")) |> + paste(collapse = "\n") + + reference_path <- file.path(pkg[["dst_path"]], "reference", "index.html.md") + if (file.exists(reference_path)) { + reference <- brio::read_lines(reference_path) |> + paste(collapse = "\n") + } else { + reference <- "" + } + + article_path <- file.path(pkg[["dst_path"]], "articles", "index.html.md") + if 
(file.exists(article_path)) { + articles <- brio::read_lines(article_path) |> + paste(collapse = "\n") + } else { + articles <- "" + } + + llms_lines <- whisker::whisker.render( + read_file(find_template("content", "llms", ext = ".txt", pkg = pkg)), + data = list( + readme = readme, + reference = reference, + articles = articles + ) + ) + + brio::write_lines(llms_lines, file.path(pkg[["dst_path"]], "llms.txt")) } add_website_url <- function(node, pkg) { diff --git a/R/build.R b/R/build.R index 35cd46b5e..8211aae2a 100644 --- a/R/build.R +++ b/R/build.R @@ -467,7 +467,9 @@ build_site_local <- function( build_tutorials(pkg, override = override, preview = FALSE) build_news(pkg, override = override, preview = FALSE) build_sitemap(pkg) - build_mds(pkg) + if (pkg$bs_version > 3) { + build_mds(pkg) + } build_redirects(pkg, override = override) if (pkg$bs_version == 3) { build_docsearch_json(pkg) diff --git a/inst/BS5/templates/content-llms.txt b/inst/BS5/templates/content-llms.txt new file mode 100644 index 000000000..0f1a288d0 --- /dev/null +++ b/inst/BS5/templates/content-llms.txt @@ -0,0 +1,5 @@ +{{{readme}}} + +{{{reference}}} + +{{{articles}}} \ No newline at end of file From 3b2018ff9dd19c99558289e07af7d825e7ac3eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:17:08 +0200 Subject: [PATCH 03/47] dl --- R/build-mds.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build-mds.R b/R/build-mds.R index 279c782be..4cbcd6c8b 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -65,7 +65,7 @@ build_md <- function(path, pkg) { pandoc::pandoc_convert( text = main_html, from = "html", - to = "markdown_strict", + to = "markdown_strict+definition_lists", output = sprintf("%s.md", path) ) From 07679cd79d0756f179c8aae5eefb3fb36db05562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:18:30 +0200 Subject: [PATCH 04/47] change type --- R/build-mds.R | 2 +- 
inst/BS5/templates/{content-llms.txt => summary-llms.txt} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename inst/BS5/templates/{content-llms.txt => summary-llms.txt} (100%) diff --git a/R/build-mds.R b/R/build-mds.R index 4cbcd6c8b..8e17d66ff 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -89,7 +89,7 @@ build_md <- function(path, pkg) { } llms_lines <- whisker::whisker.render( - read_file(find_template("content", "llms", ext = ".txt", pkg = pkg)), + read_file(find_template("summary", "llms", ext = ".txt", pkg = pkg)), data = list( readme = readme, reference = reference, diff --git a/inst/BS5/templates/content-llms.txt b/inst/BS5/templates/summary-llms.txt similarity index 100% rename from inst/BS5/templates/content-llms.txt rename to inst/BS5/templates/summary-llms.txt From b4aee5909c0fc689e640c4ff4b9842c1eb596f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:19:44 +0200 Subject: [PATCH 05/47] lint --- R/build-mds.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/build-mds.R b/R/build-mds.R index 8e17d66ff..9ca9a679a 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -28,7 +28,7 @@ refuses_mds <- function(pkg) { build_md <- function(path, pkg) { rlang::check_installed("pandoc") - path <- file.path(pkg[["dst_path"]], path) + path <- path(pkg[["dst_path"]], path) main_html <- xml2::read_html(path) |> xml2::xml_find_first(".//main") @@ -69,19 +69,19 @@ build_md <- function(path, pkg) { output = sprintf("%s.md", path) ) - readme <- brio::read_lines(file.path(pkg[["dst_path"]], "index.html.md")) |> + readme <- brio::read_lines(path(pkg[["dst_path"]], "index.html.md")) |> paste(collapse = "\n") - reference_path <- file.path(pkg[["dst_path"]], "reference", "index.html.md") - if (file.exists(reference_path)) { + reference_path <- path(pkg[["dst_path"]], "reference", "index.html.md") + if (file_exists(reference_path)) { reference <- brio::read_lines(reference_path) |> paste(collapse = 
"\n") } else { reference <- "" } - article_path <- file.path(pkg[["dst_path"]], "articles", "index.html.md") - if (file.exists(article_path)) { + article_path <- path(pkg[["dst_path"]], "articles", "index.html.md") + if (file_exists(article_path)) { articles <- brio::read_lines(article_path) |> paste(collapse = "\n") } else { @@ -97,7 +97,7 @@ build_md <- function(path, pkg) { ) ) - brio::write_lines(llms_lines, file.path(pkg[["dst_path"]], "llms.txt")) + brio::write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) } add_website_url <- function(node, pkg) { From 4808f7b95ed41b743c8e9d085ca7bef3bd11eb67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:34:04 +0200 Subject: [PATCH 06/47] fix links --- R/build-mds.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/R/build-mds.R b/R/build-mds.R index 9ca9a679a..523411504 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -54,7 +54,13 @@ build_md <- function(path, pkg) { main_html, ".//a[not(@class='external-link')]" ) - purrr::walk(internal_links, add_website_url, pkg = pkg) + + purrr::walk( + internal_links, + add_website_url, + pkg = pkg, + root = fs::path_rel(fs::path_dir(path), start = pkg$dst_path) + ) xml2::xml_set_attr( xml2::xml_find_all(main_html, ".//a[@class='external-link']"), @@ -100,13 +106,16 @@ build_md <- function(path, pkg) { brio::write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) } -add_website_url <- function(node, pkg) { - url <- paste0(config_pluck_string(pkg, "url"), "/") - +add_website_url <- function(node, pkg, root) { + url <- sprintf("%s/", config_pluck_string(pkg, "url")) if (pkg$development$in_dev && pkg$bs_version > 3) { url <- paste0(url, pkg$prefix) } + if (root != ".") { + url <- sprintf("%s%s/", url, root) + } + xml2::xml_set_attr( node, attr = "href", From 31246644bb6e1120324fe49da4f184892deb0d74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:37:26 
+0200 Subject: [PATCH 07/47] no brio --- R/build-mds.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/build-mds.R b/R/build-mds.R index 523411504..2f1324992 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -71,16 +71,16 @@ build_md <- function(path, pkg) { pandoc::pandoc_convert( text = main_html, from = "html", - to = "markdown_strict+definition_lists", + to = "markdown_strict+definition_lists+footnotes", output = sprintf("%s.md", path) ) - readme <- brio::read_lines(path(pkg[["dst_path"]], "index.html.md")) |> + readme <- read_lines(path(pkg[["dst_path"]], "index.html.md")) |> paste(collapse = "\n") reference_path <- path(pkg[["dst_path"]], "reference", "index.html.md") if (file_exists(reference_path)) { - reference <- brio::read_lines(reference_path) |> + reference <- read_lines(reference_path) |> paste(collapse = "\n") } else { reference <- "" @@ -88,7 +88,7 @@ build_md <- function(path, pkg) { article_path <- path(pkg[["dst_path"]], "articles", "index.html.md") if (file_exists(article_path)) { - articles <- brio::read_lines(article_path) |> + articles <- read_lines(article_path) |> paste(collapse = "\n") } else { articles <- "" @@ -103,7 +103,7 @@ build_md <- function(path, pkg) { ) ) - brio::write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) + write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) } add_website_url <- function(node, pkg, root) { From 9ae414616bbe2c27a201851144cf6af2bb668ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:39:57 +0200 Subject: [PATCH 08/47] fix as not always an index (test failing) --- R/build-mds.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/build-mds.R b/R/build-mds.R index 2f1324992..c82c295bd 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -75,8 +75,13 @@ build_md <- function(path, pkg) { output = sprintf("%s.md", path) ) - readme <- read_lines(path(pkg[["dst_path"]], "index.html.md")) |> - 
paste(collapse = "\n") + readme_path <- path(pkg[["dst_path"]], "index.html.md") + if (file_exists(readme_path)) { + readme <- read_lines(readme_path) |> + paste(collapse = "\n") + } else { + readme <- "" + } reference_path <- path(pkg[["dst_path"]], "reference", "index.html.md") if (file_exists(reference_path)) { From 63bdedda24121fb8f7d6b3394a84cfb62b7cb3b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:55:45 +0200 Subject: [PATCH 09/47] fix test? --- R/build-mds.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/build-mds.R b/R/build-mds.R index c82c295bd..27eae8735 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -33,6 +33,10 @@ build_md <- function(path, pkg) { main_html <- xml2::read_html(path) |> xml2::xml_find_first(".//main") + if (length(main_html) == 0) { + return() + } + # uninformative image (logo) + source link title <- xml2::xml_find_first(main_html, ".//h1") xml2::xml_remove( From 214208bdfb5343b922eaafc94716c87febfc7a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 09:56:45 +0200 Subject: [PATCH 10/47] tweak --- .github/workflows/pkgdown.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 04d9d6498..c43ec708e 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -39,7 +39,7 @@ jobs: - uses: r-lib/actions/setup-tinytex@v2 - name: Build site - run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, override = list(url = "")) shell: Rscript {0} - name: Deploy to GitHub pages 🚀 From 0029edd7b5dd20664cc324bb3d1537296259e505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 26 Sep 2025 10:02:42 +0200 Subject: [PATCH 11/47] fix test? 
--- tests/testthat/test-build.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-build.R b/tests/testthat/test-build.R index b3156b403..c23b89ced 100644 --- a/tests/testthat/test-build.R +++ b/tests/testthat/test-build.R @@ -14,7 +14,8 @@ test_that("build_site can be made unquiet", { build_site( pkg, quiet = FALSE, - preview = FALSE + preview = FALSE, + override = list(template = list(`build-md` = FALSE)) ) ) ) @@ -24,7 +25,8 @@ test_that("build_site can be made unquiet", { capture.output( build_site( pkg, - preview = FALSE + preview = FALSE, + override = list(template = list(`build-md` = FALSE)) ) ) ) From 74472e13e4d52803908201dfb178822650e0d67d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Tue, 7 Oct 2025 12:17:28 +0200 Subject: [PATCH 12/47] fences --- R/build-mds.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build-mds.R b/R/build-mds.R index 27eae8735..705274982 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -75,7 +75,7 @@ build_md <- function(path, pkg) { pandoc::pandoc_convert( text = main_html, from = "html", - to = "markdown_strict+definition_lists+footnotes", + to = "markdown_strict+definition_lists+footnotes+backtick_code_blocks", output = sprintf("%s.md", path) ) From 04e95d6d19507f11d0b24366baf2f382834d4551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Tue, 7 Oct 2025 12:42:49 +0200 Subject: [PATCH 13/47] move code --- R/build-mds.R | 72 +++++++++++++++++++++++++-------------------------- R/build.R | 2 +- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/R/build-mds.R b/R/build-mds.R index 705274982..4b365662c 100644 --- a/R/build-mds.R +++ b/R/build-mds.R @@ -1,4 +1,4 @@ -build_mds <- function(pkg = ".") { +build_llm_docs <- function(pkg = ".") { pkg <- as_pkgdown(pkg) if (refuses_mds(pkg)) { @@ -19,6 +19,41 @@ build_mds <- function(pkg = ".") { purrr::walk(paths, build_md, pkg = pkg) + readme_path <- 
path(pkg[["dst_path"]], "index.html.md") + if (file_exists(readme_path)) { + readme <- read_lines(readme_path) |> + paste(collapse = "\n") + } else { + readme <- "" + } + + reference_path <- path(pkg[["dst_path"]], "reference", "index.html.md") + if (file_exists(reference_path)) { + reference <- read_lines(reference_path) |> + paste(collapse = "\n") + } else { + reference <- "" + } + + article_path <- path(pkg[["dst_path"]], "articles", "index.html.md") + if (file_exists(article_path)) { + articles <- read_lines(article_path) |> + paste(collapse = "\n") + } else { + articles <- "" + } + + llms_lines <- whisker::whisker.render( + read_file(find_template("summary", "llms", ext = ".txt", pkg = pkg)), + data = list( + readme = readme, + reference = reference, + articles = articles + ) + ) + + write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) + invisible() } @@ -78,41 +113,6 @@ build_md <- function(path, pkg) { to = "markdown_strict+definition_lists+footnotes+backtick_code_blocks", output = sprintf("%s.md", path) ) - - readme_path <- path(pkg[["dst_path"]], "index.html.md") - if (file_exists(readme_path)) { - readme <- read_lines(readme_path) |> - paste(collapse = "\n") - } else { - readme <- "" - } - - reference_path <- path(pkg[["dst_path"]], "reference", "index.html.md") - if (file_exists(reference_path)) { - reference <- read_lines(reference_path) |> - paste(collapse = "\n") - } else { - reference <- "" - } - - article_path <- path(pkg[["dst_path"]], "articles", "index.html.md") - if (file_exists(article_path)) { - articles <- read_lines(article_path) |> - paste(collapse = "\n") - } else { - articles <- "" - } - - llms_lines <- whisker::whisker.render( - read_file(find_template("summary", "llms", ext = ".txt", pkg = pkg)), - data = list( - readme = readme, - reference = reference, - articles = articles - ) - ) - - write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) } add_website_url <- function(node, pkg, root) { diff --git a/R/build.R 
b/R/build.R index 8211aae2a..678c64590 100644 --- a/R/build.R +++ b/R/build.R @@ -468,7 +468,7 @@ build_site_local <- function( build_news(pkg, override = override, preview = FALSE) build_sitemap(pkg) if (pkg$bs_version > 3) { - build_mds(pkg) + build_llm_docs(pkg) } build_redirects(pkg, override = override) if (pkg$bs_version == 3) { From 675a955beb2adc964e2c7b2f15f5137b3c61cf47 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:22:34 -0500 Subject: [PATCH 14/47] Update file based on new function name --- R/{build-mds.R => build-llm.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{build-mds.R => build-llm.R} (100%) diff --git a/R/build-mds.R b/R/build-llm.R similarity index 100% rename from R/build-mds.R rename to R/build-llm.R From 46642ecc89c2bd97dac4634ccd6f8a6287a8af71 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:29:14 -0500 Subject: [PATCH 15/47] Simplify llms.txt generation --- R/build-llm.R | 59 +++++++---------------------- inst/BS5/templates/summary-llms.txt | 5 --- 2 files changed, 14 insertions(+), 50 deletions(-) delete mode 100644 inst/BS5/templates/summary-llms.txt diff --git a/R/build-llm.R b/R/build-llm.R index 4b365662c..27c3a8cfa 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -1,6 +1,5 @@ build_llm_docs <- function(pkg = ".") { pkg <- as_pkgdown(pkg) - if (refuses_mds(pkg)) { return() } @@ -10,49 +9,20 @@ build_llm_docs <- function(pkg = ".") { return() } - cli::cli_rule("Building Markdowns") + cli::cli_rule("Building llm documentation") if (pkg$development$in_dev && pkg$bs_version > 3) { url <- paste0(url, pkg$prefix) } paths <- get_site_paths(pkg) - purrr::walk(paths, build_md, pkg = pkg) - readme_path <- path(pkg[["dst_path"]], "index.html.md") - if (file_exists(readme_path)) { - readme <- read_lines(readme_path) |> - paste(collapse = "\n") - } else { - readme <- "" - } - - reference_path <- path(pkg[["dst_path"]], "reference", "index.html.md") - if (file_exists(reference_path)) 
{ - reference <- read_lines(reference_path) |> - paste(collapse = "\n") - } else { - reference <- "" - } - - article_path <- path(pkg[["dst_path"]], "articles", "index.html.md") - if (file_exists(article_path)) { - articles <- read_lines(article_path) |> - paste(collapse = "\n") - } else { - articles <- "" - } - - llms_lines <- whisker::whisker.render( - read_file(find_template("summary", "llms", ext = ".txt", pkg = pkg)), - data = list( - readme = readme, - reference = reference, - articles = articles - ) + index <- c( + read_file_if_exists(path(pkg$dst_path, "index.html.md")), + read_file_if_exists(path(pkg$dst_path, "reference", "index.html.md")), + read_file_if_exists(path(pkg$dst_path, "articles", "index.html.md")) ) - - write_lines(llms_lines, path(pkg[["dst_path"]], "llms.txt")) + writeLines(index, path(pkg$dst_path, "llms.txt")) invisible() } @@ -61,6 +31,12 @@ refuses_mds <- function(pkg) { !config_pluck_bool(pkg, "template.build-md", default = TRUE) } +read_file_if_exists <- function(path) { + if (file_exists(path)) { + read_lines(path) + } +} + build_md <- function(path, pkg) { rlang::check_installed("pandoc") path <- path(pkg[["dst_path"]], path) @@ -77,17 +53,10 @@ build_md <- function(path, pkg) { xml2::xml_remove( xml2::xml_find_first(main_html, ".//div[@class='page-header']") ) - - xml2::xml_add_child( - main_html, - title, - .where = 0 - ) + xml2::xml_add_child(main_html, title, .where = 0) # clean links - xml2::xml_remove( - xml2::xml_find_all(main_html, ".//a[@class='anchor']") - ) + xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) internal_links <- xml2::xml_find_all( main_html, diff --git a/inst/BS5/templates/summary-llms.txt b/inst/BS5/templates/summary-llms.txt deleted file mode 100644 index 0f1a288d0..000000000 --- a/inst/BS5/templates/summary-llms.txt +++ /dev/null @@ -1,5 +0,0 @@ -{{{readme}}} - -{{{reference}}} - -{{{articles}}} \ No newline at end of file From c3d752fa5a2f77e1cf35b15498d0d7fc32bf85cf Mon Sep 17 
00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:30:13 -0500 Subject: [PATCH 16/47] Be less conditional --- R/build-llm.R | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index 27c3a8cfa..32c60c212 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -1,13 +1,5 @@ build_llm_docs <- function(pkg = ".") { pkg <- as_pkgdown(pkg) - if (refuses_mds(pkg)) { - return() - } - - url <- paste0(config_pluck_string(pkg, "url"), "/") - if (is.null(url)) { - return() - } cli::cli_rule("Building llm documentation") if (pkg$development$in_dev && pkg$bs_version > 3) { @@ -27,10 +19,6 @@ build_llm_docs <- function(pkg = ".") { invisible() } -refuses_mds <- function(pkg) { - !config_pluck_bool(pkg, "template.build-md", default = TRUE) -} - read_file_if_exists <- function(path) { if (file_exists(path)) { read_lines(path) From b883843530ce16a2e2e1df3f863c5ee104445a04 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:35:10 -0500 Subject: [PATCH 17/47] style tweaks --- R/build-llm.R | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index 32c60c212..dce31a652 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -1,13 +1,10 @@ build_llm_docs <- function(pkg = ".") { + rlang::check_installed("pandoc") pkg <- as_pkgdown(pkg) cli::cli_rule("Building llm documentation") - if (pkg$development$in_dev && pkg$bs_version > 3) { - url <- paste0(url, pkg$prefix) - } - paths <- get_site_paths(pkg) - purrr::walk(paths, build_md, pkg = pkg) + purrr::walk(paths, convert_md, pkg = pkg) index <- c( read_file_if_exists(path(pkg$dst_path, "index.html.md")), @@ -19,45 +16,33 @@ build_llm_docs <- function(pkg = ".") { invisible() } -read_file_if_exists <- function(path) { - if (file_exists(path)) { - read_lines(path) - } -} - -build_md <- function(path, pkg) { - rlang::check_installed("pandoc") +convert_md <- function(path, pkg) { path <- 
path(pkg[["dst_path"]], path) - main_html <- xml2::read_html(path) |> - xml2::xml_find_first(".//main") - + html <- xml2::read_html(path) + main_html <- xml2::xml_find_first(html, ".//main") if (length(main_html) == 0) { return() } - # uninformative image (logo) + source link + # simplify page header (which includes logo + source link) title <- xml2::xml_find_first(main_html, ".//h1") xml2::xml_remove( xml2::xml_find_first(main_html, ".//div[@class='page-header']") ) xml2::xml_add_child(main_html, title, .where = 0) - # clean links + # drop anchors xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) - internal_links <- xml2::xml_find_all( - main_html, - ".//a[not(@class='external-link')]" - ) - + # replace all internal links with absolute link to .md + internal <- xml2::xml_find_all(main_html, ".//a[not(@class='external-link')]") purrr::walk( - internal_links, + internal, add_website_url, pkg = pkg, root = fs::path_rel(fs::path_dir(path), start = pkg$dst_path) ) - xml2::xml_set_attr( xml2::xml_find_all(main_html, ".//a[@class='external-link']"), attr = "class", @@ -72,6 +57,8 @@ build_md <- function(path, pkg) { ) } +# Helpers --------------------------------------------------------------------- + add_website_url <- function(node, pkg, root) { url <- sprintf("%s/", config_pluck_string(pkg, "url")) if (pkg$development$in_dev && pkg$bs_version > 3) { @@ -88,3 +75,10 @@ add_website_url <- function(node, pkg, root) { value = sprintf("%s%s.md", url, xml2::xml_attr(node, "href")) ) } + + +read_file_if_exists <- function(path) { + if (file_exists(path)) { + read_lines(path) + } +} From 7fa82a042a670225963aaf019b5f0ca90f506793 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:46:35 -0500 Subject: [PATCH 18/47] Vectorise absolute urls --- R/build-llm.R | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index dce31a652..6ce33fdcf 100644 --- 
a/R/build-llm.R +++ b/R/build-llm.R @@ -32,22 +32,20 @@ convert_md <- function(path, pkg) { ) xml2::xml_add_child(main_html, title, .where = 0) - # drop anchors + # drop internal anchors xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) - # replace all internal links with absolute link to .md - internal <- xml2::xml_find_all(main_html, ".//a[not(@class='external-link')]") - purrr::walk( - internal, - add_website_url, - pkg = pkg, - root = fs::path_rel(fs::path_dir(path), start = pkg$dst_path) - ) - xml2::xml_set_attr( - xml2::xml_find_all(main_html, ".//a[@class='external-link']"), - attr = "class", - value = NULL - ) + # replace all links with absolute link to .md + if (!is.null(pkg$meta$url)) { + url <- paste0(pkg$meta$url, "/") + if (pkg$development$in_dev && pkg$bs_version > 3) { + url <- paste0(url, pkg$prefix) + } + a <- xml2::xml_find_all(main_html, ".//a") + href_absolute <- xml2::url_absolute(xml2::xml_attr(a, "href"), url) + xml2::xml_attr(a, "href") <- href_absolute + xml2::xml_attr(a, "class") <- NULL + } pandoc::pandoc_convert( text = main_html, @@ -59,24 +57,6 @@ convert_md <- function(path, pkg) { # Helpers --------------------------------------------------------------------- -add_website_url <- function(node, pkg, root) { - url <- sprintf("%s/", config_pluck_string(pkg, "url")) - if (pkg$development$in_dev && pkg$bs_version > 3) { - url <- paste0(url, pkg$prefix) - } - - if (root != ".") { - url <- sprintf("%s%s/", url, root) - } - - xml2::xml_set_attr( - node, - attr = "href", - value = sprintf("%s%s.md", url, xml2::xml_attr(node, "href")) - ) -} - - read_file_if_exists <- function(path) { if (file_exists(path)) { read_lines(path) From b2f7af35171a5d10142f9e8227a6c0c5f4c3f6cd Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:46:44 -0500 Subject: [PATCH 19/47] Tweak title --- R/build-llm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build-llm.R b/R/build-llm.R index 
6ce33fdcf..d72e10391 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -2,7 +2,7 @@ build_llm_docs <- function(pkg = ".") { rlang::check_installed("pandoc") pkg <- as_pkgdown(pkg) - cli::cli_rule("Building llm documentation") + cli::cli_rule("Building docs for llms") paths <- get_site_paths(pkg) purrr::walk(paths, convert_md, pkg = pkg) From 05b14bd25b53a897da80d0063d6588957dd2a50a Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 7 Oct 2025 07:56:38 -0500 Subject: [PATCH 20/47] Use .md extension --- R/build-llm.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index d72e10391..f233d0497 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -7,9 +7,9 @@ build_llm_docs <- function(pkg = ".") { purrr::walk(paths, convert_md, pkg = pkg) index <- c( - read_file_if_exists(path(pkg$dst_path, "index.html.md")), - read_file_if_exists(path(pkg$dst_path, "reference", "index.html.md")), - read_file_if_exists(path(pkg$dst_path, "articles", "index.html.md")) + read_file_if_exists(path(pkg$dst_path, "index.md")), + read_file_if_exists(path(pkg$dst_path, "reference", "index.md")), + read_file_if_exists(path(pkg$dst_path, "articles", "index.md")) ) writeLines(index, path(pkg$dst_path, "llms.txt")) @@ -36,22 +36,24 @@ convert_md <- function(path, pkg) { xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) # replace all links with absolute link to .md + a <- xml2::xml_find_all(main_html, ".//a") if (!is.null(pkg$meta$url)) { url <- paste0(pkg$meta$url, "/") if (pkg$development$in_dev && pkg$bs_version > 3) { url <- paste0(url, pkg$prefix) } - a <- xml2::xml_find_all(main_html, ".//a") - href_absolute <- xml2::url_absolute(xml2::xml_attr(a, "href"), url) - xml2::xml_attr(a, "href") <- href_absolute - xml2::xml_attr(a, "class") <- NULL + a_external <- a[!grepl("external-link", xml2::xml_attr(a, "class"))] + + href_absolute <- xml2::url_absolute(xml2::xml_attr(a_external, "href"), url) + 
href_absolute <- path_ext_set(href_absolute, "md") } + xml2::xml_attr(a, "class") <- NULL pandoc::pandoc_convert( text = main_html, from = "html", to = "markdown_strict+definition_lists+footnotes+backtick_code_blocks", - output = sprintf("%s.md", path) + output = path_ext_set(path, "md") ) } From 917994f8003025a9b5e4d108919b051451c66712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 10 Oct 2025 15:01:21 +0200 Subject: [PATCH 21/47] undo test changes, no config at the moment --- tests/testthat/test-build.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-build.R b/tests/testthat/test-build.R index c23b89ced..b3156b403 100644 --- a/tests/testthat/test-build.R +++ b/tests/testthat/test-build.R @@ -14,8 +14,7 @@ test_that("build_site can be made unquiet", { build_site( pkg, quiet = FALSE, - preview = FALSE, - override = list(template = list(`build-md` = FALSE)) + preview = FALSE ) ) ) @@ -25,8 +24,7 @@ test_that("build_site can be made unquiet", { capture.output( build_site( pkg, - preview = FALSE, - override = list(template = list(`build-md` = FALSE)) + preview = FALSE ) ) ) From 8639edd29c7608b4e10688f8162b6d9b1ede65cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 10 Oct 2025 15:05:09 +0200 Subject: [PATCH 22/47] this one always exists --- R/build-llm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build-llm.R b/R/build-llm.R index f233d0497..d560adf6c 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -7,7 +7,7 @@ build_llm_docs <- function(pkg = ".") { purrr::walk(paths, convert_md, pkg = pkg) index <- c( - read_file_if_exists(path(pkg$dst_path, "index.md")), + read_lines(path(pkg$dst_path, "index.md")), read_file_if_exists(path(pkg$dst_path, "reference", "index.md")), read_file_if_exists(path(pkg$dst_path, "articles", "index.md")) ) From c7d372132bdf3b320353940d2768dcc9e05128ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= 
Date: Fri, 10 Oct 2025 15:07:50 +0200 Subject: [PATCH 23/47] extract fn --- R/build-llm.R | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index d560adf6c..26146a0c7 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -36,18 +36,7 @@ convert_md <- function(path, pkg) { xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) # replace all links with absolute link to .md - a <- xml2::xml_find_all(main_html, ".//a") - if (!is.null(pkg$meta$url)) { - url <- paste0(pkg$meta$url, "/") - if (pkg$development$in_dev && pkg$bs_version > 3) { - url <- paste0(url, pkg$prefix) - } - a_external <- a[!grepl("external-link", xml2::xml_attr(a, "class"))] - - href_absolute <- xml2::url_absolute(xml2::xml_attr(a_external, "href"), url) - href_absolute <- path_ext_set(href_absolute, "md") - } - xml2::xml_attr(a, "class") <- NULL + create_absolute_links(main_html, pkg) pandoc::pandoc_convert( text = main_html, @@ -64,3 +53,18 @@ read_file_if_exists <- function(path) { read_lines(path) } } + +create_absolute_links <- function(main_html, pkg) { + a <- xml2::xml_find_all(main_html, ".//a") + if (!is.null(pkg$meta$url)) { + url <- paste0(pkg$meta$url, "/") + if (pkg$development$in_dev && pkg$bs_version > 3) { + url <- paste0(url, pkg$prefix) + } + a_external <- a[!grepl("external-link", xml2::xml_attr(a, "class"))] + + href_absolute <- xml2::url_absolute(xml2::xml_attr(a_external, "href"), url) + href_absolute <- path_ext_set(href_absolute, "md") + } + xml2::xml_attr(a, "class") <- NULL +} From 83a4b8e2cfb383ffab762505185b19eadb0b73f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 10 Oct 2025 15:36:05 +0200 Subject: [PATCH 24/47] add small test --- R/build-llm.R | 12 ++++++++---- tests/testthat/_snaps/build-llm/index.md | 1 + tests/testthat/_snaps/build-llm/llms.txt | 2 ++ tests/testthat/test-build-llm.R | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 4 
deletions(-) create mode 100644 tests/testthat/_snaps/build-llm/index.md create mode 100644 tests/testthat/_snaps/build-llm/llms.txt create mode 100644 tests/testthat/test-build-llm.R diff --git a/R/build-llm.R b/R/build-llm.R index 26146a0c7..3ff83302a 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -4,6 +4,7 @@ build_llm_docs <- function(pkg = ".") { cli::cli_rule("Building docs for llms") paths <- get_site_paths(pkg) + purrr::walk(paths, convert_md, pkg = pkg) index <- c( @@ -27,10 +28,13 @@ convert_md <- function(path, pkg) { # simplify page header (which includes logo + source link) title <- xml2::xml_find_first(main_html, ".//h1") - xml2::xml_remove( - xml2::xml_find_first(main_html, ".//div[@class='page-header']") - ) - xml2::xml_add_child(main_html, title, .where = 0) + # website for a package without README/index.md + if (length(title) > 0) { + xml2::xml_remove( + xml2::xml_find_first(main_html, ".//div[@class='page-header']") + ) + xml2::xml_add_child(main_html, title, .where = 0) + } # drop internal anchors xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) diff --git a/tests/testthat/_snaps/build-llm/index.md b/tests/testthat/_snaps/build-llm/index.md new file mode 100644 index 000000000..835679966 --- /dev/null +++ b/tests/testthat/_snaps/build-llm/index.md @@ -0,0 +1 @@ +My package does great things! diff --git a/tests/testthat/_snaps/build-llm/llms.txt b/tests/testthat/_snaps/build-llm/llms.txt new file mode 100644 index 000000000..c4d91e8c9 --- /dev/null +++ b/tests/testthat/_snaps/build-llm/llms.txt @@ -0,0 +1,2 @@ +My package does great things! +# diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R new file mode 100644 index 000000000..5873fd342 --- /dev/null +++ b/tests/testthat/test-build-llm.R @@ -0,0 +1,17 @@ +test_that("build_llm_docs() works", { + skip_if_no_pandoc() + pkg <- local_pkgdown_site( + desc = list( + Package = "pkgdown", + Description = "My package does great things!" 
+ ) + ) + + build_site(pkg, install = FALSE, new_process = FALSE) + + llms_txt <- path(pkg$dst_path, "llms.txt") + expect_snapshot_file(llms_txt) + + index_md <- path(pkg$dst_path, "index.md") + expect_snapshot_file(index_md) +}) From c1d127129bbd387d69d8d8106151f5969c3050be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 10 Oct 2025 15:59:42 +0200 Subject: [PATCH 25/47] lifecycle, thanks Claude --- R/build-llm.R | 2 ++ inst/pandoc/badge.lua | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 inst/pandoc/badge.lua diff --git a/R/build-llm.R b/R/build-llm.R index 3ff83302a..1937603de 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -42,10 +42,12 @@ convert_md <- function(path, pkg) { # replace all links with absolute link to .md create_absolute_links(main_html, pkg) + lua_filter <- system.file("pandoc", "badge.lua", package = "pkgdown") pandoc::pandoc_convert( text = main_html, from = "html", to = "markdown_strict+definition_lists+footnotes+backtick_code_blocks", + args = c(sprintf("--lua-filter=%s", lua_filter)), output = path_ext_set(path, "md") ) } diff --git a/inst/pandoc/badge.lua b/inst/pandoc/badge.lua new file mode 100644 index 000000000..432daab37 --- /dev/null +++ b/inst/pandoc/badge.lua @@ -0,0 +1,19 @@ +function Link(el) + -- Check if this is a lifecycle.r-lib.org link + if string.match(el.target, "lifecycle%.r%-lib%.org") then + -- Check if it contains exactly one image + if #el.content == 1 and el.content[1].t == "Image" then + local img = el.content[1] + local img_src = img.src + + -- Extract lifecycle stage from the image filename + local stage = string.match(img_src, "lifecycle%-(%w+)%.svg") + if stage then + return pandoc.Strong({pandoc.Str(stage)}) + end + end + end + + -- Return unchanged + return el +end \ No newline at end of file From 6eb626a19bbd50f1dd65826bb227b65446080169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Mon, 13 Oct 2025 12:31:46 +0200 Subject: [PATCH 
26/47] footnote --- R/build-llm.R | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/R/build-llm.R b/R/build-llm.R index 1937603de..c777bcab5 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -36,6 +36,9 @@ convert_md <- function(path, pkg) { xml2::xml_add_child(main_html, title, .where = 0) } + # fix footnotes + convert_popovers_to_footnotes(main_html) + # drop internal anchors xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) @@ -74,3 +77,78 @@ create_absolute_links <- function(main_html, pkg) { } xml2::xml_attr(a, "class") <- NULL } + +convert_popovers_to_footnotes <- function(main_html) { + # Find all popover footnote references + popover_refs <- xml2::xml_find_all( + main_html, + ".//a[@class='footnote-ref'][@data-bs-content]" + ) + + if (length(popover_refs) == 0) { + return(main_html) + } + + # Create footnotes section if it doesn't exist + footnotes_section <- xml2::xml_find_first( + main_html, + ".//section[@class='footnotes']" + ) + if (length(footnotes_section) == 0) { + # Add footnotes section at the end of main + footnotes_section <- xml2::xml_add_child( + main_html, + "section", + id = "footnotes", + class = "footnotes footnotes-end-of-document", + role = "doc-endnotes" + ) + xml2::xml_add_child(footnotes_section, "hr") + footnotes_ol <- xml2::xml_add_child(footnotes_section, "ol") + } else { + footnotes_ol <- xml2::xml_find_first(footnotes_section, ".//ol") + } + + # Process each popover reference using purrr + purrr::iwalk(popover_refs, function(ref, i) { + # Extract footnote content from data-bs-content + content <- xml2::xml_attr(ref, "data-bs-content") + + # Decode HTML entities in the content + content <- xml2::xml_text(xml2::read_html(paste0( + "
", + content, + "
" + ))) + + # Create footnote ID + fn_id <- paste0("fn", i) + fnref_id <- paste0("fnref", i) + + # Update the reference link + xml2::xml_attr(ref, "href") <- paste0("#", fn_id) + xml2::xml_attr(ref, "class") <- "footnote-ref" + xml2::xml_attr(ref, "id") <- fnref_id + xml2::xml_attr(ref, "role") <- "doc-noteref" + xml2::xml_set_attr(ref, "tabindex", NULL) + xml2::xml_set_attr(ref, "data-bs-toggle", NULL) + xml2::xml_set_attr(ref, "data-bs-content", NULL) + + # Create footnote list item + fn_li <- xml2::xml_add_child(footnotes_ol, "li", id = fn_id) + fn_p <- xml2::xml_add_child(fn_li, "p") + xml2::xml_text(fn_p) <- content + + # Add back reference + back_ref <- xml2::xml_add_child( + fn_p, + "a", + "↩︎", + href = paste0("#", fnref_id), + class = "footnote-back", + role = "doc-backlink" + ) + }) + + return(main_html) +} From 89072f01fc2445457a728ea3dd83de1d935fdb7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Thu, 16 Oct 2025 12:53:52 +0200 Subject: [PATCH 27/47] footnotes best bet --- R/build-llm.R | 63 ++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 46 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index c777bcab5..bc6265be7 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -44,13 +44,11 @@ convert_md <- function(path, pkg) { # replace all links with absolute link to .md create_absolute_links(main_html, pkg) - - lua_filter <- system.file("pandoc", "badge.lua", package = "pkgdown") + xml2::write_html(main_html, path_ext_set(path, "hhtml")) pandoc::pandoc_convert( text = main_html, from = "html", - to = "markdown_strict+definition_lists+footnotes+backtick_code_blocks", - args = c(sprintf("--lua-filter=%s", lua_filter)), + to = "gfm+definition_lists-raw_html", output = path_ext_set(path, "md") ) } @@ -79,23 +77,20 @@ create_absolute_links <- function(main_html, pkg) { } convert_popovers_to_footnotes <- function(main_html) { - # Find all popover footnote references popover_refs <- 
xml2::xml_find_all( main_html, - ".//a[@class='footnote-ref'][@data-bs-content]" + ".//a[@class='footnote-ref']" ) - if (length(popover_refs) == 0) { - return(main_html) + return() } - # Create footnotes section if it doesn't exist + # Create footnotes section footnotes_section <- xml2::xml_find_first( main_html, ".//section[@class='footnotes']" ) if (length(footnotes_section) == 0) { - # Add footnotes section at the end of main footnotes_section <- xml2::xml_add_child( main_html, "section", @@ -109,46 +104,22 @@ convert_popovers_to_footnotes <- function(main_html) { footnotes_ol <- xml2::xml_find_first(footnotes_section, ".//ol") } - # Process each popover reference using purrr purrr::iwalk(popover_refs, function(ref, i) { - # Extract footnote content from data-bs-content - content <- xml2::xml_attr(ref, "data-bs-content") - - # Decode HTML entities in the content - content <- xml2::xml_text(xml2::read_html(paste0( - "
", - content, - "
" - ))) - - # Create footnote ID + text_content <- xml2::xml_attr(ref, "data-bs-content") fn_id <- paste0("fn", i) fnref_id <- paste0("fnref", i) + xml2::xml_attrs(ref) <- list( + href = paste0("#", fn_id), + id = fnref_id, + role = "doc-noteref", + class = "footnote-ref" + ) + print(as.character(ref)) - # Update the reference link - xml2::xml_attr(ref, "href") <- paste0("#", fn_id) - xml2::xml_attr(ref, "class") <- "footnote-ref" - xml2::xml_attr(ref, "id") <- fnref_id - xml2::xml_attr(ref, "role") <- "doc-noteref" - xml2::xml_set_attr(ref, "tabindex", NULL) - xml2::xml_set_attr(ref, "data-bs-toggle", NULL) - xml2::xml_set_attr(ref, "data-bs-content", NULL) - - # Create footnote list item fn_li <- xml2::xml_add_child(footnotes_ol, "li", id = fn_id) - fn_p <- xml2::xml_add_child(fn_li, "p") - xml2::xml_text(fn_p) <- content - - # Add back reference - back_ref <- xml2::xml_add_child( - fn_p, - "a", - "↩︎", - href = paste0("#", fnref_id), - class = "footnote-back", - role = "doc-backlink" - ) + parsed_content <- xml2::read_html(text_content) |> + xml2::xml_find_first(".//body") |> + xml2::xml_children() + purrr::walk(parsed_content, \(x) xml2::xml_add_child(fn_li, x)) }) - - return(main_html) } From 1785c692c3976de5362f297b252b3cd4603b3a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Thu, 16 Oct 2025 13:11:13 +0200 Subject: [PATCH 28/47] badges --- R/build-llm.R | 27 +++++++++++++++++++++++++-- inst/pandoc/badge.lua | 19 ------------------- 2 files changed, 25 insertions(+), 21 deletions(-) delete mode 100644 inst/pandoc/badge.lua diff --git a/R/build-llm.R b/R/build-llm.R index bc6265be7..49f5a34c4 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -39,12 +39,15 @@ convert_md <- function(path, pkg) { # fix footnotes convert_popovers_to_footnotes(main_html) + # fix badges + convert_lifecycle_badges(main_html) + # drop internal anchors xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) # replace all links with absolute 
link to .md create_absolute_links(main_html, pkg) - xml2::write_html(main_html, path_ext_set(path, "hhtml")) + pandoc::pandoc_convert( text = main_html, from = "html", @@ -114,7 +117,6 @@ convert_popovers_to_footnotes <- function(main_html) { role = "doc-noteref", class = "footnote-ref" ) - print(as.character(ref)) fn_li <- xml2::xml_add_child(footnotes_ol, "li", id = fn_id) parsed_content <- xml2::read_html(text_content) |> @@ -123,3 +125,24 @@ convert_popovers_to_footnotes <- function(main_html) { purrr::walk(parsed_content, \(x) xml2::xml_add_child(fn_li, x)) }) } + +convert_lifecycle_badges <- function(html) { + badges <- xml2::xml_find_all(html, ".//a[contains(@href, 'lifecycle.r')]") + + if (length(badges) == 0) { + return(invisible()) + } + + purrr::walk(badges, \(x) { + stage <- sub( + "https://lifecycle.r-lib.org/articles/stages.html#", + "", + xml2::xml_attr(x, "href") + ) + xml2::xml_replace( + x, + "strong", + stage + ) + }) +} diff --git a/inst/pandoc/badge.lua b/inst/pandoc/badge.lua deleted file mode 100644 index 432daab37..000000000 --- a/inst/pandoc/badge.lua +++ /dev/null @@ -1,19 +0,0 @@ -function Link(el) - -- Check if this is a lifecycle.r-lib.org link - if string.match(el.target, "lifecycle%.r%-lib%.org") then - -- Check if it contains exactly one image - if #el.content == 1 and el.content[1].t == "Image" then - local img = el.content[1] - local img_src = img.src - - -- Extract lifecycle stage from the image filename - local stage = string.match(img_src, "lifecycle%-(%w+)%.svg") - if stage then - return pandoc.Strong({pandoc.Str(stage)}) - end - end - end - - -- Return unchanged - return el -end \ No newline at end of file From cb0ee64f08abc8d187eab464972c86f4ac0f7336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Thu, 16 Oct 2025 13:18:55 +0200 Subject: [PATCH 29/47] fix chunks --- R/build-llm.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/R/build-llm.R b/R/build-llm.R index 
49f5a34c4..9e5cd0f64 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -39,6 +39,9 @@ convert_md <- function(path, pkg) { # fix footnotes convert_popovers_to_footnotes(main_html) + # fix code + convert_code_chunks(main_html) + # fix badges convert_lifecycle_badges(main_html) @@ -146,3 +149,19 @@ convert_lifecycle_badges <- function(html) { ) }) } + +convert_code_chunks <- function(html) { + code <- xml2::xml_find_all(html, ".//pre[contains(@class, 'sourceCode')]") + + purrr::walk( + code, + \(x) { + lang <- trimws(sub( + "sourceCode", + "", + sub("downlit", "", xml2::xml_attr(x, "class")) + )) + xml2::xml_attr(x, "class") <- lang + } + ) +} From 916d01be2edc9fa8880b1ea37621e5d01fbd88b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Thu, 16 Oct 2025 13:30:39 +0200 Subject: [PATCH 30/47] test --- tests/testthat/_snaps/build-llm/llm.md | 39 ++++++++++++++++++++++++++ tests/testthat/assets/llm.html | 37 ++++++++++++++++++++++++ tests/testthat/test-build-llm.R | 13 +++++++++ 3 files changed, 89 insertions(+) create mode 100644 tests/testthat/_snaps/build-llm/llm.md create mode 100644 tests/testthat/assets/llm.html diff --git a/tests/testthat/_snaps/build-llm/llm.md b/tests/testthat/_snaps/build-llm/llm.md new file mode 100644 index 000000000..cf9ec23bf --- /dev/null +++ b/tests/testthat/_snaps/build-llm/llm.md @@ -0,0 +1,39 @@ +# Automatically link references and articles in an HTML page + +**deprecated** + +`intro`: “Get Startedâ€?, which links to a vignette or article with +the same name as the package[¹](#fn1). + +``` yaml +template: + includes: + before_title: + after_navbar: +``` + +These inclusions will appear on all screen sizes, and will not be +collapsed into the the navbar drop down. + +You can also customise the colour scheme of the navbar by using the +`type` and `bg` parameters. See above for details. + +``` r +usethis::create_package("~/desktop/testpackage") +# ... edit files ... 
+pkgdown::build_site(tmp, new_process = FALSE, preview = FALSE) +``` + +Once you have built a minimal package that recreates the error, create a +GitHub repository from the package (e.g. with +[`usethis::use_git()`](https://usethis.r-lib.org/reference/use_git.html) + +[`usethis::use_github()`](https://usethis.r-lib.org/reference/use_github.html)), +and file an issue with a link to the repository. + +------------------------------------------------------------------------ + +1. Note that dots (`.`) in the package name need to be replaced by + hyphens (`-`) in the vignette filename to be recognized as the + intro. That means for a package `foo.bar` the intro needs to be + named `foo-bar.Rmd`. diff --git a/tests/testthat/assets/llm.html b/tests/testthat/assets/llm.html new file mode 100644 index 000000000..183947336 --- /dev/null +++ b/tests/testthat/assets/llm.html @@ -0,0 +1,37 @@ + +
+ +

[Deprecated]

+ + +intro: “Get Started”, which links to a vignette or +article with the same name as the package1. + +
template:
+  includes:
+    before_title: <!-- inserted before the package title in the header ->
+    before_navbar: <!-- inserted before the navbar links -->
+    after_navbar: <!-- inserted after the navbar links -->
+

These inclusions will appear on all screen sizes, and will not be +collapsed into the the navbar drop down.

+

You can also customise the colour scheme of the navbar by using the +type and bg parameters. See above for +details.

+ +
+usethis::create_package("~/desktop/testpackage")
+# ... edit files ...
+pkgdown::build_site(tmp, new_process = FALSE, preview = FALSE)
+

Once you have built a minimal package that recreates the error, create a GitHub repository from the package (e.g. with usethis::use_git() + usethis::use_github()), and file an issue with a link to the repository.

+ +
\ No newline at end of file diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index 5873fd342..b33640b36 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -15,3 +15,16 @@ test_that("build_llm_docs() works", { index_md <- path(pkg$dst_path, "index.md") expect_snapshot_file(index_md) }) + +test_that("convert_md() works", { + skip_if_no_pandoc() + pkg <- local_pkgdown_site() + + dir <- withr::local_tempdir() + pkg$dst_path <- dir + + html <- test_path("assets", "llm.html") + fs::file_copy(html, path(dir, "llm.html")) + + expect_snapshot_file(convert_md("llm.html", pkg)) +}) From 1ab432dfebf7758aa84a37487e65c9c7b3f8824d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 17 Oct 2025 08:02:54 +0200 Subject: [PATCH 31/47] add test and fix for untested thing :melting_face: --- R/build-llm.R | 8 ++++++-- tests/testthat/_snaps/build-llm/llm.md | 7 +++++++ tests/testthat/assets/llm.html | 7 +++++++ tests/testthat/test-build-llm.R | 1 + 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index 9e5cd0f64..ec3bb6bfc 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -74,10 +74,14 @@ create_absolute_links <- function(main_html, pkg) { if (pkg$development$in_dev && pkg$bs_version > 3) { url <- paste0(url, pkg$prefix) } - a_external <- a[!grepl("external-link", xml2::xml_attr(a, "class"))] + a_internal <- a[ + !startsWith(xml2::xml_attr(a, "href"), "https") & + !startsWith(xml2::xml_attr(a, "href"), "#") + ] - href_absolute <- xml2::url_absolute(xml2::xml_attr(a_external, "href"), url) + href_absolute <- xml2::url_absolute(xml2::xml_attr(a_internal, "href"), url) href_absolute <- path_ext_set(href_absolute, "md") + xml2::xml_attr(a_internal, "href") <- href_absolute } xml2::xml_attr(a, "class") <- NULL } diff --git a/tests/testthat/_snaps/build-llm/llm.md b/tests/testthat/_snaps/build-llm/llm.md index cf9ec23bf..a3d0252d9 100644 --- 
a/tests/testthat/_snaps/build-llm/llm.md +++ b/tests/testthat/_snaps/build-llm/llm.md @@ -31,6 +31,13 @@ GitHub repository from the package (e.g. with [`usethis::use_github()`](https://usethis.r-lib.org/reference/use_github.html)), and file an issue with a link to the repository. +pkgdown is designed to make it quick and easy to build a website for +your package. You can see pkgdown in action at +: this is the output of pkgdown applied to +the latest version of pkgdown. Learn more in +[`vignette("pkgdown")`](https:/pkgdown.r-lib.org/articles/pkgdown.md) or +[`?build_site`](https:/pkgdown.r-lib.org/reference/build_site.md). + ------------------------------------------------------------------------ 1. Note that dots (`.`) in the package name need to be replaced by diff --git a/tests/testthat/assets/llm.html b/tests/testthat/assets/llm.html index 183947336..4d74c5624 100644 --- a/tests/testthat/assets/llm.html +++ b/tests/testthat/assets/llm.html @@ -34,4 +34,11 @@
pkgdown is designed to make it quick and easy to build a website for your package. + You can see pkgdown in action at https://pkgdown.r-lib.org: + this is the output of pkgdown applied to the latest version of pkgdown. + Learn more in vignette("pkgdown") + or ?build_site.

+ +
\ No newline at end of file diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index b33640b36..1384a99bc 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -19,6 +19,7 @@ test_that("build_llm_docs() works", { test_that("convert_md() works", { skip_if_no_pandoc() pkg <- local_pkgdown_site() + pkg$meta$url <- "https://pkgdown.r-lib.org" dir <- withr::local_tempdir() pkg$dst_path <- dir From 68cbe750f5f6ca60f6608990861e341d6c0f6bdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 17 Oct 2025 08:07:06 +0200 Subject: [PATCH 32/47] :see_no_evil: --- R/build-llm.R | 3 ++- tests/testthat/_snaps/build-llm/llm.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index ec3bb6bfc..ec1d92fad 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -80,9 +80,10 @@ create_absolute_links <- function(main_html, pkg) { ] href_absolute <- xml2::url_absolute(xml2::xml_attr(a_internal, "href"), url) - href_absolute <- path_ext_set(href_absolute, "md") + href_absolute <- sub("html$", "md", href_absolute) xml2::xml_attr(a_internal, "href") <- href_absolute } + xml2::xml_attr(a, "class") <- NULL } diff --git a/tests/testthat/_snaps/build-llm/llm.md b/tests/testthat/_snaps/build-llm/llm.md index a3d0252d9..0b486eb86 100644 --- a/tests/testthat/_snaps/build-llm/llm.md +++ b/tests/testthat/_snaps/build-llm/llm.md @@ -35,8 +35,8 @@ pkgdown is designed to make it quick and easy to build a website for your package. You can see pkgdown in action at : this is the output of pkgdown applied to the latest version of pkgdown. Learn more in -[`vignette("pkgdown")`](https:/pkgdown.r-lib.org/articles/pkgdown.md) or -[`?build_site`](https:/pkgdown.r-lib.org/reference/build_site.md). +[`vignette("pkgdown")`](https://pkgdown.r-lib.org/articles/pkgdown.md) +or [`?build_site`](https://pkgdown.r-lib.org/reference/build_site.md). 
------------------------------------------------------------------------ From c4e33209b182065b5421b67c7e86f13041ebcb84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 17 Oct 2025 08:13:55 +0200 Subject: [PATCH 33/47] fix --- R/build-llm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build-llm.R b/R/build-llm.R index ec1d92fad..18220e3f3 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -12,7 +12,7 @@ build_llm_docs <- function(pkg = ".") { read_file_if_exists(path(pkg$dst_path, "reference", "index.md")), read_file_if_exists(path(pkg$dst_path, "articles", "index.md")) ) - writeLines(index, path(pkg$dst_path, "llms.txt")) + write_lines(index, path(pkg$dst_path, "llms.txt")) invisible() } From 3a681afccc2228e2a58b77efc4d31df88e482e1a Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 14:20:51 -0500 Subject: [PATCH 34/47] Docs, refactoring, more tests --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/build-llm.R | 186 +++++++++++++------------ R/build.R | 1 + man/build_articles.Rd | 1 + man/build_home.Rd | 1 + man/build_llm_docs.Rd | 33 +++++ man/build_news.Rd | 1 + man/build_reference.Rd | 1 + man/build_site.Rd | 1 + man/build_tutorials.Rd | 1 + man/pkgdown-package.Rd | 2 +- man/test-dont.Rd | 4 +- tests/testthat/_snaps/build-llm/llm.md | 47 +------ tests/testthat/assets/llm.html | 64 +++------ tests/testthat/test-build-llm.R | 61 ++++++-- 16 files changed, 218 insertions(+), 189 deletions(-) create mode 100644 man/build_llm_docs.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3ed224b65..d307b98f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -76,5 +76,5 @@ Config/testthat/start-first: build-article, build-quarto-article, Config/usethis/last-upkeep: 2025-09-07 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 SystemRequirements: pandoc diff --git a/NAMESPACE b/NAMESPACE index a9289d152..fc91394e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -116,6 +116,7 @@ 
export(build_articles_index) export(build_favicons) export(build_home) export(build_home_index) +export(build_llm_docs) export(build_news) export(build_redirects) export(build_reference) diff --git a/R/build-llm.R b/R/build-llm.R index 18220e3f3..751c431e6 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -1,11 +1,45 @@ +#' Build docs for LLMs +#' +#' @description +#' `build_llm_docs()` creates an `LLMs.txt` at the root of your site +#' that contains the contents of your `README.md`, your reference index, +#' and your articles index. It also creates a `.md` file for every existing +#' `.html` file in your site. Together, this gives an LLM an overview of your +#' package and the ability to find out more by following links. +#' +#' If you don't want these files generated for your site, you can opt-out by +#' adding the following to your `pkgdown.yml`: +#' +#' ```yaml +#' llm-docs: false +#' ``` +#' +#' @family site components +#' @inheritParams build_site +#' @export build_llm_docs <- function(pkg = ".") { - rlang::check_installed("pandoc") pkg <- as_pkgdown(pkg) + if (isFALSE(pkg$meta$`llm-docs`)) { + return(invisible()) + } cli::cli_rule("Building docs for llms") - paths <- get_site_paths(pkg) - purrr::walk(paths, convert_md, pkg = pkg) + if (!is.null(pkg$meta$url)) { + url <- paste0(pkg$meta$url, "/") + if (pkg$development$in_dev) { + url <- paste0(url, pkg$prefix) + } + } else { + url <- NULL + } + + paths <- get_site_paths(pkg) + purrr::walk(paths, \(path) { + src_path <- path(pkg[["dst_path"]], path) + dst_path <- path_ext_set(src_path, "md") + convert_md(src_path, dst_path, url) + }) index <- c( read_lines(path(pkg$dst_path, "index.md")), @@ -17,81 +51,66 @@ build_llm_docs <- function(pkg = ".") { invisible() } -convert_md <- function(path, pkg) { - path <- path(pkg[["dst_path"]], path) - - html <- xml2::read_html(path) +convert_md <- function(src_path, dst_path, url = NULL) { + html <- xml2::read_html(src_path) main_html <- xml2::xml_find_first(html, 
".//main") if (length(main_html) == 0) { return() } - # simplify page header (which includes logo + source link) - title <- xml2::xml_find_first(main_html, ".//h1") - # website for a package without README/index.md - if (length(title) > 0) { - xml2::xml_remove( - xml2::xml_find_first(main_html, ".//div[@class='page-header']") - ) - xml2::xml_add_child(main_html, title, .where = 0) - } - - # fix footnotes - convert_popovers_to_footnotes(main_html) - - # fix code - convert_code_chunks(main_html) - - # fix badges - convert_lifecycle_badges(main_html) + simplify_page_header(main_html) + simplify_anchors(main_html) + simplify_code(main_html) + simplify_popovers_to_footnotes(main_html) + simplify_lifecycle_badges(main_html) + create_absolute_links(main_html, url) - # drop internal anchors - xml2::xml_remove(xml2::xml_find_all(main_html, ".//a[@class='anchor']")) + path <- file_temp() + xml2::write_html(html, path, format = FALSE) + on.exit(unlink(path), add = TRUE) - # replace all links with absolute link to .md - create_absolute_links(main_html, pkg) - - pandoc::pandoc_convert( - text = main_html, + rmarkdown::pandoc_convert( + input = path, + output = dst_path, from = "html", to = "gfm+definition_lists-raw_html", - output = path_ext_set(path, "md") ) } # Helpers --------------------------------------------------------------------- -read_file_if_exists <- function(path) { - if (file_exists(path)) { - read_lines(path) +# simplify page header (which includes logo + source link) +simplify_page_header <- function(html) { + title <- xml2::xml_find_first(html, ".//h1") + # website for a package without README/index.md + if (length(title) > 0) { + xml2::xml_remove(xml2::xml_find_first(html, ".//div[@class='page-header']")) + xml2::xml_add_child(html, title, .where = 0) } + invisible() } -create_absolute_links <- function(main_html, pkg) { - a <- xml2::xml_find_all(main_html, ".//a") - if (!is.null(pkg$meta$url)) { - url <- paste0(pkg$meta$url, "/") - if (pkg$development$in_dev 
&& pkg$bs_version > 3) { - url <- paste0(url, pkg$prefix) - } - a_internal <- a[ - !startsWith(xml2::xml_attr(a, "href"), "https") & - !startsWith(xml2::xml_attr(a, "href"), "#") - ] - - href_absolute <- xml2::url_absolute(xml2::xml_attr(a_internal, "href"), url) - href_absolute <- sub("html$", "md", href_absolute) - xml2::xml_attr(a_internal, "href") <- href_absolute +# drop internal anchors +simplify_anchors <- function(html) { + xml2::xml_remove(xml2::xml_find_all(html, ".//a[@class='anchor']")) + invisible() +} + +# strip extraneoous classes +simplify_code <- function(html) { + extract_lang <- function(class) { + trimws(gsub("sourceCode|downlit", "", class)) } + code <- xml2::xml_find_all(html, ".//pre[contains(@class, 'sourceCode')]") - xml2::xml_attr(a, "class") <- NULL + purrr::walk(code, \(x) { + xml2::xml_attr(x, "class") <- extract_lang(xml2::xml_attr(x, "class")) + }) + invisible() } -convert_popovers_to_footnotes <- function(main_html) { - popover_refs <- xml2::xml_find_all( - main_html, - ".//a[@class='footnote-ref']" - ) +simplify_popovers_to_footnotes <- function(main_html) { + popover_refs <- xml2::xml_find_all(main_html, ".//a[@class='footnote-ref']") if (length(popover_refs) == 0) { return() } @@ -134,39 +153,32 @@ convert_popovers_to_footnotes <- function(main_html) { }) } -convert_lifecycle_badges <- function(html) { - badges <- xml2::xml_find_all(html, ".//a[contains(@href, 'lifecycle.r')]") +simplify_lifecycle_badges <- function(html) { + badges <- xml2::xml_find_all(html, "//span[contains(@class, 'lifecycle')]") + xml2::xml_replace(badges, "strong", xml2::xml_text(badges)) + invisible() +} - if (length(badges) == 0) { - return(invisible()) +create_absolute_links <- function(main_html, url) { + if (is.null(url)) { + return() } - purrr::walk(badges, \(x) { - stage <- sub( - "https://lifecycle.r-lib.org/articles/stages.html#", - "", - xml2::xml_attr(x, "href") - ) - xml2::xml_replace( - x, - "strong", - stage - ) - }) -} + a <- 
xml2::xml_find_all(main_html, ".//a") + href <- xml2::xml_attr(a, "href") -convert_code_chunks <- function(html) { - code <- xml2::xml_find_all(html, ".//pre[contains(@class, 'sourceCode')]") + a_internal <- a[!startsWith(href, "https") & !startsWith(href, "#")] + href_absolute <- xml2::url_absolute(xml2::xml_attr(a_internal, "href"), url) + href_absolute <- sub("html$", "md", href_absolute) - purrr::walk( - code, - \(x) { - lang <- trimws(sub( - "sourceCode", - "", - sub("downlit", "", xml2::xml_attr(x, "class")) - )) - xml2::xml_attr(x, "class") <- lang - } - ) + xml2::xml_attr(a_internal, "href") <- href_absolute + xml2::xml_attr(a, "class") <- NULL + + invisible() +} + +read_file_if_exists <- function(path) { + if (file_exists(path)) { + read_lines(path) + } } diff --git a/R/build.R b/R/build.R index 678c64590..f3d31c6ea 100644 --- a/R/build.R +++ b/R/build.R @@ -10,6 +10,7 @@ #' * [build_tutorials()] #' * [build_news()] #' * [build_redirects()] +#' * [build_llm_docs()] #' #' See the documentation for the each function to learn how to control #' that aspect of the site. This page documents options that affect the diff --git a/man/build_articles.Rd b/man/build_articles.Rd index 67b18f900..108e0ae25 100644 --- a/man/build_articles.Rd +++ b/man/build_articles.Rd @@ -261,6 +261,7 @@ as HTML widgets. 
\seealso{ Other site components: \code{\link{build_home}()}, +\code{\link{build_llm_docs}()}, \code{\link{build_news}()}, \code{\link{build_reference}()}, \code{\link{build_tutorials}()} diff --git a/man/build_home.Rd b/man/build_home.Rd index 6630fbc36..dc588e297 100644 --- a/man/build_home.Rd +++ b/man/build_home.Rd @@ -298,6 +298,7 @@ Or completely remove it: \seealso{ Other site components: \code{\link{build_articles}()}, +\code{\link{build_llm_docs}()}, \code{\link{build_news}()}, \code{\link{build_reference}()}, \code{\link{build_tutorials}()} diff --git a/man/build_llm_docs.Rd b/man/build_llm_docs.Rd new file mode 100644 index 000000000..204ffdf70 --- /dev/null +++ b/man/build_llm_docs.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build-llm.R +\name{build_llm_docs} +\alias{build_llm_docs} +\title{Build docs for LLMs} +\usage{ +build_llm_docs(pkg = ".") +} +\arguments{ +\item{pkg}{Path to package.} +} +\description{ +\code{build_llm_docs()} creates an \code{LLMs.txt} at the root of your site +that contains the contents of your \code{README.md}, your reference index, +and your articles index. It also creates a \code{.md} file for every existing +\code{.html} file in your site. Together, this gives an LLM an overview of your +package and the ability to find out more by following links. + +If you don't want these files generated for your site, you can opt-out by +adding the following to your \code{pkgdown.yml}: + +\if{html}{\out{
}}\preformatted{llm-docs: false +}\if{html}{\out{
}} +} +\seealso{ +Other site components: +\code{\link{build_articles}()}, +\code{\link{build_home}()}, +\code{\link{build_news}()}, +\code{\link{build_reference}()}, +\code{\link{build_tutorials}()} +} +\concept{site components} diff --git a/man/build_news.Rd b/man/build_news.Rd index 194d6b7b2..b6d9a61d2 100644 --- a/man/build_news.Rd +++ b/man/build_news.Rd @@ -85,6 +85,7 @@ Suppress the default addition of CRAN release dates with: Other site components: \code{\link{build_articles}()}, \code{\link{build_home}()}, +\code{\link{build_llm_docs}()}, \code{\link{build_reference}()}, \code{\link{build_tutorials}()} } diff --git a/man/build_reference.Rd b/man/build_reference.Rd index 3ae9edd16..cb14c2795 100644 --- a/man/build_reference.Rd +++ b/man/build_reference.Rd @@ -190,6 +190,7 @@ as HTML widgets. Other site components: \code{\link{build_articles}()}, \code{\link{build_home}()}, +\code{\link{build_llm_docs}()}, \code{\link{build_news}()}, \code{\link{build_tutorials}()} } diff --git a/man/build_site.Rd b/man/build_site.Rd index ff5ac2ddd..c63f98cb3 100644 --- a/man/build_site.Rd +++ b/man/build_site.Rd @@ -71,6 +71,7 @@ take \code{quiet} arguments.} \item \code{\link[=build_tutorials]{build_tutorials()}} \item \code{\link[=build_news]{build_news()}} \item \code{\link[=build_redirects]{build_redirects()}} +\item \code{\link[=build_llm_docs]{build_llm_docs()}} } See the documentation for the each function to learn how to control diff --git a/man/build_tutorials.Rd b/man/build_tutorials.Rd index a2cb248bc..0934c3656 100644 --- a/man/build_tutorials.Rd +++ b/man/build_tutorials.Rd @@ -47,6 +47,7 @@ section. 
This should be a list where each element specifies: Other site components: \code{\link{build_articles}()}, \code{\link{build_home}()}, +\code{\link{build_llm_docs}()}, \code{\link{build_news}()}, \code{\link{build_reference}()} } diff --git a/man/pkgdown-package.Rd b/man/pkgdown-package.Rd index 519c5c181..62bb3c11f 100644 --- a/man/pkgdown-package.Rd +++ b/man/pkgdown-package.Rd @@ -32,7 +32,7 @@ Authors: Other contributors: \itemize{ - \item Posit Software, PBC (03wc8by49) [copyright holder, funder] + \item Posit Software, PBC (\href{https://ror.org/03wc8by49}{ROR}) [copyright holder, funder] } } diff --git a/man/test-dont.Rd b/man/test-dont.Rd index c0b63c0ec..7d7bba880 100644 --- a/man/test-dont.Rd +++ b/man/test-dont.Rd @@ -56,10 +56,10 @@ x # should be 4 x <- 1 -\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (FALSE) withAutoprint(\{ # examplesIf} x <- 2 \dontshow{\}) # examplesIf} -\dontshow{if (TRUE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (TRUE) withAutoprint(\{ # examplesIf} x <- 3 \dontshow{\}) # examplesIf} x # should be 3 diff --git a/tests/testthat/_snaps/build-llm/llm.md b/tests/testthat/_snaps/build-llm/llm.md index 0b486eb86..ae8665643 100644 --- a/tests/testthat/_snaps/build-llm/llm.md +++ b/tests/testthat/_snaps/build-llm/llm.md @@ -1,46 +1,5 @@ -# Automatically link references and articles in an HTML page +# Page title -**deprecated** +## Heading -`intro`: “Get Startedâ€?, which links to a vignette or article with -the same name as the package[¹](#fn1). - -``` yaml -template: - includes: - before_title: - after_navbar: -``` - -These inclusions will appear on all screen sizes, and will not be -collapsed into the the navbar drop down. - -You can also customise the colour scheme of the navbar by using the -`type` and `bg` parameters. See above for details. - -``` r -usethis::create_package("~/desktop/testpackage") -# ... edit files ... 
-pkgdown::build_site(tmp, new_process = FALSE, preview = FALSE) -``` - -Once you have built a minimal package that recreates the error, create a -GitHub repository from the package (e.g. with -[`usethis::use_git()`](https://usethis.r-lib.org/reference/use_git.html) + -[`usethis::use_github()`](https://usethis.r-lib.org/reference/use_github.html)), -and file an issue with a link to the repository. - -pkgdown is designed to make it quick and easy to build a website for -your package. You can see pkgdown in action at -: this is the output of pkgdown applied to -the latest version of pkgdown. Learn more in -[`vignette("pkgdown")`](https://pkgdown.r-lib.org/articles/pkgdown.md) -or [`?build_site`](https://pkgdown.r-lib.org/reference/build_site.md). - ------------------------------------------------------------------------- - -1. Note that dots (`.`) in the package name need to be replaced by - hyphens (`-`) in the vignette filename to be recognized as the - intro. That means for a package `foo.bar` the intro needs to be - named `foo-bar.Rmd`. +Some text diff --git a/tests/testthat/assets/llm.html b/tests/testthat/assets/llm.html index 4d74c5624..c908ab2f1 100644 --- a/tests/testthat/assets/llm.html +++ b/tests/testthat/assets/llm.html @@ -1,44 +1,22 @@ -
- -

[Deprecated]

- - -intro: “Get Started”, which links to a vignette or -article with the same name as the package1. - -
template:
-  includes:
-    before_title: <!-- inserted before the package title in the header ->
-    before_navbar: <!-- inserted before the navbar links -->
-    after_navbar: <!-- inserted after the navbar links -->
-

These inclusions will appear on all screen sizes, and will not be -collapsed into the the navbar drop down.

-

You can also customise the colour scheme of the navbar by using the -type and bg parameters. See above for -details.

- -
-usethis::create_package("~/desktop/testpackage")
-# ... edit files ...
-pkgdown::build_site(tmp, new_process = FALSE, preview = FALSE)
-

Once you have built a minimal package that recreates the error, create a GitHub repository from the package (e.g. with usethis::use_git() + usethis::use_github()), and file an issue with a link to the repository.

- -
pkgdown is designed to make it quick and easy to build a website for your package. - You can see pkgdown in action at https://pkgdown.r-lib.org: - this is the output of pkgdown applied to the latest version of pkgdown. - Learn more in vignette("pkgdown") - or ?build_site.

- - -
\ No newline at end of file + + +
+ + +

Heading

+ +

Some text

+
+ + diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index 1384a99bc..2ad472bf9 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -1,13 +1,14 @@ -test_that("build_llm_docs() works", { +test_that("integration test for build_llm_docs()", { skip_if_no_pandoc() pkg <- local_pkgdown_site( desc = list( Package = "pkgdown", Description = "My package does great things!" - ) + ), + meta = list(url = "https://pkgdown.r-lib.org") ) - build_site(pkg, install = FALSE, new_process = FALSE) + suppressMessages(build_site(pkg, devel = TRUE)) llms_txt <- path(pkg$dst_path, "llms.txt") expect_snapshot_file(llms_txt) @@ -16,16 +17,54 @@ test_that("build_llm_docs() works", { expect_snapshot_file(index_md) }) -test_that("convert_md() works", { +test_that("integration test for convert_md()", { skip_if_no_pandoc() - pkg <- local_pkgdown_site() - pkg$meta$url <- "https://pkgdown.r-lib.org" - dir <- withr::local_tempdir() - pkg$dst_path <- dir + path <- withr::local_tempfile(pattern = "pkgdown-llm") + convert_md(test_path("assets", "llm.html"), path) + expect_snapshot_file(path, name = "llm.md") +}) + +test_that("simplifies page header", { + html <- xml2::read_html( + r"( +
)" + ) + simplify_page_header(xml2::xml_find_first(html, ".//main")) + expect_equal(xpath_contents(html, ".//main"), "

Package index

") +}) + +test_that("replaces lifecycle badges with strong text", { + html <- xml2::read_html( + r"(deprecated)" + ) + simplify_lifecycle_badges(html) + expect_equal(xpath_contents(html, ".//body"), "deprecated") +}) - html <- test_path("assets", "llm.html") - fs::file_copy(html, path(dir, "llm.html")) +test_that("converts internal urls to absolute with .md ending", { + html <- xml2::read_html( + r"( + link + link + link + )" + ) + create_absolute_links(html, "https://pkgdown.r-lib.org") + expect_equal( + xpath_attr(html, ".//a", "href"), + c( + "https://pkgdown.r-lib.org/llm.md", + "#fragment", + "https://example.org" + ) + ) +}) - expect_snapshot_file(convert_md("llm.html", pkg)) +test_that("strip extra classes from pre", { + html <- xml2::read_html(r"(
1+1
)") + simplify_code(html) + expect_equal(xpath_attr(html, ".//pre", "class"), "r") }) From 6d42ec1ce9c30ae8601bb9ede1b209e4ac7e0313 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 14:25:54 -0500 Subject: [PATCH 35/47] Add news bullet --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 3f255a728..76360bf80 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # pkgdown (development version) +* New `build_llm_docs()` generates a `LLMs.txt` at the root directory of your site, and provides a `.md` version of every page. You can disable by adding `llm-docs: false` to your `_pkgdown.yaml` (#2914, @maelle) * Links generated with `\code{\link{foo}()}` now have the `()` moved into the `` in the generated output (@maelle). * Plots in dark mode are now transformed with a CSS filter to improve their visibility (thanks to @gadenbuie). From c5a9e0304f36d0f937ee6f1e215eb82e3f914840 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 14:26:59 -0500 Subject: [PATCH 36/47] Revert workflow change --- .github/workflows/pkgdown.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index c43ec708e..04d9d6498 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -39,7 +39,7 @@ jobs: - uses: r-lib/actions/setup-tinytex@v2 - name: Build site - run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, override = list(url = "")) + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} - name: Deploy to GitHub pages 🚀 From bfa081f40b66c9c85b4a66dbc69254fb08bb0453 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 14:27:31 -0500 Subject: [PATCH 37/47] Use fs function --- R/build-llm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build-llm.R b/R/build-llm.R index 751c431e6..5d17e668a 100644 --- a/R/build-llm.R +++ 
b/R/build-llm.R @@ -67,7 +67,7 @@ convert_md <- function(src_path, dst_path, url = NULL) { path <- file_temp() xml2::write_html(html, path, format = FALSE) - on.exit(unlink(path), add = TRUE) + on.exit(file_delete(path), add = TRUE) rmarkdown::pandoc_convert( input = path, From 9a8a38f8cdd4be46a13f2bfbf0e531d5c58c65bf Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 14:33:13 -0500 Subject: [PATCH 38/47] Drop full site integration test; add empty lines --- R/build-llm.R | 2 ++ tests/testthat/_snaps/build-llm.md | 7 +++++++ tests/testthat/_snaps/build-llm/index.md | 1 - tests/testthat/_snaps/build-llm/llm.md | 5 ----- tests/testthat/_snaps/build-llm/llms.txt | 2 -- tests/testthat/test-build-llm.R | 21 +-------------------- 6 files changed, 10 insertions(+), 28 deletions(-) create mode 100644 tests/testthat/_snaps/build-llm.md delete mode 100644 tests/testthat/_snaps/build-llm/index.md delete mode 100644 tests/testthat/_snaps/build-llm/llm.md delete mode 100644 tests/testthat/_snaps/build-llm/llms.txt diff --git a/R/build-llm.R b/R/build-llm.R index 5d17e668a..8dd31dbd5 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -43,7 +43,9 @@ build_llm_docs <- function(pkg = ".") { index <- c( read_lines(path(pkg$dst_path, "index.md")), + "", read_file_if_exists(path(pkg$dst_path, "reference", "index.md")), + "", read_file_if_exists(path(pkg$dst_path, "articles", "index.md")) ) write_lines(index, path(pkg$dst_path, "llms.txt")) diff --git a/tests/testthat/_snaps/build-llm.md b/tests/testthat/_snaps/build-llm.md new file mode 100644 index 000000000..1c61df0aa --- /dev/null +++ b/tests/testthat/_snaps/build-llm.md @@ -0,0 +1,7 @@ +# integration test for convert_md() + + Code + readLines(path) + Output + [1] "# Page title" "" "## Heading" "" "Some text" + diff --git a/tests/testthat/_snaps/build-llm/index.md b/tests/testthat/_snaps/build-llm/index.md deleted file mode 100644 index 835679966..000000000 --- a/tests/testthat/_snaps/build-llm/index.md +++ 
/dev/null @@ -1 +0,0 @@ -My package does great things! diff --git a/tests/testthat/_snaps/build-llm/llm.md b/tests/testthat/_snaps/build-llm/llm.md deleted file mode 100644 index ae8665643..000000000 --- a/tests/testthat/_snaps/build-llm/llm.md +++ /dev/null @@ -1,5 +0,0 @@ -# Page title - -## Heading - -Some text diff --git a/tests/testthat/_snaps/build-llm/llms.txt b/tests/testthat/_snaps/build-llm/llms.txt deleted file mode 100644 index c4d91e8c9..000000000 --- a/tests/testthat/_snaps/build-llm/llms.txt +++ /dev/null @@ -1,2 +0,0 @@ -My package does great things! -# diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index 2ad472bf9..82abdb2d2 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -1,28 +1,9 @@ -test_that("integration test for build_llm_docs()", { - skip_if_no_pandoc() - pkg <- local_pkgdown_site( - desc = list( - Package = "pkgdown", - Description = "My package does great things!" - ), - meta = list(url = "https://pkgdown.r-lib.org") - ) - - suppressMessages(build_site(pkg, devel = TRUE)) - - llms_txt <- path(pkg$dst_path, "llms.txt") - expect_snapshot_file(llms_txt) - - index_md <- path(pkg$dst_path, "index.md") - expect_snapshot_file(index_md) -}) - test_that("integration test for convert_md()", { skip_if_no_pandoc() path <- withr::local_tempfile(pattern = "pkgdown-llm") convert_md(test_path("assets", "llm.html"), path) - expect_snapshot_file(path, name = "llm.md") + expect_snapshot(readLines(path)) }) test_that("simplifies page header", { From 552761c3d1a4f3a83785ddcfa94de7201c701225 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 14:42:58 -0500 Subject: [PATCH 39/47] Fix another lint --- tests/testthat/_snaps/build-llm.md | 2 +- tests/testthat/test-build-llm.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/build-llm.md b/tests/testthat/_snaps/build-llm.md index 1c61df0aa..463e79679 100644 --- 
a/tests/testthat/_snaps/build-llm.md +++ b/tests/testthat/_snaps/build-llm.md @@ -1,7 +1,7 @@ # integration test for convert_md() Code - readLines(path) + read_lines(path) Output [1] "# Page title" "" "## Heading" "" "Some text" diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index 82abdb2d2..6f32865e4 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -3,7 +3,7 @@ test_that("integration test for convert_md()", { path <- withr::local_tempfile(pattern = "pkgdown-llm") convert_md(test_path("assets", "llm.html"), path) - expect_snapshot(readLines(path)) + expect_snapshot(read_lines(path)) }) test_that("simplifies page header", { From 694b688a167be2d978e76066160a03206c8d729b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 16:02:06 -0500 Subject: [PATCH 40/47] Write out the correct content Annoyingly I couldn't reprex the issue I was seeing on the live site --- R/build-llm.R | 2 +- tests/testthat/_snaps/build-llm.md | 8 ++++++-- tests/testthat/test-build-llm.R | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index 8dd31dbd5..024b3006d 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -68,7 +68,7 @@ convert_md <- function(src_path, dst_path, url = NULL) { create_absolute_links(main_html, url) path <- file_temp() - xml2::write_html(html, path, format = FALSE) + xml2::write_html(main_html, path, format = FALSE) on.exit(file_delete(path), add = TRUE) rmarkdown::pandoc_convert( diff --git a/tests/testthat/_snaps/build-llm.md b/tests/testthat/_snaps/build-llm.md index 463e79679..e0146b987 100644 --- a/tests/testthat/_snaps/build-llm.md +++ b/tests/testthat/_snaps/build-llm.md @@ -1,7 +1,11 @@ # integration test for convert_md() Code - read_lines(path) + write_lines(read_lines(path), stdout()) Output - [1] "# Page title" "" "## Heading" "" "Some text" + # Page title + + ## Heading + + Some text diff --git a/tests/testthat/test-build-llm.R 
b/tests/testthat/test-build-llm.R index 6f32865e4..daf8eb5c0 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -3,7 +3,7 @@ test_that("integration test for convert_md()", { path <- withr::local_tempfile(pattern = "pkgdown-llm") convert_md(test_path("assets", "llm.html"), path) - expect_snapshot(read_lines(path)) + expect_snapshot(write_lines(read_lines(path), stdout())) }) test_that("simplifies page header", { From acc393df9c0de6b3e6d94c55ca7f38d78c0dcb41 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 16:22:24 -0500 Subject: [PATCH 41/47] Construct full base url --- R/build-llm.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index 024b3006d..65b7af64f 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -38,7 +38,8 @@ build_llm_docs <- function(pkg = ".") { purrr::walk(paths, \(path) { src_path <- path(pkg[["dst_path"]], path) dst_path <- path_ext_set(src_path, "md") - convert_md(src_path, dst_path, url) + base_url <- xml2::url_absolute(paste0(path_dir(path), "/"), url) + convert_md(src_path, dst_path, base_url) }) index <- c( @@ -167,6 +168,8 @@ create_absolute_links <- function(main_html, url) { } a <- xml2::xml_find_all(main_html, ".//a") + xml2::xml_attr(a, "class") <- NULL + href <- xml2::xml_attr(a, "href") a_internal <- a[!startsWith(href, "https") & !startsWith(href, "#")] @@ -174,7 +177,6 @@ create_absolute_links <- function(main_html, url) { href_absolute <- sub("html$", "md", href_absolute) xml2::xml_attr(a_internal, "href") <- href_absolute - xml2::xml_attr(a, "class") <- NULL invisible() } From ab3a2df2ee2e1615d328e9182d711cd3517f8701 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 17 Oct 2025 16:32:14 -0500 Subject: [PATCH 42/47] Handle other style of lifecycle badge --- R/build-llm.R | 12 +++++++++++- tests/testthat/test-build-llm.R | 10 ++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/R/build-llm.R 
b/R/build-llm.R index 65b7af64f..2e126f1ba 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -157,8 +157,18 @@ simplify_popovers_to_footnotes <- function(main_html) { } simplify_lifecycle_badges <- function(html) { + # on reference index badges <- xml2::xml_find_all(html, "//span[contains(@class, 'lifecycle')]") - xml2::xml_replace(badges, "strong", xml2::xml_text(badges)) + xml2::xml_replace(badges, "strong", paste0("[", xml2::xml_text(badges), "]")) + + # on individual pages + badges <- xml2::xml_find_all( + html, + "//a[.//img[starts-with(@src, 'figures/lifecycle-')]]" + ) + imgs <- xml2::xml_find_first(badges, ".//img") + xml2::xml_replace(badges, "strong", tolower(xml2::xml_attr(imgs, "alt"))) + invisible() } diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index daf8eb5c0..c70e60c47 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -19,10 +19,16 @@ test_that("simplifies page header", { test_that("replaces lifecycle badges with strong text", { html <- xml2::read_html( - r"(deprecated)" + r"( + deprecated + [Experimental] + )" ) simplify_lifecycle_badges(html) - expect_equal(xpath_contents(html, ".//body"), "deprecated") + expect_equal( + xpath_text(html, ".//strong"), + c("[deprecated]", "[experimental]") + ) }) test_that("converts internal urls to absolute with .md ending", { From 61f4f482652f740293284e2b8c6b4678b13ba750 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 20 Oct 2025 09:04:28 -0500 Subject: [PATCH 43/47] Simplify dls --- R/build-llm-dl.R | 66 +++++++++++++++++++ R/build-llm.R | 1 + R/tweak-reference.R | 11 +++- .../templates/content-reference-index.html | 4 +- tests/testthat/_snaps/build-llm-dl.md | 19 ++++++ tests/testthat/test-build-llm-dl.R | 59 +++++++++++++++++ 6 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 R/build-llm-dl.R create mode 100644 tests/testthat/_snaps/build-llm-dl.md create mode 100644 tests/testthat/test-build-llm-dl.R diff --git 
a/R/build-llm-dl.R b/R/build-llm-dl.R new file mode 100644 index 000000000..eaf9283b3 --- /dev/null +++ b/R/build-llm-dl.R @@ -0,0 +1,66 @@ +simplify_dls <- function(html) { + dls <- xml2::xml_find_all(html, ".//dl") + for (dl in dls) { + simplify_dl(dl) + } + invisible() +} + +simplify_dl <- function(dl) { + children <- xml2::xml_children(dl) + + names <- xml2::xml_name(children) + if (!is_simple_dl(names)) { + cli::cli_warn("Skipping this
: not a simple term-definition list") + return() + } + + groups <- split(children, (seq_along(children) - 1) %/% 2) + + bullets <- lapply(groups, create_li_from_group) + ul <- xml2::read_xml("
    ") + xml_insert(ul, bullets) + + xml2::xml_replace(dl, ul) +} + +# Must have an even number of children that alternate between dt and dd +is_simple_dl <- function(names) { + if (length(names) %% 2 != 0) { + return(FALSE) + } + odd <- names[seq_along(names) %% 2 == 1] + even <- names[seq_along(names) %% 2 == 0] + + all(odd == "dt") && all(even == "dd") +} + +create_li_from_group <- function(group) { + dt <- group[[1]] + dd <- group[[2]] + + if (has_children(dd)) { + # params case + para <- xml2::read_xml("

    ") + xml_insert(para, xml2::xml_contents(dt)) + xml2::xml_add_child(para, xml_text_node(": ")) + + bullet <- xml2::read_xml("
  • ") + xml2::xml_add_child(bullet, para) + } else { + # reference index + bullet <- xml2::read_xml("
  • ") + xml_insert(bullet, xml2::xml_contents(dt)) + xml2::xml_add_child(bullet, xml_text_node(": ")) + } + xml_insert(bullet, xml2::xml_contents(dd)) + + bullet +} + +has_children <- function(x) length(xml2::xml_children(x)) > 0 + +xml_text_node <- function(x) { + span <- xml2::read_xml(paste0("", x, "")) + xml2::xml_find_first(span, ".//text()") +} diff --git a/R/build-llm.R b/R/build-llm.R index 2e126f1ba..e4185c369 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -66,6 +66,7 @@ convert_md <- function(src_path, dst_path, url = NULL) { simplify_code(main_html) simplify_popovers_to_footnotes(main_html) simplify_lifecycle_badges(main_html) + simplify_dls(main_html) create_absolute_links(main_html, url) path <- file_temp() diff --git a/R/tweak-reference.R b/R/tweak-reference.R index 5ac643bf0..5ff9252e8 100644 --- a/R/tweak-reference.R +++ b/R/tweak-reference.R @@ -84,14 +84,21 @@ tweak_highlight_other <- function(div) { xml_replace_contents <- function(node, new) { xml2::xml_remove(xml2::xml_contents(node)) + xml_insert(node, new) +} + +xml_insert <- function(node, new) { + if (is.list(new)) { + contents <- new + } else { + contents <- xml2::xml_contents(new) + } - contents <- xml2::xml_contents(new) for (child in contents) { xml2::xml_add_child(node, child) } } - tweak_extra_logo <- function(html) { img <- xml2::xml_find_all( html, diff --git a/inst/BS5/templates/content-reference-index.html b/inst/BS5/templates/content-reference-index.html index d9a981066..810c2f8fc 100644 --- a/inst/BS5/templates/content-reference-index.html +++ b/inst/BS5/templates/content-reference-index.html @@ -10,14 +10,14 @@

    {{{pagetitle}}}

    {{#subtitle}}

    {{{.}}}

    {{/subtitle}} {{#desc}}
    {{{desc}}}
    {{/desc}} - {{#topics}}
    +
    {{#topics}}
    {{#has_icons}}{{#icon}}{{/icon}}{{/has_icons}} {{#aliases}}{{{.}}} {{/aliases}} {{#lifecycle}}{{.}}{{/lifecycle}}
    {{{title}}}
    -
    {{/topics}} + {{/topics}}
    {{/rows}} diff --git a/tests/testthat/_snaps/build-llm-dl.md b/tests/testthat/_snaps/build-llm-dl.md new file mode 100644 index 000000000..26b6a5f89 --- /dev/null +++ b/tests/testthat/_snaps/build-llm-dl.md @@ -0,0 +1,19 @@ +# dd with block elements simplifies correctly + + Code + xpath_xml(html, ".//li") + Output +
  • +

    a:

    +

    b

    +

    c

    +
  • + +# warns if not applied + + Code + . <- simplify_dls(html) + Condition + Warning: + Skipping this
    : not a simple term-definition list + diff --git a/tests/testthat/test-build-llm-dl.R b/tests/testthat/test-build-llm-dl.R new file mode 100644 index 000000000..345b60823 --- /dev/null +++ b/tests/testthat/test-build-llm-dl.R @@ -0,0 +1,59 @@ +test_that("single dt/dd pair converts to simple li", { + html <- xml2::read_html("
    ") + simplify_dls(html) + + expect_equal(xpath_length(html, ".//dl"), 0) + expect_equal(xpath_length(html, ".//ul"), 1) +}) + +test_that("single dt/dd pair converts to simple li", { + html <- xml2::read_html( + "
    +
    a
    +
    b
    +
    " + ) + simplify_dls(html) + + expect_equal(xpath_length(html, ".//dl"), 0) + expect_equal(xpath_text(html, ".//li"), "a: b") +}) + +test_that("dd with block elements simplifies correctly", { + html <- xml2::read_html( + "
    +
    a
    +
    +

    b

    +

    c

    +
    +
    " + ) + simplify_dls(html) + + expect_equal(xpath_length(html, ".//dl"), 0) + expect_equal(xpath_length(html, ".//ul"), 1) + expect_snapshot(xpath_xml(html, ".//li")) +}) + +test_that("warns if not applied", { + html <- xml2::read_html( + " +
    +
    a
    +
    + " + ) + expect_snapshot(. <- simplify_dls(html)) +}) + +test_that("correctly detects simple dls", { + expect_false(is_simple_dl("dt")) + expect_false(is_simple_dl(c("dd", "dt"))) + expect_false(is_simple_dl(c("dt", "dd", "dt"))) + expect_false(is_simple_dl(c("dd", "dt", "dd", "dt"))) + + expect_true(is_simple_dl(c())) + expect_true(is_simple_dl(c("dt", "dd"))) + expect_true(is_simple_dl(c("dt", "dd", "dt", "dd"))) +}) From f3d39dea1253674692d2cf5d718c11e7dc0d8ae3 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 20 Oct 2025 09:05:47 -0500 Subject: [PATCH 44/47] Don't need pandoc anymore --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5937d50ec..cc44c07e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,7 +55,6 @@ Suggests: knitr (>= 1.50), magick, methods, - pandoc, pkgload (>= 1.0.2), quarto, rsconnect, From b9c22e9dd86b2046e3259f707709058fecfab26a Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 20 Oct 2025 10:42:16 -0500 Subject: [PATCH 45/47] Always replace extension (even if no url) --- R/build-llm.R | 17 +++++++---------- tests/testthat/test-build-llm.R | 6 ++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index e4185c369..31731540e 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -173,21 +173,18 @@ simplify_lifecycle_badges <- function(html) { invisible() } -create_absolute_links <- function(main_html, url) { - if (is.null(url)) { - return() - } - +create_absolute_links <- function(main_html, url = NULL) { a <- xml2::xml_find_all(main_html, ".//a") xml2::xml_attr(a, "class") <- NULL href <- xml2::xml_attr(a, "href") + is_internal <- !startsWith(href, "https") & !startsWith(href, "#") + if (!is.null(url)) { + href[is_internal] <- xml2::url_absolute(href[is_internal], url) + } + href[is_internal] <- sub("html$", "md", href[is_internal]) - a_internal <- a[!startsWith(href, "https") & !startsWith(href, "#")] - href_absolute <- 
xml2::url_absolute(xml2::xml_attr(a_internal, "href"), url) - href_absolute <- sub("html$", "md", href_absolute) - - xml2::xml_attr(a_internal, "href") <- href_absolute + xml2::xml_attr(a[is_internal], "href") <- href[is_internal] invisible() } diff --git a/tests/testthat/test-build-llm.R b/tests/testthat/test-build-llm.R index c70e60c47..5c71f6474 100644 --- a/tests/testthat/test-build-llm.R +++ b/tests/testthat/test-build-llm.R @@ -50,6 +50,12 @@ test_that("converts internal urls to absolute with .md ending", { ) }) +test_that("adjusts extension even without url", { + html <- xml2::read_html(r"(link)") + create_absolute_links(html) + expect_equal(xpath_attr(html, ".//a", "href"), "llm.md") +}) + test_that("strip extra classes from pre", { html <- xml2::read_html(r"(
    1+1
    )") simplify_code(html) From 378aa066258ceff6ad7d2b9da953e68c3693f035 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 20 Oct 2025 10:43:56 -0500 Subject: [PATCH 46/47] Oops --- R/tweak-reference.R | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/R/tweak-reference.R b/R/tweak-reference.R index 5ff9252e8..93b0c0f78 100644 --- a/R/tweak-reference.R +++ b/R/tweak-reference.R @@ -84,17 +84,12 @@ tweak_highlight_other <- function(div) { xml_replace_contents <- function(node, new) { xml2::xml_remove(xml2::xml_contents(node)) - xml_insert(node, new) + contents <- xml2::xml_contents(new) + xml_insert(node, contents) } xml_insert <- function(node, new) { - if (is.list(new)) { - contents <- new - } else { - contents <- xml2::xml_contents(new) - } - - for (child in contents) { + for (child in new) { xml2::xml_add_child(node, child) } } From c3f64a28865dc619db9488314b2f4fd1935e335a Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 20 Oct 2025 13:57:01 -0500 Subject: [PATCH 47/47] Ensure `url_absolute()` only called with urls --- R/build-llm.R | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/R/build-llm.R b/R/build-llm.R index 31731540e..a5f3172c8 100644 --- a/R/build-llm.R +++ b/R/build-llm.R @@ -25,21 +25,11 @@ build_llm_docs <- function(pkg = ".") { cli::cli_rule("Building docs for llms") - if (!is.null(pkg$meta$url)) { - url <- paste0(pkg$meta$url, "/") - if (pkg$development$in_dev) { - url <- paste0(url, pkg$prefix) - } - } else { - url <- NULL - } - paths <- get_site_paths(pkg) purrr::walk(paths, \(path) { src_path <- path(pkg[["dst_path"]], path) dst_path <- path_ext_set(src_path, "md") - base_url <- xml2::url_absolute(paste0(path_dir(path), "/"), url) - convert_md(src_path, dst_path, base_url) + convert_md(src_path, dst_path, full_url(pkg, path)) }) index <- c( @@ -54,6 +44,19 @@ build_llm_docs <- function(pkg = ".") { invisible() } +full_url <- function(pkg, path) { + if 
(is.null(pkg$meta$url)) { + return() + } + + url <- paste0(pkg$meta$url, "/") + if (pkg$development$in_dev) { + url <- paste0(url, pkg$prefix) + } + + xml2::url_absolute(paste0(path_dir(path), "/"), url) +} + convert_md <- function(src_path, dst_path, url = NULL) { html <- xml2::read_html(src_path) main_html <- xml2::xml_find_first(html, ".//main")