Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
dc6f9f4
feat: build Markdown oages for all pages
maelle Sep 26, 2025
b14b8a0
feat: add llms.txt
maelle Sep 26, 2025
3b2018f
dl
maelle Sep 26, 2025
07679cd
change type
maelle Sep 26, 2025
b4aee59
lint
maelle Sep 26, 2025
4808f7b
fix links
maelle Sep 26, 2025
3124664
no brio
maelle Sep 26, 2025
9ae4146
fix as not always an index (test failing)
maelle Sep 26, 2025
63bdedd
fix test?
maelle Sep 26, 2025
214208b
tweak
maelle Sep 26, 2025
0029edd
fix test?
maelle Sep 26, 2025
74472e1
fences
maelle Oct 7, 2025
04e95d6
move code
maelle Oct 7, 2025
675a955
Update file based on new function name
hadley Oct 7, 2025
46642ec
Simplify llms.txt generation
hadley Oct 7, 2025
c3d752f
Be less conditional
hadley Oct 7, 2025
b883843
style tweaks
hadley Oct 7, 2025
7fa82a0
Vectorise absolute urls
hadley Oct 7, 2025
b2f7af3
Tweak title
hadley Oct 7, 2025
05b14bd
Use .md extension
hadley Oct 7, 2025
917994f
undo test changes, no config at the moment
maelle Oct 10, 2025
8639edd
this one always exists
maelle Oct 10, 2025
c7d3721
extract fn
maelle Oct 10, 2025
83a4b8e
add small test
maelle Oct 10, 2025
c1d1271
lifecycle, thanks Claude
maelle Oct 10, 2025
6eb626a
footnote
maelle Oct 13, 2025
89072f0
footnotes best bet
maelle Oct 16, 2025
1785c69
badges
maelle Oct 16, 2025
cb0ee64
fix chunks
maelle Oct 16, 2025
916d01b
test
maelle Oct 16, 2025
1ab432d
add test and fix for untested thing :melting_face:
maelle Oct 17, 2025
68cbe75
:see_no_evil:
maelle Oct 17, 2025
c4e3320
fix
maelle Oct 17, 2025
3a681af
Docs, refactoring, more tests
hadley Oct 17, 2025
b1ba868
Merged origin/main into llms
hadley Oct 17, 2025
6d42ec1
Add news bullet
hadley Oct 17, 2025
c5a9e03
Revert workflow change
hadley Oct 17, 2025
bfa081f
Use fs function
hadley Oct 17, 2025
9a8a38f
Drop full site integration test; add empty lines
hadley Oct 17, 2025
552761c
Fix another lint
hadley Oct 17, 2025
694b688
Write out the correct content
hadley Oct 17, 2025
acc393d
Construct full base url
hadley Oct 17, 2025
ab3a2df
Handle other style of lifecycle badge
hadley Oct 17, 2025
61f4f48
Simplify dls
hadley Oct 20, 2025
f3d39de
Don't need pandoc anymore
hadley Oct 20, 2025
b9c22e9
Always replace extension (even if no url)
hadley Oct 20, 2025
378aa06
Oops
hadley Oct 20, 2025
c3f64a2
Ensure `url_absolute()` only called with urls
hadley Oct 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ Config/testthat/start-first: build-article, build-quarto-article,
Config/usethis/last-upkeep: 2025-09-07
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
SystemRequirements: pandoc
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ export(build_articles_index)
export(build_favicons)
export(build_home)
export(build_home_index)
export(build_llm_docs)
export(build_news)
export(build_redirects)
export(build_reference)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pkgdown (development version)

* New `build_llm_docs()` generates a `LLMs.txt` at the root directory of your site, and provides a `.md` version of every page. You can disable by adding `llm-docs: false` to your `_pkgdown.yaml` (#2914, @maelle)
* Links generated with `\code{\link{foo}()}` now have the `()` moved into the `<a>` in the generated output (@maelle).
* Plots in dark mode are now transformed with a CSS filter to improve their
visibility (thanks to @gadenbuie).
Expand Down
66 changes: 66 additions & 0 deletions R/build-llm-dl.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
simplify_dls <- function(html) {
dls <- xml2::xml_find_all(html, ".//dl")
for (dl in dls) {
simplify_dl(dl)
}
invisible()
}

simplify_dl <- function(dl) {
children <- xml2::xml_children(dl)

names <- xml2::xml_name(children)
if (!is_simple_dl(names)) {
cli::cli_warn("Skipping this <dl>: not a simple term-definition list")
return()
}

groups <- split(children, (seq_along(children) - 1) %/% 2)

bullets <- lapply(groups, create_li_from_group)
ul <- xml2::read_xml("<ul></ul>")
xml_insert(ul, bullets)

xml2::xml_replace(dl, ul)
}

# Must have an even number of children that alternate between dt and dd
is_simple_dl <- function(names) {
if (length(names) %% 2 != 0) {
return(FALSE)
}
odd <- names[seq_along(names) %% 2 == 1]
even <- names[seq_along(names) %% 2 == 0]

all(odd == "dt") && all(even == "dd")
}

create_li_from_group <- function(group) {
dt <- group[[1]]
dd <- group[[2]]

if (has_children(dd)) {
# params case
para <- xml2::read_xml("<p></p>")
xml_insert(para, xml2::xml_contents(dt))
xml2::xml_add_child(para, xml_text_node(": "))

bullet <- xml2::read_xml("<li></li>")
xml2::xml_add_child(bullet, para)
} else {
# reference index
bullet <- xml2::read_xml("<li></li>")
xml_insert(bullet, xml2::xml_contents(dt))
xml2::xml_add_child(bullet, xml_text_node(": "))
}
xml_insert(bullet, xml2::xml_contents(dd))

bullet
}

has_children <- function(x) length(xml2::xml_children(x)) > 0

xml_text_node <- function(x) {
span <- xml2::read_xml(paste0("<span>", x, "</span>"))
xml2::xml_find_first(span, ".//text()")
}
199 changes: 199 additions & 0 deletions R/build-llm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#' Build docs for LLMs
#'
#' @description
#' `build_llm_docs()` creates an `LLMs.txt` at the root of your site
#' that contains the contents of your `README.md`, your reference index,
#' and your articles index. It also creates a `.md` file for every existing
#' `.html` file in your site. Together, this gives an LLM an overview of your
#' package and the ability to find out more by following links.
#'
#' If you don't want these files generated for your site, you can opt-out by
#' adding the following to your `pkgdown.yml`:
#'
#' ```yaml
#' llm-docs: false
#' ```
#'
#' @family site components
#' @inheritParams build_site
#' @export
build_llm_docs <- function(pkg = ".") {
pkg <- as_pkgdown(pkg)
if (isFALSE(pkg$meta$`llm-docs`)) {
return(invisible())
}

cli::cli_rule("Building docs for llms")

paths <- get_site_paths(pkg)
purrr::walk(paths, \(path) {
src_path <- path(pkg[["dst_path"]], path)
dst_path <- path_ext_set(src_path, "md")
convert_md(src_path, dst_path, full_url(pkg, path))
})

index <- c(
read_lines(path(pkg$dst_path, "index.md")),
"",
read_file_if_exists(path(pkg$dst_path, "reference", "index.md")),
"",
read_file_if_exists(path(pkg$dst_path, "articles", "index.md"))
)
write_lines(index, path(pkg$dst_path, "llms.txt"))

invisible()
}

full_url <- function(pkg, path) {
if (is.null(pkg$meta$url)) {
return()
}

url <- paste0(pkg$meta$url, "/")
if (pkg$development$in_dev) {
url <- paste0(url, pkg$prefix)
}

xml2::url_absolute(paste0(path_dir(path), "/"), url)
}

convert_md <- function(src_path, dst_path, url = NULL) {
html <- xml2::read_html(src_path)
main_html <- xml2::xml_find_first(html, ".//main")
if (length(main_html) == 0) {
return()
}

simplify_page_header(main_html)
simplify_anchors(main_html)
simplify_code(main_html)
simplify_popovers_to_footnotes(main_html)
simplify_lifecycle_badges(main_html)
simplify_dls(main_html)
create_absolute_links(main_html, url)

path <- file_temp()
xml2::write_html(main_html, path, format = FALSE)
on.exit(file_delete(path), add = TRUE)

rmarkdown::pandoc_convert(
input = path,
output = dst_path,
from = "html",
to = "gfm+definition_lists-raw_html",
)
}

# Helpers ---------------------------------------------------------------------

# simplify page header (which includes logo + source link)
simplify_page_header <- function(html) {
title <- xml2::xml_find_first(html, ".//h1")
# website for a package without README/index.md
if (length(title) > 0) {
xml2::xml_remove(xml2::xml_find_first(html, ".//div[@class='page-header']"))
xml2::xml_add_child(html, title, .where = 0)
}
invisible()
}

# drop internal anchors
simplify_anchors <- function(html) {
xml2::xml_remove(xml2::xml_find_all(html, ".//a[@class='anchor']"))
invisible()
}

# strip extraneoous classes
simplify_code <- function(html) {
extract_lang <- function(class) {
trimws(gsub("sourceCode|downlit", "", class))
}
code <- xml2::xml_find_all(html, ".//pre[contains(@class, 'sourceCode')]")

purrr::walk(code, \(x) {
xml2::xml_attr(x, "class") <- extract_lang(xml2::xml_attr(x, "class"))
})
invisible()
}

simplify_popovers_to_footnotes <- function(main_html) {
popover_refs <- xml2::xml_find_all(main_html, ".//a[@class='footnote-ref']")
if (length(popover_refs) == 0) {
return()
}

# Create footnotes section
footnotes_section <- xml2::xml_find_first(
main_html,
".//section[@class='footnotes']"
)
if (length(footnotes_section) == 0) {
footnotes_section <- xml2::xml_add_child(
main_html,
"section",
id = "footnotes",
class = "footnotes footnotes-end-of-document",
role = "doc-endnotes"
)
xml2::xml_add_child(footnotes_section, "hr")
footnotes_ol <- xml2::xml_add_child(footnotes_section, "ol")
} else {
footnotes_ol <- xml2::xml_find_first(footnotes_section, ".//ol")
}

purrr::iwalk(popover_refs, function(ref, i) {
text_content <- xml2::xml_attr(ref, "data-bs-content")
fn_id <- paste0("fn", i)
fnref_id <- paste0("fnref", i)
xml2::xml_attrs(ref) <- list(
href = paste0("#", fn_id),
id = fnref_id,
role = "doc-noteref",
class = "footnote-ref"
)

fn_li <- xml2::xml_add_child(footnotes_ol, "li", id = fn_id)
parsed_content <- xml2::read_html(text_content) |>
xml2::xml_find_first(".//body") |>
xml2::xml_children()
purrr::walk(parsed_content, \(x) xml2::xml_add_child(fn_li, x))
})
}

simplify_lifecycle_badges <- function(html) {
# on reference index
badges <- xml2::xml_find_all(html, "//span[contains(@class, 'lifecycle')]")
xml2::xml_replace(badges, "strong", paste0("[", xml2::xml_text(badges), "]"))

# on individual pages
badges <- xml2::xml_find_all(
html,
"//a[.//img[starts-with(@src, 'figures/lifecycle-')]]"
)
imgs <- xml2::xml_find_first(badges, ".//img")
xml2::xml_replace(badges, "strong", tolower(xml2::xml_attr(imgs, "alt")))

invisible()
}

create_absolute_links <- function(main_html, url = NULL) {
a <- xml2::xml_find_all(main_html, ".//a")
xml2::xml_attr(a, "class") <- NULL

href <- xml2::xml_attr(a, "href")
is_internal <- !startsWith(href, "https") & !startsWith(href, "#")
if (!is.null(url)) {
href[is_internal] <- xml2::url_absolute(href[is_internal], url)
}
href[is_internal] <- sub("html$", "md", href[is_internal])

xml2::xml_attr(a[is_internal], "href") <- href[is_internal]

invisible()
}

read_file_if_exists <- function(path) {
if (file_exists(path)) {
read_lines(path)
}
}
4 changes: 4 additions & 0 deletions R/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' * [build_tutorials()]
#' * [build_news()]
#' * [build_redirects()]
#' * [build_llm_docs()]
#'
#' See the documentation for the each function to learn how to control
#' that aspect of the site. This page documents options that affect the
Expand Down Expand Up @@ -467,6 +468,9 @@ build_site_local <- function(
build_tutorials(pkg, override = override, preview = FALSE)
build_news(pkg, override = override, preview = FALSE)
build_sitemap(pkg)
if (pkg$bs_version > 3) {
build_llm_docs(pkg)
}
build_redirects(pkg, override = override)
if (pkg$bs_version == 3) {
build_docsearch_json(pkg)
Expand Down
8 changes: 5 additions & 3 deletions R/tweak-reference.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,16 @@ tweak_highlight_other <- function(div) {

xml_replace_contents <- function(node, new) {
xml2::xml_remove(xml2::xml_contents(node))

contents <- xml2::xml_contents(new)
for (child in contents) {
xml_insert(node, contents)
}

xml_insert <- function(node, new) {
for (child in new) {
xml2::xml_add_child(node, child)
}
}


tweak_extra_logo <- function(html) {
img <- xml2::xml_find_all(
html,
Expand Down
4 changes: 2 additions & 2 deletions inst/BS5/templates/content-reference-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ <h1>{{{pagetitle}}}</h1>
{{#subtitle}}<h3>{{{.}}}</h3>{{/subtitle}}
{{#desc}}<div class="section-desc">{{{desc}}}</div>{{/desc}}

{{#topics}}<dl>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@maelle we were accidentally generating a definition list for every definition 😬

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ouch!! 🙈

<dl>{{#topics}}
<dt>
{{#has_icons}}{{#icon}}<a class="icon" href="{{path}}"><img src="icons/{{{.}}}" alt=""/></a>{{/icon}}{{/has_icons}}
{{#aliases}}<code><a href="{{path}}">{{{.}}}</a></code> {{/aliases}}
{{#lifecycle}}<span class="badge lifecycle lifecycle-{{.}}">{{.}}</span>{{/lifecycle}}
</dt>
<dd>{{{title}}}</dd>
</dl>{{/topics}}
{{/topics}}</dl>
</div>{{/rows}}
</main>

Expand Down
1 change: 1 addition & 0 deletions man/build_articles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/build_home.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions man/build_llm_docs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading