-
Notifications
You must be signed in to change notification settings - Fork 339
Build documentation for LLMs #2917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dc6f9f4
b14b8a0
3b2018f
07679cd
b4aee59
4808f7b
3124664
9ae4146
63bdedd
214208b
0029edd
74472e1
04e95d6
675a955
46642ec
c3d752f
b883843
7fa82a0
b2f7af3
05b14bd
917994f
8639edd
c7d3721
83a4b8e
c1d1271
6eb626a
89072f0
1785c69
cb0ee64
916d01b
1ab432d
68cbe75
c4e3320
3a681af
b1ba868
6d42ec1
c5a9e03
bfa081f
9a8a38f
552761c
694b688
acc393d
ab3a2df
61f4f48
f3d39de
b9c22e9
378aa06
c3f64a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
# Rewrite every <dl> found beneath `html` by handing each one to
# simplify_dl(), which modifies the document in place.
#
# Called for its side effects; returns NULL invisibly.
simplify_dls <- function(html) {
  definition_lists <- xml2::xml_find_all(html, ".//dl")
  purrr::walk(definition_lists, simplify_dl)
  invisible()
}
|
|
||
# Replace a single <dl> node with a <ul> holding one <li> per dt/dd pair.
#
# Lists whose children are not strictly alternating <dt>/<dd> elements are
# left untouched and a warning is raised instead; NULL is returned in that
# case. Otherwise the result of xml2::xml_replace() is returned.
simplify_dl <- function(dl) {
  kids <- xml2::xml_children(dl)

  if (!is_simple_dl(xml2::xml_name(kids))) {
    cli::cli_warn("Skipping this <dl>: not a simple term-definition list")
    return()
  }

  # Children 1-2 belong to pair 0, children 3-4 to pair 1, and so on.
  pair_id <- (seq_along(kids) - 1) %/% 2
  pairs <- split(kids, pair_id)

  list_node <- xml2::read_xml("<ul></ul>")
  # NOTE(review): xml_insert() is defined elsewhere; presumably it appends
  # the given nodes as children of its first argument -- confirm.
  xml_insert(list_node, lapply(pairs, create_li_from_group))

  xml2::xml_replace(dl, list_node)
}
|
|
||
# A "simple" definition list has an even number of children that strictly
# alternate <dt>, <dd>, <dt>, <dd>, ...
#
# `names` is the character vector of child element names, in document order.
# Returns TRUE or FALSE; an empty vector counts as simple.
is_simple_dl <- function(names) {
  n <- length(names)
  if (n %% 2 != 0) {
    return(FALSE)
  }

  in_term_slot <- seq_len(n) %% 2 == 1
  terms <- names[in_term_slot]
  definitions <- names[!in_term_slot]

  all(terms == "dt") && all(definitions == "dd")
}
|
|
||
# Build one <li> from a (dt, dd) pair: the term, then ": ", then the
# definition.
#
# `group` holds the <dt> as its first element and the <dd> as its second.
# When the <dd> has element children (the "params" case) the term and the
# separator are wrapped in their own <p>; otherwise (the reference index
# case) they are placed directly in the <li>.
create_li_from_group <- function(group) {
  term <- group[[1]]
  definition <- group[[2]]

  item <- xml2::read_xml("<li></li>")
  wrap_term <- has_children(definition)

  # `holder` is whatever receives the term: a fresh <p>, or the <li> itself.
  holder <- if (wrap_term) xml2::read_xml("<p></p>") else item
  xml_insert(holder, xml2::xml_contents(term))
  xml2::xml_add_child(holder, xml_text_node(": "))

  # The paragraph is attached only once it is fully populated, matching the
  # original statement order.
  if (wrap_term) {
    xml2::xml_add_child(item, holder)
  }

  xml_insert(item, xml2::xml_contents(definition))

  item
}
|
|
||
# TRUE when node `x` has at least one child as reported by
# xml2::xml_children().
has_children <- function(x) {
  n_children <- length(xml2::xml_children(x))
  n_children > 0
}
|
|
||
# Create a text node containing `x` by parsing it inside a throwaway <span>
# and pulling the text node back out.
#
# `x` is pasted into the markup as-is, so it must already be valid XML
# character data.
xml_text_node <- function(x) {
  markup <- paste0("<span>", x, "</span>")
  wrapper <- xml2::read_xml(markup)
  xml2::xml_find_first(wrapper, ".//text()")
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,199 @@ | ||
#' Build docs for LLMs
#'
#' @description
#' `build_llm_docs()` creates an `llms.txt` at the root of your site
#' that contains the contents of your `README.md`, your reference index,
#' and your articles index. It also creates a `.md` file for every existing
#' `.html` file in your site. Together, this gives an LLM an overview of your
#' package and the ability to find out more by following links.
#'
#' If you don't want these files generated for your site, you can opt-out by
#' adding the following to your `_pkgdown.yml`:
#'
#' ```yaml
#' llm-docs: false
#' ```
#'
#' @family site components
#' @inheritParams build_site
#' @return Called for its side effects; returns `NULL` invisibly.
#' @export
build_llm_docs <- function(pkg = ".") {
  pkg <- as_pkgdown(pkg)
  if (isFALSE(pkg$meta$`llm-docs`)) {
    return(invisible())
  }

  cli::cli_rule("Building docs for llms")

  # Write a markdown file next to each page of the built site.
  # NOTE(review): get_site_paths() is defined elsewhere; presumably it yields
  # the site's .html files relative to `pkg$dst_path` -- confirm.
  page_paths <- get_site_paths(pkg)
  purrr::walk(page_paths, \(page_path) {
    src_path <- path(pkg$dst_path, page_path)
    dst_path <- path_ext_set(src_path, "md")
    convert_md(src_path, dst_path, full_url(pkg, page_path))
  })

  # llms.txt is the home page followed by the reference and articles
  # indexes (the latter two only when they exist), separated by blank lines.
  index <- c(
    read_lines(path(pkg$dst_path, "index.md")),
    "",
    read_file_if_exists(path(pkg$dst_path, "reference", "index.md")),
    "",
    read_file_if_exists(path(pkg$dst_path, "articles", "index.md"))
  )
  write_lines(index, path(pkg$dst_path, "llms.txt"))

  invisible()
}
|
|
||
# Absolute URL of the directory that contains `path` on the published site.
#
# `pkg` is the pkgdown object and `path` a page path relative to the site
# root. Returns NULL when no site URL is configured. For development sites
# (`pkg$development$in_dev`) `pkg$prefix` is appended to the base URL.
full_url <- function(pkg, path) {
  if (is.null(pkg$meta$url)) {
    return()
  }

  # Normalise to exactly one trailing slash so a configured URL that already
  # ends in "/" does not produce "//" in the result.
  url <- paste0(sub("/+$", "", pkg$meta$url), "/")
  # isTRUE() treats a missing `in_dev` as "not in development" instead of
  # failing with a zero-length `if` condition.
  if (isTRUE(pkg$development$in_dev)) {
    url <- paste0(url, pkg$prefix)
  }

  xml2::url_absolute(paste0(path_dir(path), "/"), url)
}
|
|
||
# Convert the <main> element of the HTML page at `src_path` to markdown at
# `dst_path`.
#
# The page is simplified in place, relative links are resolved against `url`
# when one is supplied, and the result is passed through pandoc via a
# temporary HTML file. Pages without a <main> element are skipped and NULL
# is returned.
convert_md <- function(src_path, dst_path, url = NULL) {
  page <- xml2::read_html(src_path)
  main_html <- xml2::xml_find_first(page, ".//main")
  if (length(main_html) == 0) {
    return()
  }

  # Each step edits `main_html` in place; they run in this fixed order.
  simplifiers <- list(
    simplify_page_header,
    simplify_anchors,
    simplify_code,
    simplify_popovers_to_footnotes,
    simplify_lifecycle_badges,
    simplify_dls
  )
  for (simplify in simplifiers) {
    simplify(main_html)
  }
  create_absolute_links(main_html, url)

  tmp_html <- file_temp()
  xml2::write_html(main_html, tmp_html, format = FALSE)
  on.exit(file_delete(tmp_html), add = TRUE)

  # Target format: GitHub-flavoured markdown with the definition_lists
  # extension enabled and raw_html disabled.
  rmarkdown::pandoc_convert(
    input = tmp_html,
    output = dst_path,
    from = "html",
    to = "gfm+definition_lists-raw_html"
  )
}
|
|
||
| # Helpers --------------------------------------------------------------------- | ||
|
|
||
# Reduce the page header (which includes logo + source link) to just the
# first <h1>: the `page-header` div is removed and the heading is re-added
# as the first child of `html`.
#
# Called for its side effects; returns NULL invisibly.
simplify_page_header <- function(html) {
  heading <- xml2::xml_find_first(html, ".//h1")
  if (length(heading) == 0) {
    # website for a package without README/index.md: nothing to do
    return(invisible())
  }

  header_div <- xml2::xml_find_first(html, ".//div[@class='page-header']")
  xml2::xml_remove(header_div)
  xml2::xml_add_child(html, heading, .where = 0)

  invisible()
}
|
|
||
# Drop internal anchors: every <a> whose class attribute is exactly "anchor"
# is removed from the document.
#
# Called for its side effects; returns NULL invisibly.
simplify_anchors <- function(html) {
  anchors <- xml2::xml_find_all(html, ".//a[@class='anchor']")
  xml2::xml_remove(anchors)
  invisible()
}
|
|
||
# Strip extraneous classes from code blocks: for every <pre> whose class
# contains "sourceCode", the "sourceCode" and "downlit" tokens are deleted
# from the class attribute and surrounding whitespace is trimmed, leaving
# whatever else was there (e.g. the language name).
#
# Called for its side effects; returns NULL invisibly.
simplify_code <- function(html) {
  blocks <- xml2::xml_find_all(html, ".//pre[contains(@class, 'sourceCode')]")

  for (block in blocks) {
    classes <- xml2::xml_attr(block, "class")
    remaining <- trimws(gsub("sourceCode|downlit", "", classes))
    xml2::xml_attr(block, "class") <- remaining
  }

  invisible()
}
|
|
||
# Turn popover footnote references back into a regular footnotes section.
#
# Every <a class="footnote-ref"> carrying popover HTML in its
# `data-bs-content` attribute is rewritten to point at "#fn<i>", and that
# HTML becomes the body of a matching <li id="fn<i>"> in the footnotes <ol>.
# The footnotes <section> (and its <ol>) is created at the end of
# `main_html` when it does not exist yet.
#
# Called for its side effects; returns NULL invisibly.
simplify_popovers_to_footnotes <- function(main_html) {
  # Refs without (or with empty) popover content cannot be converted, and
  # parsing their missing content would fail, so they are left untouched.
  popover_refs <- xml2::xml_find_all(
    main_html,
    ".//a[@class='footnote-ref'][string-length(@data-bs-content) > 0]"
  )
  if (length(popover_refs) == 0) {
    return(invisible())
  }

  # Match "footnotes" as a class token so that a section with several
  # classes (such as the one created below) is recognised and reused.
  section_xpath <- paste0(
    ".//section[contains(concat(' ', normalize-space(@class), ' '), ",
    "' footnotes ')]"
  )
  footnotes_section <- xml2::xml_find_first(main_html, section_xpath)
  if (length(footnotes_section) == 0) {
    footnotes_section <- xml2::xml_add_child(
      main_html,
      "section",
      id = "footnotes",
      class = "footnotes footnotes-end-of-document",
      role = "doc-endnotes"
    )
    xml2::xml_add_child(footnotes_section, "hr")
  }

  # Reuse the section's list when present, otherwise start one.
  footnotes_ol <- xml2::xml_find_first(footnotes_section, ".//ol")
  if (length(footnotes_ol) == 0) {
    footnotes_ol <- xml2::xml_add_child(footnotes_section, "ol")
  }

  purrr::iwalk(popover_refs, function(ref, i) {
    # Read the content first: replacing the attributes below discards it.
    text_content <- xml2::xml_attr(ref, "data-bs-content")
    fn_id <- paste0("fn", i)
    fnref_id <- paste0("fnref", i)
    xml2::xml_attrs(ref) <- list(
      href = paste0("#", fn_id),
      id = fnref_id,
      role = "doc-noteref",
      class = "footnote-ref"
    )

    # The popover content is an HTML fragment; parse it and move the
    # resulting body children into the new list item.
    fn_li <- xml2::xml_add_child(footnotes_ol, "li", id = fn_id)
    parsed_content <- xml2::read_html(text_content) |>
      xml2::xml_find_first(".//body") |>
      xml2::xml_children()
    purrr::walk(parsed_content, \(x) xml2::xml_add_child(fn_li, x))
  })

  invisible()
}
|
|
||
# Replace lifecycle badges with plain <strong> text.
#
# Two forms are handled: <span> badges whose class contains "lifecycle"
# (reference index), replaced by "[<badge text>]", and links wrapping a
# `figures/lifecycle-*` image (individual pages), replaced by the image's
# lower-cased alt text.
#
# Called for its side effects; returns NULL invisibly.
simplify_lifecycle_badges <- function(html) {
  # Both queries are relative (".//") so that only descendants of `html` are
  # touched, like in the other simplify_*() helpers.

  # on reference index
  badges <- xml2::xml_find_all(html, ".//span[contains(@class, 'lifecycle')]")
  if (length(badges) > 0) {
    labels <- paste0("[", xml2::xml_text(badges), "]")
    xml2::xml_replace(badges, "strong", labels)
  }

  # on individual pages
  is_badge_img <- "img[starts-with(@src, 'figures/lifecycle-')]"
  badges <- xml2::xml_find_all(html, paste0(".//a[.//", is_badge_img, "]"))
  if (length(badges) > 0) {
    # Take the alt text from the lifecycle image itself rather than from
    # whichever image happens to come first inside the link.
    imgs <- xml2::xml_find_first(badges, paste0(".//", is_badge_img))
    xml2::xml_replace(badges, "strong", tolower(xml2::xml_attr(imgs, "alt")))
  }

  invisible()
}
|
|
||
# Point internal links at the markdown version of their target.
#
# All <a> elements under `main_html` lose their class attribute. Links that
# are internal (they have an href, carry no URL scheme, are not
# protocol-relative and are not pure "#fragment" links) are resolved against
# `url` when it is supplied and have their ".html" extension replaced by
# ".md", keeping any "#fragment" or "?query" suffix.
#
# Called for its side effects; returns NULL invisibly.
create_absolute_links <- function(main_html, url = NULL) {
  a <- xml2::xml_find_all(main_html, ".//a")
  if (length(a) == 0) {
    return(invisible())
  }
  xml2::xml_attr(a, "class") <- NULL

  href <- xml2::xml_attr(a, "href")
  # An <a> without href yields NA; it must be excluded explicitly because NA
  # is not allowed in the logical index used for the assignments below.
  has_href <- !is.na(href)
  # "scheme:" (https:, http:, mailto:, ...) or protocol-relative "//host".
  is_external <- grepl("^([A-Za-z][A-Za-z0-9+.-]*:|//)", href)
  is_fragment <- has_href & startsWith(href, "#")
  is_internal <- has_href & !is_external & !is_fragment
  if (!any(is_internal)) {
    return(invisible())
  }

  if (!is.null(url)) {
    href[is_internal] <- xml2::url_absolute(href[is_internal], url)
  }
  # Swap the extension even when a fragment or query string follows it.
  href[is_internal] <- sub(
    "\\.html(?=$|[#?])",
    ".md",
    href[is_internal],
    perl = TRUE
  )

  xml2::xml_attr(a[is_internal], "href") <- href[is_internal]

  invisible()
}
|
|
||
# Read the lines of `path`, or return NULL (invisibly) when the file does
# not exist.
read_file_if_exists <- function(path) {
  if (!file_exists(path)) {
    return(invisible(NULL))
  }
  read_lines(path)
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,14 +10,14 @@ <h1>{{{pagetitle}}}</h1> | |
| {{#subtitle}}<h3>{{{.}}}</h3>{{/subtitle}} | ||
| {{#desc}}<div class="section-desc">{{{desc}}}</div>{{/desc}} | ||
|
|
||
| {{#topics}}<dl> | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @maelle we were accidentally generating a definition list for every definition 😬
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ouch!! 🙈 |
||
| <dl>{{#topics}} | ||
| <dt> | ||
| {{#has_icons}}{{#icon}}<a class="icon" href="{{path}}"><img src="icons/{{{.}}}" alt=""/></a>{{/icon}}{{/has_icons}} | ||
| {{#aliases}}<code><a href="{{path}}">{{{.}}}</a></code> {{/aliases}} | ||
| {{#lifecycle}}<span class="badge lifecycle lifecycle-{{.}}">{{.}}</span>{{/lifecycle}} | ||
| </dt> | ||
| <dd>{{{title}}}</dd> | ||
| </dl>{{/topics}} | ||
| {{/topics}}</dl> | ||
| </div>{{/rows}} | ||
| </main> | ||
|
|
||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.