Read HTML headers and text from file
Value
A data frame with a column called "text" and header columns for only those page elements (such as h1, h2, and h3) whose levels fall within the numeric range of headers.
Examples
# Example: build a chapter-level corpus from an HTML edition of a novel.
# The `if (FALSE)` guard keeps this example from being evaluated automatically.
if (FALSE) {
  library(dplyr)
  library(stringr)
  library(tmtyro)

  orlando <-
    "http://gutenberg.net.au/ebooks02/0200331h.html" |>
    download_once() |>
    parse_html() |>
    # Keep only the rows whose `part` header marks a chapter.
    filter(str_detect(part, "CHAPTER")) |>
    mutate(
      # "\\d+" captures the whole chapter number; a bare "\\d" would keep
      # only the first digit and turn "CHAPTER 12" into "1".
      chapter = str_extract(part, "\\d+"),
      author = "Virginia Woolf"
    ) |>
    select(author, title, chapter, text) |>
    # Drop rows where no chapter number was found. Uses dplyr::filter()
    # rather than tidyr::drop_na(), because tidyr is not attached above.
    filter(!is.na(chapter)) |>
    identify_by(title, chapter) |>
    load_texts()
}
