Split a column of text using tidytext::unnest_tokens(), flattening the table into one token per row while also omitting any token that appears only in a capitalized form.
Examples
if (FALSE) { # \dontrun{
  # Load the texts with `to_lower = FALSE`, then pipe the result
  # into `unnest_without_caps()`.
  mysteries <- load_texts("mystery-novels", to_lower = FALSE) |>
    unnest_without_caps()

  # `load_texts()` already incorporates `unnest_without_caps()`, so
  # calling it separately may be unnecessary in many scenarios.
  mysteries <- load_texts("mystery-novels", remove_names = TRUE)
} # }