Skip to contents

Visualize bigram chains

Usage

plot_bigrams(
  df,
  feature = word,
  random_seed = TRUE,
  set_seed = NULL,
  legend = FALSE,
  top_n = 35
)

Arguments

df

A tidy data frame potentially containing a column called "word" or columns called "word_1" and "word_2".

feature

The feature to use when constructing ngrams

random_seed

Whether to randomize the creation of the network chart.

set_seed

A specific seed to use when the network chart is not randomized.

legend

Whether to show a legend for the edge color

top_n

The number of pairs to visualize

Value

A ggplot2 object

Examples

if (FALSE) { # \dontrun{
# It isn't necessary to use add_ngrams()
df |>
  plot_bigrams()

# Adding them first allows for filtering steps
df |>
  add_ngrams() |>
  drop_stopwords(word_1) |>
  drop_stopwords(word_2) |>
  plot_bigrams()

# Only bigrams influence the visualization. These show the same networks:
df |>
  add_ngrams() |>
  plot_bigrams()

df |>
  add_ngrams(4) |>
  plot_bigrams()

} # }

dubliners <- get_gutenberg_corpus(2814) |>
  load_texts() |>
  identify_by(part) |>
  standardize_titles()

dubliners |>
  plot_bigrams()


# Loading `ggraph` enables edges to show connection strengths
library(ggraph)

dubliners |>
  plot_bigrams()


dubliners |>
  add_ngrams(2) |>
  drop_stopwords(feature = word_1) |>
  drop_stopwords(feature = word_2) |>
  plot_bigrams()


dubliners |>
  dplyr::filter(doc_id == "The Dead") |>
  plot_bigrams(top_n = 70) |>
  change_colors(c("black", "orange"))
#> Scale for edge_colour is already present.
#> Adding another scale for edge_colour, which will replace the existing scale.