6  コーディングメニュー

Code
suppressPackageStartupMessages({
  library(ggplot2)
  library(ca)
  library(duckdb)
})
# Open the DuckDB database file read-only; later chunks query its "tokens"
# table through `con`.
drv <- duckdb::duckdb()
con <- duckdb::dbConnect(
  drv,
  dbdir = "tutorial_jp/kokoro.duckdb",
  read_only = TRUE
)

# Read the spreadsheet, skipping its first row and supplying the column names
# ourselves. Rows are numbered into `doc_id` before blank texts are dropped,
# so the ids refer to positions in the spreadsheet as read.
tbl <-
  readxl::read_xls(
    path = "tutorial_jp/kokoro.xls",
    skip = 1,
    col_names = c("text", "section", "chapter", "label")
  ) |>
  dplyr::mutate(doc_id = factor(dplyr::row_number())) |>
  # Normalize every character column.
  dplyr::mutate(
    dplyr::across(where(is.character), \(x) audubon::strj_normalize(x))
  ) |>
  dplyr::filter(!gibasa::is_blank(text)) |>
  dplyr::relocate(doc_id, text, section, label, chapter)

6.1 単純集計(A.7.1)

Code
# Coding rules as a quanteda dictionary: each key is a code name and each
# value is the set of words that count towards that code.
rules <- quanteda::dictionary(
  list(
    "人の死" = c(
      "死後", "死病", "死期", "死因", "死骸", "生死", "自殺", "殉死",
      "頓死", "変死", "亡", "死ぬ", "亡くなる", "殺す", "亡くす", "死"
    ),
    "恋愛" = c(
      "愛", "恋", "愛す", "愛情", "恋人", "愛人", "恋愛", "失恋", "恋しい"
    ),
    "友情" = c(
      "友達", "友人", "旧友", "親友", "朋友", "友", "級友"
    ),
    "信用・不信" = c(
      "信用", "信じる", "信ずる", "不信", "疑い", "疑惑", "疑念", "猜疑",
      "狐疑", "疑問", "疑い深い", "疑う", "疑る", "警戒"
    ),
    "病気" = c(
      "医者", "病人", "病室", "病院", "病症", "病状", "持病", "死病",
      "主治医", "精神病", "仮病", "病気", "看病", "大病", "病む", "病"
    )
  )
)

# Document-by-code count matrix. Tokens are counted per `doc_id` in the
# database, using `original` where it is present and the token itself
# otherwise, and the resulting features are then mapped onto the codes
# defined in `rules`.
dfm <-
  dplyr::tbl(con, "tokens") |>
  dplyr::mutate(token = dplyr::coalesce(original, token)) |>
  dplyr::count(doc_id, token) |>
  dplyr::collect() |>
  tidytext::cast_dfm(document = doc_id, term = token, value = n) |>
  quanteda::dfm_lookup(dictionary = rules)

# Simple tabulation: one row per code with its total and that total divided
# by the number of documents.
# NOTE(review): `total` sums matched-token counts per document, whereas
# `コードなし` is a 0/1 flag per document; confirm whether the code rows
# should be document counts as well.
dfm |>
  quanteda::convert(to = "data.frame") |>
  # Flag documents in which no code matched at all.
  dplyr::mutate(
    `コードなし` = as.numeric(
      rowSums(dplyr::pick(where(is.numeric))) == 0
    )
  ) |>
  tidyr::pivot_longer(
    cols = -doc_id,
    names_to = "code",
    values_to = "count"
  ) |>
  dplyr::summarise(
    total = sum(count),
    prop = sum(count) / dplyr::n(),
    .by = code
  )
#> # A tibble: 6 × 3
#>   code       total   prop
#>   <chr>      <dbl>  <dbl>
#> 1 人の死       148 0.122 
#> 2 恋愛          65 0.0536
#> 3 友情          51 0.0420
#> 4 信用・不信   123 0.101 
#> 5 病気         150 0.124 
#> 6 コードなし   919 0.758

6.2 クロス集計(A.7.2)

6.2.1 クロス表

Code
# Label-by-code count matrix: same construction as the per-document matrix,
# but tokens are counted per `label` instead of per `doc_id`.
dfm <-
  dplyr::tbl(con, "tokens") |>
  dplyr::mutate(token = dplyr::coalesce(original, token)) |>
  dplyr::count(label, token) |>
  dplyr::collect() |>
  tidytext::cast_dfm(document = label, term = token, value = n) |>
  quanteda::dfm_lookup(dictionary = rules)

# Cross table of section by code, rendered as a flextable.
dfm |>
  quanteda::convert(to = "data.frame") |>
  # Flag labels in which no code matched at all.
  dplyr::mutate(
    `コードなし` = as.numeric(
      rowSums(dplyr::pick(where(is.numeric))) == 0
    )
  ) |>
  tidyr::pivot_longer(
    cols = -doc_id,
    names_to = "code",
    values_to = "count"
  ) |>
  # `doc_id` holds the label here, so use it to look up each label's section.
  dplyr::left_join(
    y = dplyr::distinct(tbl, label, section),
    by = dplyr::join_by(doc_id == label)
  ) |>
  # Repeat every row `count` times so the cross table sees one row per hit.
  tidyr::uncount(weights = count) |>
  crosstable::crosstable(section, by = code, total = "both") |>
  crosstable::as_flextable()

| label | variable | code: コードなし | code: 信用・不信 | code: 人の死 | code: 病気 | code: 友情 | code: 恋愛 | Total |
|---|---|---|---|---|---|---|---|---|
| section | [1]上_先生と私 | 6 (3.26%) | 36 (19.57%) | 53 (28.80%) | 51 (27.72%) | 17 (9.24%) | 21 (11.41%) | 184 (33.21%) |
| | [2]中_両親と私 | 0 (0%) | 13 (10.57%) | 30 (24.39%) | 76 (61.79%) | 4 (3.25%) | 0 (0%) | 123 (22.20%) |
| | [3]下_先生と遺書 | 11 (4.45%) | 74 (29.96%) | 65 (26.32%) | 23 (9.31%) | 30 (12.15%) | 44 (17.81%) | 247 (44.58%) |
| | Total | 17 (3.07%) | 123 (22.20%) | 148 (26.71%) | 150 (27.08%) | 51 (9.21%) | 65 (11.73%) | 554 (100.00%) |

6.2.2 ヒートマップ

横に長すぎてラベルが見づらい。

Code
# Heat map of code counts per chapter label. `dfm` is the label-by-code
# matrix built for the cross table; cells with a zero count are filtered out
# and therefore stay unfilled.
dfm |>
  quanteda::convert(to = "data.frame") |>
  # Flag labels in which no code matched at all.
  dplyr::mutate(
    `コードなし` = as.numeric(
      rowSums(dplyr::pick(where(is.numeric))) == 0
    )
  ) |>
  tidyr::pivot_longer(
    cols = !doc_id,
    names_to = "code",
    values_to = "count"
  ) |>
  dplyr::filter(count > 0) |>
  # Order the x axis by the first appearance of each label in `tbl`.
  ggplot(aes(x = factor(doc_id, levels = unique(tbl$label)), y = code)) +
  geom_raster(aes(fill = count)) +
  # NULL is the documented way to remove an axis title in labs(); a theme
  # element such as element_blank() is not a label value.
  labs(x = NULL, y = NULL) +
  theme_classic() +
  # For text rotated by 90 degrees, hjust = 1 butts the label against the
  # axis and vjust = 0.5 centres it on its tick mark.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

6.2.3 バルーンプロット

Code
# Section-by-code count matrix: tokens are counted per `section` and the
# features are mapped onto the codes defined in `rules`.
dfm <-
  dplyr::tbl(con, "tokens") |>
  dplyr::mutate(token = dplyr::coalesce(original, token)) |>
  dplyr::count(section, token) |>
  dplyr::collect() |>
  tidytext::cast_dfm(document = section, term = token, value = n) |>
  quanteda::dfm_lookup(dictionary = rules)

# Long table with one row per (section, code) pair and its count.
dat <- dfm |>
  quanteda::convert(to = "data.frame") |>
  # Flag sections in which no code matched at all.
  dplyr::mutate(
    `コードなし` = as.numeric(
      rowSums(dplyr::pick(where(is.numeric))) == 0
    )
  ) |>
  tidyr::pivot_longer(
    cols = -doc_id,
    names_to = "code",
    values_to = "count"
  )

# Ward clustering of the codes (margin = 2 compares columns) on Euclidean
# distance; the result drives the dendrogram drawn on the y axis.
clusters <- hclust(
  as.dist(
    proxyC::dist(
      tidytext::cast_dfm(dat, doc_id, code, count),
      margin = 2,
      method = "euclidean"
    )
  ),
  method = "ward.D2"
)

# Balloon plot of section (x) by code (y), with balloon size set by the count.
ggpubr::ggballoonplot(
  dat,
  x = "doc_id",
  y = "code",
  size = "count",
  color = "gray",
  fill = "#f5f5f5",
  show.label = TRUE
) +
  ggh4x::scale_y_dendrogram(hclust = clusters)

6.3 類似度行列(A.7.3)

Code
# Label-by-code matrix reduced to presence/absence (boolean weighting).
dfm <-
  dplyr::tbl(con, "tokens") |>
  dplyr::mutate(token = dplyr::coalesce(original, token)) |>
  dplyr::count(label, token) |>
  dplyr::collect() |>
  tidytext::cast_dfm(document = label, term = token, value = n) |>
  quanteda::dfm_lookup(dictionary = rules) |>
  quanteda::dfm_weight(scheme = "boolean")

# Jaccard similarity between codes (features), not between labels.
quanteda.textstats::textstat_simil(
  dfm,
  method = "jaccard",
  margin = "features"
)
#> textstat_simil object; method = "jaccard"
#>            人の死   恋愛   友情 信用・不信   病気
#> 人の死      1.000 0.1129 0.1186      0.253 0.4310
#> 恋愛        0.113 1.0000 0.0652      0.283 0.0476
#> 友情        0.119 0.0652 1.0000      0.175 0.1053
#> 信用・不信  0.253 0.2833 0.1746      1.000 0.2817
#> 病気        0.431 0.0476 0.1053      0.282 1.0000

6.4 その他の分析(A.7.4-8)

基本的に抽出語メニューのときと同じやり方でグラフをつくることができるはず。階層的クラスター分析、共起ネットワーク、SOMについては省略する。

6.4.1 対応分析

Code
# Two-dimensional correspondence analysis of the boolean label-by-code
# matrix. The ca package is attached before the fitted model is plotted.
library(ca)

plot(
  quanteda.textmodels::textmodel_ca(dfm, nd = 2, sparse = TRUE)
)

6.4.2 多次元尺度構成法(MDS)

Code
# Jaccard similarity between the columns (margin = 2) of `dfm`.
simil <- proxyC::simil(dfm, margin = 2, method = "jaccard")

# Sammon mapping into two dimensions on the dissimilarity 1 - similarity;
# only the fitted coordinates ("points") are kept.
dat <- purrr::pluck(MASS::sammon(1 - simil, k = 2), "points")
#> Initial stress        : 0.09511
#> stress after  10 iters: 0.03043, magic = 0.500
#> stress after  20 iters: 0.03038, magic = 0.500
Code
# Turn the Sammon coordinates into a tibble (row names go to `label`, the two
# coordinate columns are named Dim1/Dim2) and attach a cluster id per row.
# NOTE: every `dat` on the right-hand side, including the one inside
# mutate(), still refers to the coordinate object from the previous chunk;
# the name is only rebound once the whole pipeline has been evaluated.
dat <- dat |>
  dplyr::as_tibble(
    rownames = "label",
    .name_repair = ~ c("Dim1", "Dim2")
  ) |>
  dplyr::mutate(
    # Ward clustering on Euclidean distances between the points, cut into
    # three clusters; cutree() returns a named vector, indexed here by label.
    clust = (hclust(
      proxyC::dist(dat, method = "euclidean") |> as.dist(),
      method = "ward.D2"
    ) |> cutree(k = 3))[label]
  )

# Scatter plot of the two dimensions, coloured by cluster, with repelled
# labels and no legend.
dat |>
  ggplot(aes(x = Dim1, y = Dim2, label = label, col = factor(clust))) +
  geom_point(alpha = .3, show.legend = FALSE) +
  ggrepel::geom_label_repel(show.legend = FALSE) +
  theme_classic()

6.4.3 LSS🍳

極性をあらわす少数の種語を使いつつ、指定した語と共起する語や文書について1次元の極性を与える手法らしい。LSXというパッケージとして実装されている。

本来はk(Truncated SVDにおけるランク)は200~300程度を指定するため、相当の量の文書が必要。提案論文では、おおむね40文程度の長さの文書が5,000~10,000文書くらい必要と書かれている。ここでは分析にかける文書が足りていないので、意味を解釈できる結果は得られていないと思う。

Code
# Coding rules as a quanteda dictionary (code name -> trigger words). The
# "信用・不信" entry is used below as the context pattern for LSS.
rules <- quanteda::dictionary(
  list(
    "人の死" = c(
      "死後", "死病", "死期", "死因", "死骸", "生死", "自殺", "殉死",
      "頓死", "変死", "亡", "死ぬ", "亡くなる", "殺す", "亡くす", "死"
    ),
    "恋愛" = c(
      "愛", "恋", "愛す", "愛情", "恋人", "愛人", "恋愛", "失恋", "恋しい"
    ),
    "友情" = c(
      "友達", "友人", "旧友", "親友", "朋友", "友", "級友"
    ),
    "信用・不信" = c(
      "信用", "信じる", "信ずる", "不信", "疑い", "疑惑", "疑念", "猜疑",
      "狐疑", "疑問", "疑い深い", "疑う", "疑る", "警戒"
    ),
    "病気" = c(
      "医者", "病人", "病室", "病院", "病症", "病状", "持病", "死病",
      "主治医", "精神病", "仮病", "病気", "看病", "大病", "病む", "病"
    )
  )
)

# Japanese Sentiment Polarity Dictionary (declinable-words edition)
# https://www.cl.ecei.tohoku.ac.jp/Open_Resources-Japanese_Sentiment_Polarity_Dictionary.html
# Two tab-separated columns, read here as `polarity` and `word`.
pn <-
  readr::read_tsv(
    "https://www.cl.ecei.tohoku.ac.jp/resources/sent_lex/wago.121808.pn",
    col_names = c("polarity", "word"),
    show_col_types = FALSE
  )

# Build the seed words from the polarity dictionary: keep dictionary words
# that occur as verb tokens in the corpus, collapse the polarity labels to
# "negative"/"positive", and format each seed as "<token>/<pos>" so that it
# matches the features built for `toks`.
seed <- pn |>
  dplyr::inner_join(
    dplyr::tbl(con, "tokens") |>
      dplyr::filter(pos == "動詞") |>
      dplyr::select(token, pos, original) |>
      dplyr::distinct() |>
      dplyr::collect(),
    by = dplyr::join_by(word == token)
  ) |>
  dplyr::mutate(
    # Labels containing "ネガ" are negative; everything else is positive.
    polarity = dplyr::if_else(
      stringr::str_detect(polarity, "ネガ"),
      "negative",
      "positive"
    ),
    token = dplyr::if_else(is.na(original), word, original),
    token = paste(token, pos, sep = "/")
  ) |>
  dplyr::distinct(polarity, token) |>
  # One list-column row per polarity; deframe() turns it into a named list.
  dplyr::summarise(dict = list(token), .by = polarity) |>
  tibble::deframe()

# Select the poles by key rather than by position: the order of the list
# elements follows the first appearance of each polarity in the joined data,
# so a positional index would silently flip the scale if that order changed.
seed <- seed |>
  quanteda::dictionary() |>
  LSX::as.seedwords(upper = "positive", lower = "negative")
#> Registered S3 methods overwritten by 'LSX':
#>   method                       from               
#>   print.coefficients_textmodel quanteda.textmodels
#>   print.statistics_textmodel   quanteda.textmodels
#>   print.summary.textmodel      quanteda.textmodels

# Build a quanteda tokens object with one document per `label`. Only the
# listed parts of speech are kept and every token is written as
# "<token>/<pos>", using `original` where it is present.
# NOTE(review): the query has no explicit ordering, so the token order within
# a label is whatever the database returns; confirm it follows the text,
# because the context window used later depends on it.
toks <-
  dplyr::tbl(con, "tokens") |>
  dplyr::filter(
    pos %in% c(
      "名詞", "名詞C",
      "地名", "人名", "組織名", "固有名詞",
      "動詞", "未知語", "タグ"
    )
  ) |>
  dplyr::mutate(
    token = dplyr::coalesce(original, token),
    token = paste(token, pos, sep = "/")
  ) |>
  dplyr::select(label, token) |>
  dplyr::collect() |>
  # One list-column row per label; deframe() turns it into a named list.
  dplyr::summarise(dict = list(token), .by = label) |>
  tibble::deframe() |>
  quanteda::as.tokens()

# Context terms: words occurring within 10 tokens of any "信用・不信" rule
# word (matched as regular expressions, case-sensitively), filtered by
# `min_count` and `p`.
# NOTE(review): the reason for upper-casing the returned terms is not evident
# from this file; confirm it matches the feature casing of quanteda::dfm(toks).
term <- toupper(
  LSX::char_context(
    toks,
    pattern = rules$`信用・不信`,
    valuetype = "regex",
    case_insensitive = FALSE,
    window = 10,
    min_count = 2,
    p = 0.05
  )
)
Code
# Fit the LSS model on the document-feature matrix of `toks`, using the seed
# words for polarity and restricting scored terms to the context terms.
# k = 20 is far below the usual rank because the corpus is small.
lss <-
  LSX::textmodel_lss(
    x = quanteda::dfm(toks),
    k = 20,
    seeds = seed,
    terms = term,
    include_data = TRUE,
    group_data = TRUE
  )
#> Warning in simil$seed: 'seed' の 'seeds' への部分的マッチ

単語の極性。

Code
# Plot the fitted term polarities of the LSS model.
LSX::textplot_terms(lss)

文書の極性。ここでは文書の数が少ないのでこのようにプロットしているが、実際にはもっと大量の文書を分析にかけるはずである。そのような場合の可視化例として、文書を横軸にとってpolarityの曲線を描く方法がパッケージのvignetteで紹介されている。

Code
# Horizontal bar chart of the predicted polarity per label, filled by the
# section each label belongs to. Labels without a prediction are dropped.
tibble::tibble(
  # Keep labels in their order of first appearance in `tbl`.
  docs = factor(unique(tbl$label), levels = unique(tbl$label)),
  # predict() returns a named vector; look each label up by name.
  polarity = predict(lss)[as.character(docs)],
  section = tbl$section[match(docs, tbl$label)]
) |>
  tidyr::drop_na(polarity) |>
  ggplot(aes(x = docs, y = polarity, fill = section)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_bw()

6.4.4 半教師ありトピックモデル🍳

コーディングルールを種語(キーワード)と見なして、半教師ありのトピックモデリングをおこなう。

KH Coderのコーディングルールは一つの文書に複数のルールがマッチすることがあると想定しているものなので、トピックモデルとは考え方が異なる点には注意が必要。

Code
# Keyword lists for keyATM as a plain named list (not a quanteda dictionary).
# Only three of the five codes are used as keyword topics; the other two are
# left commented out.
rules <- list(
  "人の死" = c(
    "死後", "死病", "死期", "死因", "死骸", "生死", "自殺", "殉死",
    "頓死", "変死", "亡", "死ぬ", "亡くなる", "殺す", "亡くす", "死"
  ),
  "恋愛" = c(
    "愛", "恋", "愛す", "愛情", "恋人", "愛人", "恋愛", "失恋", "恋しい"
  ),
  # "友情" = c("友達", "友人", "旧友", "親友", "朋友", "友", "級友"),
  # "信用・不信" = c("信用", "信じる", "信ずる", "不信", "疑い", "疑惑", "疑念", "猜疑", "狐疑", "疑問", "疑い深い", "疑う", "疑る", "警戒"),
  "病気" = c(
    "医者", "病人", "病室", "病院", "病症", "病状", "持病", "死病",
    "主治医", "精神病", "仮病", "病気", "看病", "大病", "病む", "病"
  )
)

# Document-by-token count matrix for topic modelling. Only the listed parts
# of speech are kept; tokens use `original` where it is present. No
# dictionary lookup is applied here, so the features are the tokens themselves.
dfm <-
  dplyr::tbl(con, "tokens") |>
  dplyr::filter(
    pos %in% c(
      "名詞", "名詞B", "名詞C",
      "地名", "人名", "組織名", "固有名詞",
      "動詞", "未知語", "タグ"
    )
  ) |>
  dplyr::mutate(token = dplyr::coalesce(original, token)) |>
  dplyr::count(doc_id, token) |>
  dplyr::collect() |>
  tidytext::cast_dfm(document = doc_id, term = token, value = n)

文書集合内でのキーワードの出現割合。

Code
# Plot how often each keyword in `rules` occurs in the documents.
keyATM::visualize_keywords(
  keyATM::keyATM_read(dfm, check = FALSE),
  rules
)
#> ℹ Using quanteda dfm.
#> Warning: Keywords are pruned because they do not appear in the documents: 死後, 自殺,
#> 殉死, 頓死, 変死, 亡, 恋愛, 失恋, 恋しい, 病気, 看病, and 大病

実装としては、ここではkeyATMを使う。seededlda::textmodel_seededlda()も試したのだが、あまりいい感じにfitしなかった。

Code
# Fit the base keyATM model: one keyword topic per entry of `rules` plus six
# topics without keywords. The seed is fixed so the run is repeatable.
keyatm_fit <-
  keyATM::keyATM(
    docs = keyATM::keyATM_read(dfm, check = FALSE),
    model = "base",
    no_keyword_topics = 6,
    keywords = rules,
    options = list(
      seed = 123,
      iterations = 2000,
      verbose = FALSE
    )
  )
#> ℹ Using quanteda dfm.
#> ⠙ Initializing the model
#> Warning: Upper case letters are used. Please review preprocessing steps.
#> Warning: Keywords are pruned because they do not appear in the documents: 死後, 自殺,
#> 殉死, 頓死, 変死, 亡, 恋愛, 失恋, 恋しい, 病気, 看病, and 大病
#> ✔ Initializing the model [281ms]
#> 
#> ⠙ Fitting the model: 2000 iterations
#> Fitting the model ■■■■■                             14% |  ETA:  9s
#> Fitting the model ■■■■■■■■■■■■■■                    44% |  ETA:  6s
#> Fitting the model ■■■■■■■■■■■■■■■■■■■■■■■           74% |  ETA:  3s
#> ⠙ Fitting the model: 2000 iterations
✔ Fitting the model: 2000 iterations [10s]
#> 
#> ⠙ Creating an output object
#> ✔ Creating an output object [292ms]

ちゃんとfitしているか確認する。

Code
# Stack the model-fit plot above the alpha plot; each keyATM plot object
# keeps its ggplot in the "figure" element.
patchwork::wrap_plots(
  purrr::pluck(keyATM::plot_modelfit(keyatm_fit), "figure"),
  purrr::pluck(keyATM::plot_alpha(keyatm_fit), "figure"),
  nrow = 2
)

トピックの比率。

Code
# Plot pi for the fitted model; as the message below notes, the values come
# from the final MCMC draw because `store_pi` was not enabled.
keyATM::plot_pi(keyatm_fit)
#> ℹ Plotting pi from the final MCMC draw. Please set `store_pi` to `TRUE` if you want to plot pi over iterations.

各トピックにおける生起確率の高い語。

Code
# Top 30 words per keyword topic in long form, with each word's corpus
# frequency attached. Topics whose name starts with "Other" (the topics
# without keywords) are dropped.
dat <-
  keyATM::top_words(keyatm_fit, n = 30) |>
  dplyr::as_tibble() |>
  tidyr::pivot_longer(
    everything(),
    names_to = "topic",
    values_to = "term"
  ) |>
  dplyr::filter(!stringr::str_starts(topic, "Other")) |>
  dplyr::mutate(
    # top_words() appends a bracketed marker to keywords (a check mark, or
    # the id of the topic the keyword belongs to). Strip any trailing marker,
    # whatever its length, before looking the bare word up in the column sums;
    # a one-character pattern would miss multi-digit topic ids and yield NA.
    count = quanteda::colSums(dfm)[
      stringr::str_remove(term, "(\\s\\[[^\\]]+\\])+$")
    ]
  )

# Interactive, filterable table of the top words; every column is rendered
# with data bars, with the text placed outside the bar at its base.
reactable::reactable(
  data = dat,
  defaultColDef = reactable::colDef(
    cell = reactablefmtr::data_bars(
      data = dat,
      text_position = "outside-base"
    )
  ),
  filterable = TRUE
)

Code
# Release the database: close the connection first, then shut down the
# driver it was opened from.
duckdb::dbDisconnect(con)
duckdb::duckdb_shutdown(drv)

# Record the package versions used for this run.
sessioninfo::session_info(info = "packages")
#> ═ Session info ═══════════════════════════════════════════════════════════════
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package             * version    date (UTC) lib source
#>  abind                 1.4-5      2016-07-21 [1] RSPM (R 4.4.0)
#>  askpass               1.2.0      2023-09-03 [1] RSPM (R 4.4.0)
#>  audubon               0.5.2      2024-04-27 [1] https://paithiov909.r-universe.dev (R 4.4.0)
#>  backports             1.5.0      2024-05-23 [1] RSPM (R 4.4.0)
#>  bit                   4.0.5      2022-11-15 [1] RSPM (R 4.4.0)
#>  bit64                 4.0.5      2020-08-30 [1] RSPM (R 4.4.0)
#>  blob                  1.2.4      2023-03-17 [1] RSPM (R 4.4.0)
#>  broom                 1.0.6      2024-05-17 [1] CRAN (R 4.4.0)
#>  ca                  * 0.71.1     2020-01-24 [1] RSPM (R 4.4.0)
#>  cachem                1.1.0      2024-05-16 [1] CRAN (R 4.4.0)
#>  car                   3.1-2      2023-03-30 [1] RSPM (R 4.4.0)
#>  carData               3.0-5      2022-01-06 [1] RSPM (R 4.4.0)
#>  cellranger            1.1.0      2016-07-27 [1] RSPM (R 4.4.0)
#>  checkmate             2.3.1      2023-12-04 [1] RSPM (R 4.4.0)
#>  cli                   3.6.3      2024-06-21 [1] CRAN (R 4.4.1)
#>  codetools             0.2-19     2023-02-01 [4] CRAN (R 4.2.2)
#>  colorspace            2.1-0      2023-01-23 [1] RSPM (R 4.4.0)
#>  crayon                1.5.3      2024-06-20 [1] RSPM (R 4.4.0)
#>  crosstable            0.7.0      2023-11-12 [1] RSPM (R 4.4.0)
#>  crosstalk             1.2.1      2023-11-23 [1] RSPM (R 4.4.0)
#>  crul                  1.4.2      2024-04-09 [1] RSPM (R 4.4.0)
#>  curl                  5.2.1      2024-03-01 [1] RSPM (R 4.4.0)
#>  data.table            1.15.4     2024-03-30 [1] RSPM (R 4.4.0)
#>  DBI                 * 1.2.3      2024-06-02 [1] RSPM (R 4.4.0)
#>  dbplyr                2.5.0      2024-03-19 [1] RSPM (R 4.4.0)
#>  digest                0.6.36     2024-06-23 [1] RSPM (R 4.4.0)
#>  dplyr                 1.1.4      2023-11-17 [1] RSPM (R 4.4.0)
#>  duckdb              * 1.0.0      2024-06-13 [1] CRAN (R 4.4.0)
#>  evaluate              0.24.0     2024-06-10 [1] RSPM (R 4.4.0)
#>  fansi                 1.0.6      2023-12-08 [1] RSPM (R 4.4.0)
#>  farver                2.1.2      2024-05-13 [1] CRAN (R 4.4.0)
#>  fastmap               1.2.0      2024-05-15 [1] RSPM (R 4.4.0)
#>  fastmatch             1.1-4      2023-08-18 [1] RSPM (R 4.4.0)
#>  flextable             0.9.6      2024-05-05 [1] RSPM (R 4.4.0)
#>  fontBitstreamVera     0.1.1      2017-02-01 [1] RSPM (R 4.4.0)
#>  fontLiberation        0.1.0      2016-10-15 [1] RSPM (R 4.4.0)
#>  fontquiver            0.2.1      2017-02-01 [1] RSPM (R 4.4.0)
#>  forcats               1.0.0      2023-01-29 [1] RSPM (R 4.4.0)
#>  foreach               1.5.2      2022-02-02 [1] RSPM (R 4.4.0)
#>  gdtools               0.3.7      2024-03-05 [1] RSPM (R 4.4.0)
#>  generics              0.1.3      2022-07-05 [1] RSPM (R 4.4.0)
#>  gfonts                0.2.0      2023-01-08 [1] RSPM (R 4.4.0)
#>  ggdendro              0.2.0      2024-02-23 [1] RSPM (R 4.4.0)
#>  ggh4x                 0.2.8      2024-01-23 [1] RSPM (R 4.4.0)
#>  ggplot2             * 3.5.1      2024-04-23 [1] RSPM (R 4.4.0)
#>  ggpubr                0.6.0      2023-02-10 [1] RSPM (R 4.4.0)
#>  ggrepel               0.9.5      2024-01-10 [1] RSPM (R 4.4.0)
#>  ggsignif              0.6.4      2022-10-13 [1] RSPM (R 4.4.0)
#>  gibasa                1.1.0.9004 2024-04-25 [1] https://paithiov909.r-universe.dev (R 4.3.3)
#>  glmnet                4.1-8      2023-08-22 [1] RSPM (R 4.4.0)
#>  glue                  1.7.0      2024-01-09 [1] RSPM (R 4.4.0)
#>  gtable                0.3.5      2024-04-22 [1] RSPM (R 4.4.0)
#>  hms                   1.1.3      2023-03-21 [1] RSPM (R 4.4.0)
#>  htmltools             0.5.8.1    2024-04-04 [1] RSPM (R 4.4.0)
#>  htmlwidgets           1.6.4      2023-12-06 [1] RSPM (R 4.4.0)
#>  httpcode              0.3.0      2020-04-10 [1] RSPM (R 4.4.0)
#>  httpuv                1.6.15     2024-03-26 [1] RSPM (R 4.4.0)
#>  iterators             1.0.14     2022-02-05 [1] RSPM (R 4.4.0)
#>  janeaustenr           1.0.0      2022-08-26 [1] RSPM (R 4.4.0)
#>  jsonlite              1.8.8      2023-12-04 [1] RSPM (R 4.4.0)
#>  keyATM                0.5.2      2024-04-24 [1] RSPM (R 4.4.0)
#>  knitr                 1.47       2024-05-29 [1] CRAN (R 4.4.0)
#>  labeling              0.4.3      2023-08-29 [1] RSPM (R 4.4.0)
#>  later                 1.3.2      2023-12-06 [1] RSPM (R 4.4.0)
#>  lattice               0.22-5     2023-10-24 [4] CRAN (R 4.3.1)
#>  LiblineaR             2.10-23    2023-12-11 [1] CRAN (R 4.4.0)
#>  lifecycle             1.0.4      2023-11-07 [1] RSPM (R 4.4.0)
#>  locfit                1.5-9.10   2024-06-24 [1] RSPM (R 4.4.0)
#>  LSX                   1.4.0      2024-03-05 [1] RSPM (R 4.4.0)
#>  magrittr              2.0.3      2022-03-30 [1] RSPM (R 4.4.0)
#>  MASS                  7.3-60     2023-05-04 [4] CRAN (R 4.3.1)
#>  Matrix                1.6-5      2024-01-11 [4] CRAN (R 4.3.3)
#>  memoise               2.0.1      2021-11-26 [1] RSPM (R 4.4.0)
#>  mime                  0.12       2021-09-28 [1] RSPM (R 4.4.0)
#>  munsell               0.5.1      2024-04-01 [1] RSPM (R 4.4.0)
#>  nsyllable             1.0.1      2022-02-28 [1] CRAN (R 4.4.0)
#>  officer               0.6.6      2024-05-05 [1] RSPM (R 4.4.0)
#>  openssl               2.2.0      2024-05-16 [1] CRAN (R 4.4.0)
#>  patchwork             1.2.0      2024-01-08 [1] RSPM (R 4.4.0)
#>  pillar                1.9.0      2023-03-22 [1] RSPM (R 4.4.0)
#>  pkgconfig             2.0.3      2019-09-22 [1] RSPM (R 4.4.0)
#>  promises              1.3.0      2024-04-05 [1] RSPM (R 4.4.0)
#>  proxyC                0.4.1      2024-04-07 [1] CRAN (R 4.4.0)
#>  purrr                 1.0.2      2023-08-10 [1] RSPM (R 4.4.0)
#>  quanteda              4.0.2      2024-04-24 [1] CRAN (R 4.4.0)
#>  quanteda.textmodels   0.9.7      2024-04-11 [1] CRAN (R 4.4.0)
#>  quanteda.textstats    0.97       2024-04-08 [1] CRAN (R 4.4.0)
#>  R.cache               0.16.0     2022-07-21 [1] RSPM (R 4.4.0)
#>  R.methodsS3           1.8.2      2022-06-13 [1] RSPM (R 4.4.0)
#>  R.oo                  1.26.0     2024-01-24 [1] RSPM (R 4.4.0)
#>  R.utils               2.12.3     2023-11-18 [1] RSPM (R 4.4.0)
#>  R6                    2.5.1      2021-08-19 [1] RSPM (R 4.4.0)
#>  ragg                  1.3.2      2024-05-15 [1] RSPM (R 4.4.0)
#>  Rcpp                  1.0.12     2024-01-09 [1] RSPM (R 4.4.0)
#>  RcppParallel          5.1.7      2023-02-27 [1] RSPM (R 4.4.0)
#>  reactable             0.4.4      2023-03-12 [1] RSPM (R 4.4.0)
#>  reactablefmtr         2.0.0      2022-03-16 [1] RSPM (R 4.4.0)
#>  reactR                0.6.0      2024-06-26 [1] RSPM (R 4.4.0)
#>  readr                 2.1.5      2024-01-10 [1] RSPM (R 4.4.0)
#>  readxl                1.4.3      2023-07-06 [1] RSPM (R 4.4.0)
#>  rlang                 1.1.4      2024-06-04 [1] RSPM (R 4.4.0)
#>  rmarkdown             2.27       2024-05-17 [1] CRAN (R 4.4.0)
#>  RSpectra              0.16-1     2022-04-24 [1] CRAN (R 4.4.0)
#>  rstatix               0.7.2      2023-02-01 [1] RSPM (R 4.4.0)
#>  sass                  0.4.9      2024-03-15 [1] RSPM (R 4.4.0)
#>  scales                1.3.0      2023-11-28 [1] RSPM (R 4.4.0)
#>  sessioninfo           1.2.2      2021-12-06 [1] RSPM (R 4.4.0)
#>  shape                 1.4.6.1    2024-02-23 [1] RSPM (R 4.4.0)
#>  shiny                 1.8.1.1    2024-04-02 [1] RSPM (R 4.4.0)
#>  SnowballC             0.7.1      2023-04-25 [1] RSPM (R 4.4.0)
#>  SparseM               1.84       2024-06-25 [1] RSPM (R 4.4.0)
#>  stopwords             2.3        2021-10-28 [1] RSPM (R 4.4.0)
#>  stringi               1.8.4      2024-05-06 [1] CRAN (R 4.4.0)
#>  stringr               1.5.1      2023-11-14 [1] RSPM (R 4.4.0)
#>  styler                1.10.3     2024-04-07 [1] RSPM (R 4.4.0)
#>  survival              3.7-0      2024-06-05 [4] CRAN (R 4.4.0)
#>  systemfonts           1.1.0      2024-05-15 [1] RSPM (R 4.4.0)
#>  textshaping           0.4.0      2024-05-24 [1] RSPM (R 4.4.0)
#>  tibble                3.2.1      2023-03-20 [1] RSPM (R 4.4.0)
#>  tidyr                 1.3.1      2024-01-24 [1] RSPM (R 4.4.0)
#>  tidyselect            1.2.1      2024-03-11 [1] RSPM (R 4.4.0)
#>  tidytext              0.4.2      2024-04-10 [1] RSPM (R 4.4.0)
#>  tokenizers            0.3.0      2022-12-22 [1] RSPM (R 4.4.0)
#>  tzdb                  0.4.0      2023-05-12 [1] RSPM (R 4.4.0)
#>  utf8                  1.2.4      2023-10-22 [1] RSPM (R 4.4.0)
#>  uuid                  1.2-0      2024-01-14 [1] RSPM (R 4.4.0)
#>  V8                    4.4.2      2024-02-15 [1] RSPM (R 4.4.0)
#>  vctrs                 0.6.5      2023-12-01 [1] RSPM (R 4.4.0)
#>  vroom                 1.6.5      2023-12-05 [1] RSPM (R 4.4.0)
#>  withr                 3.0.0      2024-01-16 [1] RSPM (R 4.4.0)
#>  xfun                  0.45       2024-06-16 [1] RSPM (R 4.4.0)
#>  xml2                  1.3.6      2023-12-04 [1] RSPM (R 4.4.0)
#>  xtable                1.8-4      2019-04-21 [1] RSPM (R 4.4.0)
#>  yaml                  2.3.8      2023-12-11 [1] RSPM (R 4.4.0)
#>  zip                   2.3.1      2024-01-27 [1] RSPM (R 4.4.0)
#> 
#>  [1] /home/paithiov909/R/x86_64-pc-linux-gnu-library/4.4
#>  [2] /usr/local/lib/R/site-library
#>  [3] /usr/lib/R/site-library
#>  [4] /usr/lib/R/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────