--- title: "Studying Zelenskiy / Official: War, Peace, and Language" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Studying Zelenskiy / Official: War, Peace, and Language} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ## Goal We study: 1. How often the channel mentions **war** and **peace** over time. 2. Which adjectives are used to describe **peace** (word before “peace”). 3. The dynamics of the word **“powerful.”** ```{r sample_data, include=FALSE} sample <- readRDS(system.file("extdata/vignettes/channel_sample.rds", package = "telegramR")) msgs_sample <- sample$msgs library(telegramR) library(dplyr) library(stringr) library(lubridate) library(tidyr) library(ggplot2) ``` ## Setup ```{r setup, eval=FALSE} library(telegramR) library(dplyr) library(stringr) library(lubridate) library(tidyr) library(ggplot2) # your api_id <- 123456 api_hash <- "0123456789abcdef0123456789abcdef" client <- TelegramClient$new("my_session", api_id, api_hash) client$connect() ``` If not authorized: ```{r coderequest, eval=FALSE} client$send_code_request("your_phone_number") ``` After recieving code in Telegram app: ```{r code, eval=FALSE} client$sign_in(phone = "your_phone_number", code = "code") ``` ## Download Messages You can download a fixed number of recent posts or the full history. ```{r download, eval=FALSE} msgs <- download_channel_messages( client, "V_Zelenskiy_official", start_date = "2022-01-01", limit = Inf ) ``` ```{r smpls, include=FALSE} msgs <- msgs_sample ``` ```{r msgs_sample_output} head(msgs) ``` ## Optional: Reactions and Replies ```{r reactions_replies, eval=FALSE} # Reactions per post reactions <- download_channel_reactions( client, "V_Zelenskiy_official", limit = 1000 ) # Replies to the latest 50 posts (if comments are enabled) replies <- download_channel_replies( client, "V_Zelenskiy_official", message_limit = 50, reply_limit = Inf ) ``` ## Preprocess ```{r preprocess, eval=TRUE} msgs <- msgs %>% filter(!is.na(text)) %>% mutate( day = as.Date(date), text_lower = str_to_lower(text) ) ``` ## 1) War vs. Peace Over Time (weekly dynamics) ```{r war_peace, eval=TRUE} # Count mentions per day (simple word match) war_peace <- msgs %>% mutate( war_mentions = str_count(text_lower, "\\bwar\\b") + str_count(text_lower, "\\bвійн.(.||)\\b"), peace_mentions = str_count(text_lower, "\\bpeace\\b") + str_count(text_lower, "\\bмир(.||)(.||)\\b") ) %>% mutate(day = lubridate::round_date(day, unit = "week")) %>% group_by(day) %>% summarise( war = sum(war_mentions, na.rm = TRUE), peace = sum(peace_mentions, na.rm = TRUE), .groups = "drop" ) %>% pivot_longer(cols = c(war, peace), names_to = "term", values_to = "count") # Plot (guard against empty/NA dates) if (nrow(war_peace) > 0 && all(!is.na(war_peace$day))) { war_peace %>% ggplot(aes(day, count, fill = term, group=term)) + geom_area(position = "fill") + labs(title = "Mentions of war and peace", x = NULL, y = "Count") + scale_x_date(expand = c(0, 0, 0, 0)) + scale_y_continuous(expand = c(0, 0, 0, 0), labels = scales::percent) + scale_fill_manual(values = c("lightblue", "red")) + annotate("text", x = ymd("2025-05-01"), y = 0.9, label = "PEACE") + annotate("text", x = ymd("2025-05-01"), y = 0.1, label = "WAR") + theme_minimal() + theme(legend.position = "none") } else { tibble::tibble() } ``` ### Example Output (Bundled Sample) ```{r war_peace_sample} library(dplyr) library(stringr) library(lubridate) library(tidyr) msgs <- msgs_sample %>% filter(!is.na(text)) %>% mutate( day = as.Date(date), text_lower = str_to_lower(text) ) war_peace <- msgs %>% mutate( war_mentions = str_count(text_lower, "\\bwar\\b"), peace_mentions = str_count(text_lower, "\\bpeace\\b") ) %>% group_by(day) %>% summarise( war = sum(war_mentions, na.rm = TRUE), peace = sum(peace_mentions, na.rm = TRUE), .groups = "drop" ) %>% pivot_longer(cols = c(war, peace), names_to = "term", values_to = "count") if (nrow(war_peace) == 0 || any(is.na(war_peace$day))) { tibble::tibble() } else { war_peace } ``` ## 2) Adjectives Before “Peace” We extract the token immediately preceding “peace”. ```{r peace_adj, eval=TRUE} # ========================= # Peace/Mир adjective mining + stopword filtering + EN/UKR unification + monthly dynamics # ========================= # ---- packages ---- library(dplyr) library(tidyr) library(stringr) library(stringi) library(lubridate) library(ggplot2) library(scales) library(stopwords) library(RColorBrewer) # ---- stopwords: EN + UK (ISO) ---- stop_en <- stopwords("en", source = "snowball") stop_uk <- stopwords("uk", source = "stopwords-iso") all_stops <- unique(c(stop_en, stop_uk)) %>% str_to_lower() # ---- ensure we have text_lower + normalized text ---- # If text_lower already exists in msgs, it will be used; otherwise uses `text`. msgs2 <- msgs %>% mutate( text_lower = if ("text_lower" %in% names(.)) text_lower else str_to_lower(text), text_norm = stringi::stri_trans_nfc(text_lower) ) # ---- ensure we have a parsed datetime column called `date` ---- # Tries common column names; adjust if needed. date_col <- intersect(c("date", "created_at", "timestamp", "datetime", "time"), names(msgs2))[1] if (is.na(date_col)) stop("No date column found. Add/rename a date column (e.g., date, created_at, timestamp).") msgs2 <- msgs2 %>% mutate( date = .data[[date_col]], date = case_when( inherits(date, "POSIXt") ~ as.POSIXct(date), inherits(date, "Date") ~ as.POSIXct(date), TRUE ~ suppressWarnings(lubridate::ymd_hms(date, quiet = TRUE)) ) ) if (anyNA(msgs2$date)) { missing_idx <- is.na(msgs2$date) msgs2$date[missing_idx] <- as.POSIXct( suppressWarnings(lubridate::ymd(msgs2[[date_col]][missing_idx], quiet = TRUE)) ) } # ---- extract the word immediately before peace|мир (row-level, long) ---- adj_long <- msgs2 %>% mutate( adj = stringi::stri_extract_all_regex( text_norm, # safer than \\b for Unicode: grab letters preceded by non-letter, followed by peace/мир "(?% tidyr::unnest_longer(adj) %>% mutate( adj = str_to_lower(stringi::stri_trans_nfc(adj)) ) %>% filter(!is.na(adj), adj != "", !(adj %in% all_stops)) # ---- unify EN <-> UKR equivalents (add Ukrainian where missing) ---- # Canonical labels = Ukrainian forms (where possible) equiv_map <- c( # ===================== # REAL — реальний # ===================== "real" = "real", "реальний"="real","реального"="real","реальному"="real","реальним"="real", "реальним"="real","реальні"="real","реальна"="real","реальну"="real", "реальною"="real","реальне"="real","реальних"="real","реальними"="real", # ===================== # LASTING — тривалий # ===================== "lasting"="lasting", "тривалий"="lasting","тривалого"="lasting","тривалому"="lasting", "тривалим"="lasting","тривалі"="lasting","тривала"="lasting", "тривалу"="lasting","тривалою"="lasting","тривале"="lasting", "тривалих"="lasting","тривалими"="lasting", # ===================== # GLOBAL — глобальний # ===================== "global"="global", "глобальний"="global","глобального"="global","глобальному"="global", "глобальним"="global","глобальні"="global","глобальна"="global", "глобальну"="global","глобальною"="global","глобальне"="global", "глобальних"="global","глобальними"="global", # ===================== # UKRAINIAN — український # ===================== "ukrainian"="ukrainian", "український"="ukrainian","українського"="ukrainian","українському"="ukrainian", "українським"="ukrainian","українські"="ukrainian","українська"="ukrainian", "українську"="ukrainian","українською"="ukrainian","українське"="ukrainian", "українських"="ukrainian","українськими"="ukrainian", # ===================== # JUST — справедливий # ===================== "just"="just", "справедливий"="just","справедливого"="just","справедливому"="just", "справедливим"="just","справедливі"="just","справедлива"="just", "справедливу"="just","справедливою"="just","справедливе"="just", "справедливих"="just","справедливими"="just", # ===================== # TRUE — справжній # ===================== "true"="true", "справжній"="true","справжнього"="true","справжньому"="true", "справжнім"="true","справжні"="true","справжня"="true", "справжню"="true","справжньою"="true","справжнє"="true", "справжніх"="true","справжніми"="true", # ===================== # RELIABLE — надійний # ===================== "reliable"="reliable", "надійний"="reliable","надійного"="reliable","надійному"="reliable", "надійним"="reliable","надійні"="reliable","надійна"="reliable", "надійну"="reliable","надійною"="reliable","надійне"="reliable", "надійних"="reliable","надійними"="reliable", # ===================== # WORTHY — достойний # ===================== "worthy"="worthy", "достойний"="worthy","достойного"="worthy","достойному"="worthy", "достойним"="worthy","достойні"="worthy","достойна"="worthy", "достойну"="worthy","достойною"="worthy","достойне"="worthy", "достойних"="worthy","достойними"="worthy" ) adj_long2 <- adj_long %>% mutate( adj_group = recode(adj, !!!equiv_map, .default = adj) ) # ---- overall counts (merged) ---- peace_adjectives_merged <- adj_long2 %>% count(adj_group, sort = TRUE) # show top results peace_adjectives_merged %>% filter(n >= 5) %>% head(20) # ---- monthly dynamics (merged) ---- target_groups <- unique(equiv_map) monthly_adj_merged <- adj_long2 %>% mutate(month = as.Date(floor_date(date, "month"))) %>% filter(adj_group %in% target_groups) %>% count(month, adj_group) %>% arrange(month) ``` ```{r plt, eval=TRUE} # ---- plot: proportional stacked area (100%) with ColorBrewer ---- # full month sequence all_months <- seq.Date( as.Date(floor_date(min(adj_long2$date, na.rm = TRUE), "month")), as.Date(floor_date(max(adj_long2$date, na.rm = TRUE), "month")), by = "1 month" ) monthly_adj_merged <- adj_long2 %>% mutate(month = as.Date(floor_date(date, "month"))) %>% filter(adj_group %in% target_groups) %>% count(month, adj_group) %>% complete( month = all_months, adj_group = target_groups, fill = list(n = 0) ) %>% arrange(month) ggplot(monthly_adj_merged, aes(x = month, y = n, color = adj_group, group = adj_group)) + geom_path() + scale_y_continuous() + scale_color_brewer(palette = "Set1") + # You can change palette here labs( title = "Proportional Monthly Dynamics (Merged EN/UKR Equivalents)", x = "Month", y = "Count", color = "Adjective (merged)" ) + theme_minimal() ``` ## 3) "Powerful" Over Time ```{r powerful, eval=TRUE} powerful <- msgs %>% mutate(powerful_mentions = str_count(text_lower, "\\bpowerful\\b") + str_count(text_lower, "\\bпотужн.(|.)\\b")) %>% mutate(day = lubridate::round_date(day, unit = "week")) %>% group_by(day) %>% summarise(powerful = sum(powerful_mentions, na.rm = TRUE), .groups = "drop") powerful %>% ggplot(aes(day, powerful)) + geom_line(linewidth = 0.8) + labs(title = "Usage of the word 'powerful'", x = NULL, y = "Count") + theme_minimal() ```