## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(nycOpenData)
library(ggplot2)
library(dplyr)
library(tidyr)

## ----small-sample-------------------------------------------------------------
# Pull only a handful of rows first to get a feel for the dataset.
small_sample <- nyc_pull_dataset("xywu-7bv9", limit = 5)
small_sample

# Seeing what columns are in the dataset
names(small_sample)

## ----full-data----------------------------------------------------------------
# Pull the dataset without a row limit and preview the first rows.
population_data <- nyc_pull_dataset("xywu-7bv9")

population_data |>
  slice_head(n = 6)

## ----filter-brooklyn----------------------------------------------------------
# Restrict the pull to a single borough via the `filters` argument.
brooklyn_pop <- nyc_pull_dataset(
  "xywu-7bv9",
  filters = list(borough = "Brooklyn")
)
brooklyn_pop

## ----population-trends, fig.alt="Line chart showing population trends for NYC's five boroughs from 1950 to 2040.", fig.cap="Population trends for NYC's five boroughs from 1950 to 2040, including historical data and projections.", fig.height=6, fig.width=8----
# Get the full dataset (pulled again so this chunk can be run on its own).
population_data <- nyc_pull_dataset("xywu-7bv9")

# Clean borough names, keep only the "Total Population" rows, and drop the
# city-wide total so that only the individual boroughs remain.
borough_data <- population_data |>
  mutate(borough = trimws(borough)) |> # Remove leading/trailing spaces
  filter(age_group == "Total Population", borough != "NYC Total")

# Decennial year columns used below: "x1950", "x1960", ..., "x2040".
year_cols <- paste0("x", seq(1950, 2040, by = 10))

# Reshape from wide to long format. `all_of()` errors if any expected year
# column is missing instead of silently dropping it.
pop_long <- borough_data |>
  select(borough, all_of(year_cols)) |>
  pivot_longer(
    cols = all_of(year_cols),
    names_to = "year",
    values_to = "population"
  ) |>
  mutate(
    # Strip only the leading "x" prefix before converting to a number.
    year = as.numeric(sub("^x", "", year)),
    population = as.numeric(population)
  )

# Create line chart. Every `+` must end a line: a line that *starts* with `+`
# is parsed as a new statement and ggplot2 refuses the unary plus.
ggplot(pop_long, aes(x = year, y = population, color = borough)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  labs(
    title = "NYC Population by Borough: 1950-2040",
    subtitle = "Historical data and projections",
    x = "Year",
    y = "Population",
    color = "Borough"
  ) +
  theme(legend.position = "bottom")

## ----summary-2040-------------------------------------------------------------
# Boroughs ranked by their 2040 population value, largest first.
pop_long |>
  filter(year == 2040) |>
  arrange(desc(population))