Github repo: https://github.com/wanjiag/EDLD652/tree/main/lab2

2. Replicate the “National cable news networks” plot from the story using the tv_states data. Don’t worry about all the labels. Try to match the colors but don’t worry about getting them exact.

# I would again encourage you to use `pivot_longer()` and the tidyverse
# e.g., `mutate()`
# Reshape tv_states from wide (every column other than `date` is gathered)
# to long: one row per date/location pair with the value in `score`.
# The factor level order below is also the order in which `color_platte`
# is assigned to locations by scale_fill_manual().
tv_states_long <- tv_states %>%
  pivot_longer(cols = -date, names_to = "location", values_to = "score") %>%
  mutate(location = factor(location,
                           levels = c("florida", "texas", "puerto_rico")))

# Event annotations: one dotted vertical line and one label per event date.
news_data <- tibble(
  news = c("Harvey\nlandfall", "Irma\nlandfall", "Maria\nlandfall",
           "Las Vegas\nshooting"),
  date = as.Date(c("2017-08-25", "2017-09-10", "2017-09-20", "2017-10-01"))
)

# Hand-placed in-plot labels that stand in for a legend.
location_text <- tibble(
  location = c("Texas", "Florida", "Puerto Rico"),
  x = as.Date(c("2017-08-29", "2017-09-10", "2017-10-01")),
  y = c(0.9, 1.3, 1.3)
)

# One fill colour per factor level, in level order.
color_platte <- c("#FC5185", "#ED713A", "#3FC1C9")

ggplot(tv_states_long, aes(x = date, y = score, fill = location)) +
  # Filled area from zero up to each location's score, outlined in white.
  geom_ribbon(aes(ymin = 0, ymax = score),
              color = "white") +
  # You should really never need `$` within ggplot or the tidyverse generally
  geom_vline(aes(xintercept = date),
             data = news_data,
             color = "lightgrey",
             linetype = "dotted") +
  # Event names near the top of the panel; label.size = NA removes the border.
  geom_label(data = news_data, aes(x = date, y = 3.8, label = news),
             inherit.aes = FALSE, color = "grey80", label.size = NA) +
  geom_text(data = location_text, aes(x, y, label = location),
            inherit.aes = FALSE, size = 4.5) +
  scale_fill_manual(values = color_platte) +
  ylim(0, 4) +
  labs(y = "Share of sentences",
       title = "National cable news networks",
       caption = "Includes Bloomberg, CNBC, CNN, Fox Business, Fox News and MSNBC.\nFiveThirtyEight") +
  theme_minimal(base_size = 12) +
  theme(axis.title.x = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0))

3. Use the comic_characters dataset to visualize the proportion of different genders using at least three different methods. With your final visualization, make any additional modifications you think would help reduce the cognitive load necessary to comprehend the figure.

  1. A pie chart that shows the proportion of each gender
# Count the rows in each `sex` category and express each count as a share of
# the total. `percentage` is a proportion between 0 and 1.
gender_prop <- comic_characters %>%
  count(sex) %>%  # no need to group_by with `count()`
  # Refer to the column directly: `sum(n)` respects the data mask (and any
  # grouping), whereas `.$n` always reaches back to the whole piped-in table.
  mutate(percentage = n / sum(n))

# A pie chart is a single stacked bar (x = "") drawn in polar coordinates,
# with the angle taken from y.
ggplot(gender_prop, aes(x = "", y = percentage, fill = sex)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  theme_void()

  1. The first appearance of characters over time, separated by gender.
# Tally rows for every sex/year combination; the count lands in column `n`.
gender_prop_year <- count(comic_characters, sex, year)

# One line per sex, coloured by sex, tracing the yearly tally.
ggplot(data = gender_prop_year) +
  geom_line(mapping = aes(x = year, y = n, group = sex, color = sex))

  1. Cumulative distribution (proportion) of each gender. It’s interesting to see that female and NA characters appear at a similar rate within their own categories. However, transgender and genderfluid characters are very few, which produces the big steps.
# Tally of rows per sex and first-appearance date. Kept so the table is still
# available under its original name; the plot below no longer uses it.
gender_prop_month <- comic_characters %>%
  count(sex, date)

# stat_ecdf() computes the ECDF of `x`, treating each input row as one
# observation, and supplies its own y values (a mapped `y` is not used).
# Plotting the counted table would therefore weight every distinct date
# equally, however many rows share it, so the raw rows are plotted instead.
# NOTE(review): assumes comic_characters has one row per character — confirm.
ggplot(comic_characters, aes(x = date, group = sex, color = sex)) +
  stat_ecdf(geom = "step") +
  labs(y = "Cumulative proportion of characters")

# ooh... I like this one
  1. final figure
# Recount rows per sex and year so this chunk does not depend on earlier ones.
# sub() drops everything from the first space onward, keeping only the first
# word of each label. Values not listed in `levels` (and missing values)
# become NA in the factor.
gender_prop_year <- comic_characters %>%
  count(sex, year) %>%
  mutate(
    sex = sub(" .*", "", sex),
    sex = factor(sex,
                 levels = c("Male",
                            "Female",
                            "Agender",
                            "Genderless",
                            "Genderfluid",
                            "Transgender"))
  )
# I'm stopping styling here...

# Split the counts three ways, one table per layer type in the plot below:
# NA sex (dotted line), the four ribbon groups, and the two point groups.
gender_prop_year_na <- gender_prop_year %>%
  filter(is.na(sex))
gender_prop_year_no_na <- gender_prop_year %>%
  filter(sex %in% c("Male", "Female", "Agender", "Genderless")) %>%
  # Rebuild the factor with only the four levels that remain.
  mutate(sex = factor(sex,
                      levels = c("Male", "Female", "Agender", "Genderless")))
gender_prop_year_no_na_others <- gender_prop_year %>%
  filter(sex %in% c("Genderfluid", "Transgender"))

# Label positions, one row per ribbon group: start from each group's largest
# yearly count at x = 1994, then apply hand-tuned adjustments. The adjustments
# are keyed by group name rather than by row/column position, so they stay
# attached to the right group if the row or column order ever changes.
text <- gender_prop_year_no_na %>%
  group_by(sex) %>%
  summarise(max = max(n)) %>%
  mutate(
    x = if_else(sex == "Genderless", 1979, 1994),
    max = case_when(
      sex == "Male" ~ max - 325,
      sex == "Female" ~ max - 125,
      sex == "Agender" ~ 5,
      # as.numeric() keeps every branch the same type (counts are integer).
      TRUE ~ as.numeric(max)
    )
  )

ggplot(gender_prop_year_no_na, aes(x = year, y = n, fill = sex)) +
  # NOTE(review): ymin = 0 becomes -Inf under the log2 transform below, so
  # ggplot2 is expected to warn about infinite values — confirm this is the
  # intended baseline (ymin = 1 would avoid it).
  geom_ribbon(aes(ymin = 0, ymax = n)) +
  # In-plot group labels replace the fill legend (hidden via guides() below).
  geom_label(data = text, aes(x = x, y = max, label = sex, fill = sex),
             color = "white", label.size = NA) +
  geom_line(data = gender_prop_year_na, color = "darkgrey",
            linetype = "dotted") +
  geom_point(data = gender_prop_year_no_na_others, aes(color = sex),
             shape = 8, size = 3) +
  scale_y_continuous(trans = "log2") +
  scale_color_manual(values = c("springgreen", "gold"),
                     name = "Least represented groups") +
  viridis::scale_fill_viridis(discrete = TRUE, option = "A",
                              name = "More than 2 in total") +
  labs(y = "Number of Characters Introduced", x = "Year",
       caption = "Grey dotted line indicates N.A.") +
  theme_minimal() +
  # "none" is the supported way to hide a legend; `FALSE` is deprecated.
  guides(fill = "none") +
  theme(legend.position = "bottom")

# Pretty!