Loading data and packages

How do a song’s properties influence its popularity?

# Inspect the observed minimum and maximum of four song properties in `data`.
# Each `##` line is the output recorded for the call directly above it.
range(data$danceability)
## [1] 0.000 0.988
range(data$duration_ms)
## [1]    4937 5338302
range(data$tempo)
## [1]   0.000 243.507
range(data$speechiness)
## [1] 0.000 0.971
# Working table for the figures: song id, popularity, year and four song
# properties. `tempo` and the track length are min-max rescaled to [0, 1];
# the rescaled length is stored as `duration` and the raw `duration_ms`
# column is not kept.
data1 <- data %>%
  mutate(
    tempo = (tempo - min(tempo)) / diff(range(tempo)),
    duration = (duration_ms - min(duration_ms)) / diff(range(duration_ms))
  ) %>%
  # Explicit final column order; `duration` stays last.
  select(id, popularity, year, danceability, tempo, speechiness, duration)
  

# Median popularity per value bin, for each song property.
# Every property column is cut into 50 equal-width bins (`labels = FALSE`
# returns the integer bin index, 1-50), the table is reshaped to one row per
# song/property pair, and popularity is summarised per property/bin.
fig1_data <- data1 %>%
  select(-year) %>%
  mutate(
    danceability = cut(danceability, 50, labels = FALSE),
    duration = cut(duration, 50, labels = FALSE),
    tempo = cut(tempo, 50, labels = FALSE),
    speechiness = cut(speechiness, 50, labels = FALSE)
  ) %>%
  # Assumes the four property columns are adjacent in `data1`, running from
  # `danceability` to `duration`.
  pivot_longer(cols = danceability:duration, names_to = "property") %>%
  group_by(property, value) %>%
  # `.groups = "drop"` returns an ungrouped result, so the factor conversion
  # below runs on a plain table and no "grouped output" message is emitted.
  summarise(popularity_median = median(popularity), .groups = "drop") %>%
  mutate(property = factor(
    property,
    levels = c("danceability", "tempo", "speechiness", "duration")
  ))
# Figure 1: median popularity across the value bins of each property, drawn
# as overlapping semi-transparent ribbons from 0 up to the median.
# The x axis is the bin index produced by cut(..., 50, labels = FALSE), so it
# runs from 1 to 50 (the earlier label claimed 0-30).
ggplot(fig1_data, aes(x = value, y = popularity_median, group = property)) +
  geom_ribbon(
    aes(ymin = 0, ymax = popularity_median, fill = property),
    alpha = 0.6
  ) +
  labs(x = "Normalized Value (bin 1-50)", y = "Median Popularity (0-100)") +
  scale_fill_OkabeIto()

How do songs’ properties change over time?

# Mean of each song property per decade.
# `year - year %% 10` floors the year to the start of its decade
# (e.g. 1987 -> 1980). The property columns are then reshaped to long form
# and averaged per property/decade pair.
fig2_data <- data1 %>%
  mutate(decades = year - year %% 10) %>%
  select(-c(popularity, year)) %>%
  # Assumes the four property columns are adjacent in `data1`, running from
  # `danceability` to `duration`.
  pivot_longer(cols = danceability:duration, names_to = "property") %>%
  group_by(property, decades) %>%
  # `.groups = "drop"` returns an ungrouped result and avoids the
  # "grouped output" message.
  summarise(mean = mean(value), .groups = "drop")
# Figure 2: heat map of the per-decade mean of each property.
# x is the decade and the tile colour is the mean, so the axis is labelled
# "Decade" and the description of the mean goes on the fill legend.
ggplot(fig2_data, aes(x = decades, y = property, fill = mean)) +
  geom_tile() +
  scale_fill_viridis_c(option = "A") +
  labs(x = "Decade", y = "Property", fill = "Mean for each decade") +
  theme(legend.position = "top")