Loading data and packages
How do different song properties influence a song’s popularity?
# Inspect the observed span of each raw audio feature.
range(data[["danceability"]])
## [1] 0.000 0.988
range(data[["duration_ms"]])
## [1] 4937 5338302
range(data[["tempo"]])
## [1] 0.000 243.507
range(data[["speechiness"]])
## [1] 0.000 0.971
# Working table for the figures: track id, popularity, year and four audio
# features. `duration_ms` and `tempo` are min-max rescaled to [0, 1]
# (`duration_ms` is stored as `duration`); `danceability` and `speechiness`
# are kept on their raw scale.
# The final column order (danceability, tempo, speechiness, duration) is
# deliberate: later steps select the feature columns with the positional
# range `danceability:duration`.
data1 <- data %>%
  mutate(
    duration = (duration_ms - min(duration_ms)) / diff(range(duration_ms)),
    tempo = (tempo - min(tempo)) / diff(range(tempo))
  ) %>%
  select(id, popularity, year, danceability, tempo, speechiness, duration)
# Median popularity per property bin, in long format for plotting.
# Every property is cut into 50 equal-width bins; `labels = FALSE` makes
# cut() return the integer bin index (1-50) instead of an interval factor,
# so all four properties share one x axis.
fig1_data <- data1 %>%
  select(-year) %>%
  mutate(across(
    c(danceability, duration, tempo, speechiness),
    ~ cut(.x, 50, labels = FALSE)
  )) %>%
  # Name the feature columns explicitly rather than relying on their
  # position in `data1`.
  pivot_longer(
    cols = c(danceability, tempo, speechiness, duration),
    names_to = "property"
  ) %>%
  group_by(property, value) %>%
  # Drop the grouping so `property` is not rewritten while it is still a
  # grouping column, and so no "grouped output" message is emitted.
  summarise(popularity_median = median(popularity), .groups = "drop") %>%
  # Fix the factor level order, which controls the legend/fill order.
  mutate(property = factor(
    property,
    levels = c("danceability", "tempo", "speechiness", "duration")
  ))
# Figure 1: one filled area per property showing median popularity across
# the property's value bins. The x axis carries the bin index produced by
# cut(..., 50, labels = FALSE), i.e. 1-50, so the axis title states that
# range.
ggplot(fig1_data, aes(x = value, y = popularity_median, group = property)) +
  geom_ribbon(
    aes(ymin = 0, ymax = popularity_median, fill = property),
    alpha = 0.6
  ) +
  labs(x = "Normalized Value Bin (1-50)", y = "Median Popularity (0-100)") +
  scale_fill_OkabeIto()
How do songs’ properties change over time?
# Mean of each property per decade, in long format for the heat map.
# `decades` is the year rounded down to a multiple of 10 (e.g. 1987 -> 1980).
fig2_data <- data1 %>%
  mutate(decades = year - year %% 10) %>%
  select(-c(popularity, year)) %>%
  # Name the feature columns explicitly rather than relying on their
  # position in `data1`.
  pivot_longer(
    cols = c(danceability, tempo, speechiness, duration),
    names_to = "property"
  ) %>%
  group_by(property, decades) %>%
  # Return an ungrouped table; this also silences the "grouped output"
  # message.
  summarise(mean = mean(value), .groups = "drop")
# Figure 2: heat map with decades on the x axis, properties on the y axis
# and the per-decade mean as the tile colour. The mean is carried by the
# fill, so that description titles the legend; the x axis is the decade.
ggplot(fig2_data, aes(x = decades, y = property, fill = mean)) +
  geom_tile() +
  scale_fill_viridis_c(option = "A") +
  labs(x = "Decade", y = "Property", fill = "Mean for each decade") +
  theme(legend.position = "top")
The most popular artists over time
library(gganimate)
# One row per (track, artist).
# NOTE(review): assumes `artists` holds a Python-style list literal such as
# "['A', 'B']" or "[\"Guns N' Roses\"]" -- confirm against the raw data.
artist_data <- data %>%
  select(popularity, artists, year) %>%
  # Remove only the enclosing list brackets, not brackets inside a name.
  mutate(artists = gsub("^\\[|\\]$", "", artists)) %>%
  # Split only where ", " sits between a closing and an opening quote, so a
  # comma inside a quoted name (e.g. 'Earth, Wind & Fire') does not split it.
  separate_rows(artists, sep = "(?<=['\"]), (?=['\"])") %>%
  # Strip just the surrounding quote pair; apostrophes inside a name stay.
  mutate(artists = gsub("^['\"]|['\"]$", "", artists))
# Top-10 artists by mean popularity for each animation frame.
# Years before 1990 are collapsed to their decade (year rounded down to a
# multiple of 10); years from 1990 onward stay as individual years.
fig3_data <- artist_data %>%
  mutate(year = ifelse(year < 1990, year - year %% 10, year)) %>%
  group_by(year, artists) %>%
  # Keep the `year` grouping on purpose: the ranking below is per year.
  summarise(mean_popularity = mean(popularity), .groups = "drop_last") %>%
  arrange(year, desc(mean_popularity)) %>%
  # Rows are sorted by descending popularity, so the within-year row number
  # is the rank.
  mutate(rank = row_number()) %>%
  filter(rank <= 10) %>%
  ungroup()
# Theme for the bar-chart race: classic theme in Times, with the y axis
# (text, ticks and line) removed and the legend, plot and panel
# backgrounds all filled with "gainsboro".
my_theme <- theme_classic(base_family = "Times") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    legend.background = element_rect(fill = "gainsboro"),
    plot.background = element_rect(fill = "gainsboro"),
    panel.background = element_rect(fill = "gainsboro")
  )
# Static version of the ranking chart, faceted by year: one horizontal bar
# per artist running from x = 18 to the artist's mean popularity, stacked by
# rank, with the artist name right-aligned at x = 10.
fig3 <- ggplot(
  fig3_data,
  aes(
    xmin = 18,
    xmax = mean_popularity,
    ymin = rank - .45,
    ymax = rank + .45,
    y = rank
  )
) +
  facet_wrap(~ year) +
  geom_rect(alpha = .7) +
  geom_text(
    aes(label = artists),
    x = 10,
    hjust = "right",
    col = "gray13"
  ) +
  # The x range extends below zero to leave room for the name labels.
  scale_x_continuous(
    limits = c(-50, 100),
    breaks = c(0, 20, 40, 60, 80, 100)
  ) +
  # Reverse y so that rank 1 is drawn at the top.
  scale_y_reverse() +
  labs(x = "Popularity (0-100)", y = "") +
  my_theme
# Turn the faceted chart into an animation: remove the facets, group by
# artist, draw the current year as a large semi-transparent label, and step
# through the `year` states.
fig3 <- fig3 +
  facet_null() +
  aes(group = artists) +
  geom_text(
    aes(label = as.character(year)),
    x = 50,
    y = -5,
    family = "Times",
    size = 25,
    col = "grey18",
    alpha = 0.5
  ) +
  transition_states(
    year,
    transition_length = 1,
    state_length = 4
  )

# Render at one frame per second.
animate(fig3, fps = 1)