---
title: "EDC 2024"
author: "Joe"
date: "`r Sys.Date()`"
editor: source
format:
  html:
    theme: quartz
    #fontcolor: black
    toc: true
    toc-location: left
    code-fold: true
    code-tools: true
    df-print: kable
    fig-width: 10
    fig-height: 7.5
    embed-resources: true
    grid:
      body-width: 2000px
execute:
  warning: false
---
```{r}
# Remember when rendering started; the "Run time" section subtracts this value.
start_time <- Sys.time()
# Attach the packages used below without their start-up banners
# (same attach order as before: ggpath, plotly, tidyverse).
suppressPackageStartupMessages({
  library(ggpath)
  library(plotly)
  library(tidyverse)
})
# Bias number formatting towards fixed notation rather than scientific.
options(scipen = 10L)
# Shared ggplot2 theme applied to every chart in this notebook: bold white
# text on a purple ("#5E61AF") plot background with widened plot margins.
# A first `jam_theme` assignment used to sit directly above this one; it was
# overwritten before any plot used it, so only the effective definition is kept.
jam_theme <- theme_minimal() +
  theme(
    # Push the axis titles away from the tick labels.
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 8, l = 0)),
    text = element_text(size = 20, face = "bold", color = "white"),
    axis.text.x = element_text(size = 10, color = "white"),
    axis.text.y = element_text(size = 13, color = "white"),
    plot.title = element_text(face = "bold", color = "white"),
    plot.background = element_rect(fill = "#5E61AF"),
    # Margins are given as top, right, bottom, left.
    plot.margin = margin(1, 1, 1.5, 1.2, "cm")
  )
# Colour palette picked from the lineup image with
# https://coolors.co/image-picker
my_cols <- c(
  "#52BFEC", "#AA1880", "#EC0059", "#08BCDF",
  "#4C1064", "#FF00BC", "#2249CD", "#53007D",
  "#FF6B02", "#B319B2", "#EAE100", "#BF068F"
)
```
# Summary
This notebook shows how I searched the Spotify and last.fm APIs to find data on
EDC 2024 artists. I was curious to find the most popular artists.
# EDC 2024 Lineup

# Convert lineup image to text
Create a text list of EDC artists from the lineup image using [imagetotext.io](https://www.imagetotext.io/).
```{r}
# Artist names were pulled out of the EDC lineup PNG with
# https://www.imagetotext.io/ and saved as a tab-separated text file.
# Duplicate rows are dropped and the table is sorted by artist name.
edc_artists <- read_tsv(
  "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artists_text_from_png.txt"
) |>
  unique() |>
  arrange(artist)
# edc_artists
```
# Collect Artist Data
## Spotify
Access the Spotify API using the [spotifyr](https://www.rdocumentation.org/packages/spotifyr/versions/2.2.4) package.

You need to set up a developer account with Spotify to access their Web API [here](https://developer.spotify.com/dashboard/create). It is very quick and easy.
```{r}
#| include: false
# API credentials must not be stored in the notebook source. Define
# SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET outside the document (for
# example in ~/.Renviron or the shell environment); this chunk only checks
# that they are present.
# NOTE(review): a client id/secret pair was previously hard-coded here and
# should be rotated in the Spotify developer dashboard.

# Return the names in `vars` whose environment variable is unset or empty.
missing_env_vars <- function(vars) {
  vars[!nzchar(Sys.getenv(vars, unset = "", names = FALSE))]
}

spotify_env_missing <- missing_env_vars(
  c("SPOTIFY_CLIENT_ID", "SPOTIFY_CLIENT_SECRET")
)
if (length(spotify_env_missing) > 0) {
  # Only a message: the API chunks below are not evaluated when rendering, so
  # missing credentials should not abort the render.
  message(
    "Spotify credentials not set: ",
    paste(spotify_env_missing, collapse = ", ")
  )
}
```
```{r}
#| eval: false
# install.packages("spotifyr")
library(spotifyr)
# Set up a dev account to get valid API IDs, then expose them as environment
# variables before requesting a token:
# Sys.setenv(SPOTIFY_CLIENT_ID = '########################')
# Sys.setenv(SPOTIFY_CLIENT_SECRET = '#########################')
access_token <- get_spotify_access_token()

n_artists <- nrow(edc_artists)

# Use search_spotify() to find Spotify artist ids from artist names.
# A hit is accepted only when the lineup name equals an upper-cased result
# name (this assumes the lineup names are upper case -- TODO confirm);
# otherwise the id stays NA.
# No ID for Domina, Hint of Lavender, Marlie, VUIIIGUR
spotify_artist_id <- rep(NA_character_, n_artists)
for (i in seq_len(n_artists)) {
  i_search_spotify <- search_spotify(edc_artists$artist[i])
  i_hits <- i_search_spotify$artists$items
  # match() gives the position of the first exact hit, or NA when there is
  # none (including when the search returned no artists at all).
  i_match <- match(edc_artists$artist[i], toupper(i_hits$name))
  if (!is.na(i_match)) {
    spotify_artist_id[[i]] <- i_hits$id[i_match]
  } else {
    cat(paste0(" \n No exact match for artist name: ", edc_artists$artist[i], " \n\n "))
    # Alternative kept for reference: fall back to the first search hit.
    # spotify_artist_id[[i]] <- i_search_spotify$artists$items$id[1]
    # cat(paste0("\nNo exact match for artist name: ",edc_artists$artist[i],
    # "\n", "Using: ", i_search_spotify$artists$items$name[1],
    # "\n", i_search_spotify$artists$items$external_urls.spotify[1], "\n\n"))
  }
}
edc_artists$spotify_artist_id <- spotify_artist_id

# Use get_artist() to get genres, followers, popularity and an image URL.
# Every vector starts as NA of the right type, so artists without a Spotify
# id simply keep NA and no character -> numeric round trip is needed.
genres <- rep(NA_character_, n_artists)
followers <- rep(NA_real_, n_artists)
popularity <- rep(NA_real_, n_artists)
image_url <- rep(NA_character_, n_artists)
for (i in seq_len(n_artists)) {
  if (is.na(edc_artists$spotify_artist_id[i])) {
    next
  }
  i_artist_info <- get_artist(edc_artists$spotify_artist_id[i])
  # Genres are stored as one comma-separated string; an artist without any
  # genre would give "", which is recorded as NA instead.
  i_genres <- paste0(i_artist_info$genres, collapse = ",")
  if (nzchar(i_genres)) {
    genres[[i]] <- i_genres
  }
  followers[[i]] <- i_artist_info$followers$total
  popularity[[i]] <- i_artist_info$popularity
  # Keep the first image URL when the artist has any image.
  i_image_urls <- i_artist_info$images$url
  if (length(i_image_urls) > 0) {
    image_url[[i]] <- i_image_urls[1]
  }
}
edc_artists$genres <- genres
edc_artists$followers <- followers
edc_artists$popularity <- popularity
edc_artists$image_url <- image_url
```
## last.fm
Access the last.fm API using the [lastfmR](https://github.com/ppatrzyk/lastfmR) package.
```{r}
#| eval: false
# devtools::install_github("ppatrzyk/lastfmR")
library(lastfmR)
# masks get_tracks()

# Fetch last.fm artist info for every lineup artist and merge it into the
# artist table. The join has no explicit `by`, so it uses whatever columns
# the two tables have in common.
lastfm_artist_info <- tibble(
  get_artist_info(artist_vector = edc_artists$artist)
)
edc_artists <- edc_artists |>
  full_join(lastfm_artist_info)
# write.table(edc_artists, "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt", row.names = F, quote = F, sep = "\t")
```
# Spotify Followers
```{r}
#| fig-width: 15
#| fig-height: 9
# Load the saved artist table from disk (presumably the file produced by the
# commented-out write.table() call in the last.fm chunk -- same path).
edc_artists <- read_tsv(
  file = "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt"
)
# Horizontal bar-style chart of the top artists ranked by one column.
#
# Arguments:
#   column_name    Name (single string) of the column of `data` to rank by.
#   top            Number of artists to keep after sorting in descending order.
#   plot_title     Title shown above the plot.
#   include_images Draw each artist's image (column `image_url`) at the
#                  artist's value. Accepts the strings "true"/"false" (the
#                  original interface) as well as TRUE/FALSE.
#   data           Table with `artist`, `image_url` and `column_name` columns;
#                  defaults to the notebook-level `edc_artists`.
# Returns a ggplot object. Uses the notebook-level `jam_theme` and `my_cols`.
plot.top.artists <- function(column_name, top, plot_title,
                             include_images = "false", data = edc_artists) {
  stopifnot(
    is.character(column_name),
    length(column_name) == 1L,
    column_name %in% names(data)
  )
  # A logical TRUE used to be silently treated as "no images" because it was
  # compared against the string "true".
  show_images <- isTRUE(include_images) ||
    identical(tolower(as.character(include_images)), "true")

  rank_col <- sym(column_name)
  i_plot <- data |>
    arrange(desc(!!rank_col)) |>
    head(top)

  # Recycle the palette far enough for every plotted artist; for up to
  # 3 * length(my_cols) artists this is the same vector as rep(my_cols, 3).
  palette <- rep_len(my_cols, max(length(my_cols) * 3L, nrow(i_plot)))

  p1 <- ggplot(i_plot, aes(reorder(artist, !!rank_col), !!rank_col)) +
    geom_segment(
      aes(
        x = reorder(artist, !!rank_col),
        xend = reorder(artist, !!rank_col),
        y = 0,
        yend = !!rank_col,
        color = artist
      ),
      linewidth = 3
    ) +
    # clip = "off" lets drawing extend past the panel edge.
    coord_flip(clip = "off") +
    scale_color_manual(values = palette) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)

  if (show_images) {
    p1 <- p1 + geom_from_path(aes(path = image_url), width = 0.052)
  }
  p1
}
# Top 10 by Spotify followers with artist images, then top 30 without images.
plot.top.artists(
  column_name = "followers",
  top = 10,
  plot_title = "Top 10 EDC artists with the most followers on Spotify",
  include_images = "true"
)
plot.top.artists(
  column_name = "followers",
  top = 30,
  plot_title = "Top 30 EDC artists with the most followers on Spotify",
  include_images = "false"
)
```
# Spotify "Popularity"
The "popularity" metric is "track-based and a measure of how many plays a track received and how recent those plays are. An artist’s popularity is calculated from the popularity of all the artist’s tracks."
```{r}
#| fig-width: 15
#| fig-height: 9
# Top 10 by Spotify popularity with artist images, then top 30 without images.
plot.top.artists(
  column_name = "popularity",
  top = 10,
  plot_title = "Top 10 most popular EDC artists according to Spotify",
  include_images = "true"
)
plot.top.artists(
  column_name = "popularity",
  top = 30,
  plot_title = "Top 30 most popular EDC artists according to Spotify",
  include_images = "false"
)
```
# last.fm Global Listeners
```{r}
#| fig-width: 15
#| fig-height: 9
# Top 10 by last.fm listeners with artist images, then top 30 without images.
plot.top.artists(
  column_name = "global_listeners",
  top = 10,
  plot_title = "Top 10 artists with the most listeners on last.fm",
  include_images = "true"
)
plot.top.artists(
  column_name = "global_listeners",
  top = 30,
  plot_title = "Top 30 artists with the most listeners on last.fm",
  include_images = "false"
)
```
# Genres
```{r}
#| fig-width: 15
#| fig-height: 9
# filter(edc_artists, !is.na(genres)) |> nrow()
# n_distinct(edc_artists$genres)

# One row per genre tag: split each artist's comma-separated genre string,
# drop missing entries, then trim and upper-case every tag.
all_genres <- tibble(
  genre = edc_artists$genres |>
    str_split(",") |>
    unlist() |>
    na.omit() |>
    as.character() |>
    str_trim() |>
    toupper()
)
# Horizontal bar-style chart of the most frequent genre tags.
#
# Arguments:
#   all_genres  Table with one row per genre tag in a `genre` column.
#   column_name Name (single string) of the column to plot; the summary table
#               built here has the columns `genre` and `n_genres`.
#   plot_title  Title shown above the plot.
#   top         Number of genres to keep, most frequent first (default 30,
#               the value that used to be hard-coded).
# Returns a ggplot object. Uses the notebook-level `jam_theme` and `my_cols`.
plot.top.genres <- function(all_genres, column_name, plot_title, top = 30) {
  i_plot <- all_genres |>
    count(genre, name = "n_genres") |>
    arrange(desc(n_genres)) |>
    head(top)
  stopifnot(
    is.character(column_name),
    length(column_name) == 1L,
    column_name %in% names(i_plot)
  )

  plot_col <- sym(column_name)
  # Recycle the palette so there is a colour for every plotted genre.
  palette <- rep_len(my_cols, max(length(my_cols), nrow(i_plot)))

  ggplot(i_plot, aes(reorder(genre, !!plot_col), !!plot_col)) +
    geom_segment(
      aes(
        x = reorder(genre, !!plot_col),
        xend = reorder(genre, !!plot_col),
        y = 0,
        yend = !!plot_col,
        color = genre
      ),
      linewidth = 3
    ) +
    coord_flip(clip = "off") +
    scale_color_manual(values = palette) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)
}
# Chart the genre tags that occur most often across the lineup.
plot.top.genres(
  all_genres,
  column_name = "n_genres",
  plot_title = "Top 30 most represented Spotify genres"
)
# png(paste0("C:/Users/joseph.mcgirr/Personal/R_fun/spotify/genres/top_30_genres.png"), height = 9, width = 15,units = 'in', res = 1000)
# plot.top.genres(all_genres, "n_genres", "Top 30 most represented Spotify genres")
# dev.off()
# # filter(edc_artists, !is.na(artist_tags)) |> nrow()
# # n_distinct(edc_artists$artist_tags)
# all_genres <- unlist(str_split(edc_artists$artist_tags, ";")) |>
# na.omit() |>
# as.character() |>
# str_trim() |>
# toupper() |>
# tibble()
# names(all_genres) <- "genre"
#
#
# plot.top.genres(all_genres, "n_genres", "Top 30 most represented last.fm aritst tags")
```
## Popular artists in each genre
```{r}
#| fig-width: 15
#| fig-height: 9
# Genre tags that occur at least 10 times across the lineup, most common first.
top_genres <- all_genres |>
  count(genre, name = "n_genres") |>
  arrange(desc(n_genres)) |>
  filter(n_genres >= 10) |>
  pull(genre)

# Each artist's genre tags, upper-cased and trimmed the same way as
# all_genres$genre, so a genre can be compared against whole tags.
artist_genre_tags <- lapply(
  str_split(toupper(edc_artists$genres), ","),
  str_trim
)

column_name <- "popularity"
for (i in seq_along(top_genres)) {
  # Exact tag membership. A regex/substring test would also put artists tagged
  # only e.g. "TECH HOUSE" into the "HOUSE" chart, and would misread any regex
  # metacharacters in a genre name. Artists with no genres never match.
  has_genre <- vapply(
    artist_genre_tags,
    function(tags) top_genres[i] %in% tags,
    logical(1)
  )
  i_plot <- edc_artists[has_genre, ] |>
    arrange(desc(popularity)) |>
    head(10)

  p1 <- ggplot(i_plot, aes(reorder(artist, !!sym(column_name)), !!sym(column_name))) +
    geom_segment(
      aes(
        x = reorder(artist, !!sym(column_name)),
        xend = reorder(artist, !!sym(column_name)),
        y = 0,
        yend = !!sym(column_name),
        color = artist
      ),
      linewidth = 3
    ) +
    geom_from_path(aes(path = image_url), width = 0.052) +
    coord_flip(clip = "off") +
    scale_color_manual(values = rep(my_cols, 3)) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(paste0("Top 10 ", top_genres[i], " artists by popularity"))
  # Plots created inside a loop are not auto-printed, so print explicitly.
  print(p1)
  # png(paste0("C:/Users/joseph.mcgirr/Personal/R_fun/spotify/genres/", i,".png"), height = 9, width = 15,units = 'in', res = 700)
  # print(p1)
  # dev.off()
}
```
# All artists in each genre
[ See here ](https://docs.google.com/spreadsheets/d/1DFnTFjlDW9zS2Bkv8YediH602jCJOaxf42j6qbsDBA0/edit?usp=sharing) for a table of all artists labeled by each genre.
```{r}
#| results: asis
# One row per genre tag, this time keeping the casing stored in
# edc_artists$genres so tags can be compared against that column directly.
all_genres <- tibble(
  genre = edc_artists$genres |>
    str_split(",") |>
    unlist() |>
    na.omit() |>
    as.character() |>
    str_trim()
)

# Sorted distinct genres, computed once instead of on every loop iteration.
genre_levels <- sort(unique(all_genres$genre))

# Each artist's genre tags as a trimmed character vector.
artist_genre_tags <- lapply(str_split(edc_artists$genres, ","), str_trim)

# For every genre, the artists carrying exactly that tag, most popular first,
# collapsed into one comma-separated string. Whole-tag comparison replaces the
# earlier unanchored pattern "<genre>,|,<genre>,|,<genre>", which also matched
# longer tags containing the genre (e.g. "deep <genre>,...") and treated the
# genre name as a regular expression.
artists_tagged_by_genre <- vapply(
  genre_levels,
  function(i_genre) {
    has_genre <- vapply(
      artist_genre_tags,
      function(tags) i_genre %in% tags,
      logical(1)
    )
    edc_artists[has_genre, ] |>
      arrange(desc(popularity)) |>
      pull(artist) |>
      paste0(collapse = ",")
  },
  character(1),
  USE.NAMES = FALSE
)
artists_tagged_by_genre <- tibble(
  genre = genre_levels,
  artists = artists_tagged_by_genre
)
# write.table(artists_tagged_by_genre, "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/artists_in_each_genre.txt", row.names = F, quote = F, sep = "\t")
```
# Notes
## Run time
```{r}
# Elapsed time since `start_time` was recorded in the first chunk.
difftime(Sys.time(), start_time)
```
## Session
```{r}
# Print the R version and attached/loaded packages used for this render.
sessionInfo ()
```