Author

Joe

Published

March 4, 2024

Code
# Record when the notebook started so the total run time can be reported at
# the end.
start_time <- Sys.time()

suppressPackageStartupMessages(library(ggpath))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(tidyverse))

# A positive `scipen` biases number formatting toward fixed notation, so large
# counts are less likely to be printed in scientific notation.
options(scipen = 10L)

# Shared plot theme: theme_minimal() with bold white text on a purple
# background and extra spacing around the axis titles.
# (An earlier, plainer definition of `jam_theme` used to precede this one; it
# was overwritten immediately and never used, so only this definition is kept.)
jam_theme <- theme_minimal() +
  theme(
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 8, l = 0)),
    text = element_text(size = 20, face = "bold", color = "white"),
    axis.text.x = element_text(size = 10, color = "white"),
    axis.text.y = element_text(size = 13, color = "white"),
    plot.title = element_text(face = "bold", color = "white"),
    plot.background = element_rect(fill = "#5E61AF"),
    plot.margin = margin(1, 1, 1.5, 1.2, "cm")
  )

# Create color palette based on lineup image @ https://coolors.co/image-picker
my_cols <- c("#52BFEC","#AA1880","#EC0059","#08BCDF","#4C1064", "#FF00BC", "#2249CD","#53007D", "#FF6B02","#B319B2","#EAE100", "#BF068F")

Summary

This notebook shows how I searched the Spotify and last.fm APIs to find data on EDC 2024 artists. I was curious to find the most popular artists.

EDC 2024 Lineup

EDC Lineup 2024

Convert lineup image to text

Create text list of EDC artists using imagetotext.io

Code
# Artist names were pulled out of the EDC lineup PNG with
# https://www.imagetotext.io/ and saved as a tab-separated text file.

# Read that file, drop duplicate rows, and sort by artist name.
edc_artists <- read_tsv(
  "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artists_text_from_png.txt"
) |>
  unique() |>
  arrange(artist)

# Uncomment to inspect the table:
# edc_artists

Collect Artist Data

Spotify

Access Spotify API using package spotifyr

To access the Spotify Web API you need a Spotify developer account, which you can create on the Spotify for Developers site. Setup is quick and easy.

Code
#install.packages("spotifyr")
library(spotifyr)

# set up a dev account to get valid API IDs
# Sys.setenv(SPOTIFY_CLIENT_ID = '########################')
# Sys.setenv(SPOTIFY_CLIENT_SECRET = '#########################')

# Request one access token up front and pass it to every search below, rather
# than letting each call request a fresh token through its default argument.
access_token <- get_spotify_access_token()


# Use search_spotify() to find spotify artist ids from artist names
# No ID for Domina, Hint of Lavender, Marlie, VUIIIGUR

# Start with every ID missing; only exact name matches are filled in.
spotify_artist_id <- rep(NA_character_, length(edc_artists$artist))

for (i in seq_along(edc_artists$artist)) {

  i_search_spotify <- search_spotify(edc_artists$artist[i], authorization = access_token)

  # Position of the first search hit whose upper-cased name equals the lineup
  # name, or NA when there is none (including when no artists were returned).
  # NOTE(review): only the Spotify side is upper-cased, so this presumably
  # relies on the lineup names already being upper-case -- confirm.
  exact_artist_name_match <- match(
    edc_artists$artist[i],
    toupper(i_search_spotify$artists$items$name)
  )

  if (!is.na(exact_artist_name_match)) {

    spotify_artist_id[i] <- i_search_spotify$artists$items$id[exact_artist_name_match]

  } else {

    cat(paste0("\nNo exact match for artist name: ", edc_artists$artist[i], "\n\n"))

    # Disabled alternative: fall back to the first search hit instead of NA.
    # spotify_artist_id[[i]] <- i_search_spotify$artists$items$id[1]
    # cat(paste0("\nNo exact match for artist name: ",edc_artists$artist[i],
    #                    "\n", "Using: ", i_search_spotify$artists$items$name[1],
    #                    "\n", i_search_spotify$artists$items$external_urls.spotify[1], "\n\n"))
  }

}

edc_artists$spotify_artist_id <- spotify_artist_id


# Use get_artist() to get genres, followers, and popularity

# Every column starts out missing with its final type, so artists without a
# Spotify ID need no special handling and no character -> numeric round trip.
n_artists  <- length(edc_artists$artist)
genres     <- rep(NA_character_, n_artists)
followers  <- rep(NA_real_, n_artists)
popularity <- rep(NA_real_, n_artists)
image_url  <- rep(NA_character_, n_artists)

for (i in seq_along(edc_artists$spotify_artist_id)) {

  i_id <- edc_artists$spotify_artist_id[i]

  # No Spotify ID was found for this artist: leave all fields as NA.
  if (is.na(i_id)) {
    next
  }

  # Reuse the token obtained earlier (`access_token`) instead of requesting a
  # new one for every artist.
  i_artist_info <- get_artist(i_id, authorization = access_token)

  # An artist with no genres collapses to "", which is stored as NA.
  i_genres <- paste0(i_artist_info$genres, collapse = ",")
  if (nzchar(i_genres)) {
    genres[i] <- i_genres
  }

  followers[i]  <- i_artist_info$followers$total
  popularity[i] <- i_artist_info$popularity

  # Keep the first image URL when the response has one; otherwise stay NA.
  i_image <- i_artist_info$images$url[1]
  if (!is.null(i_image)) {
    image_url[i] <- i_image
  }

}

edc_artists$genres     <- genres
edc_artists$followers  <- as.numeric(followers)
edc_artists$popularity <- as.numeric(popularity)
edc_artists$image_url  <- image_url

last.fm

Access last.fm API using package lastfmR

Code
#devtools::install_github("ppatrzyk/lastfmR")
library(lastfmR)
# masks get_tracks()


# Query last.fm for every lineup artist and hold the result as a tibble.
lastfm_artist_info <- tibble(get_artist_info(artist_vector = edc_artists$artist))

# Full join keeps artists found in either table. No `by` is supplied, so the
# join keys are whichever column names the two tables have in common.
edc_artists <- edc_artists |>
  full_join(lastfm_artist_info)

# One-off export of the combined table (kept disabled):
# write.table(edc_artists, "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt", row.names = F, quote = F, sep = "\t")

Spotify Followers

Code
# Load the previously saved artist table rather than the in-memory result.
edc_artists <- read_tsv("C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt")

#' Horizontal bar-style plot of the top artists for one numeric column.
#'
#' Reads the global `edc_artists` table and is styled with the global
#' `jam_theme` and `my_cols`.
#'
#' @param column_name Name (string) of the `edc_artists` column to rank by.
#' @param top Number of artists to keep after sorting in descending order.
#' @param plot_title Title shown above the plot.
#' @param include_images `"true"` (or `TRUE`) adds a `geom_from_path()` layer
#'   that draws each artist's `image_url`; any other value leaves images off.
#' @return A ggplot object.
plot.top.artists <- function(column_name, top, plot_title, include_images = "false") {

  # Accept a real logical as well as the original string flag; previously
  # `include_images = TRUE` was silently treated as "no images".
  show_images <- isTRUE(include_images) || identical(include_images, "true")

  # Build the column symbol once and unquote it wherever the column is needed.
  value <- sym(column_name)

  i_plot <- edc_artists |>
    arrange(desc(!!value)) |>
    head(top)

  # Recycle the palette to cover every plotted artist (and never fewer than one
  # full palette), so `top` is not limited to three repeats of `my_cols`.
  n_colors <- max(nrow(i_plot), length(my_cols))

  p1 <- ggplot(i_plot, aes(reorder(artist, !!value), !!value)) +
    geom_segment(
      aes(
        x = reorder(artist, !!value),
        xend = reorder(artist, !!value),
        y = 0,
        yend = !!value,
        color = artist
      ),
      linewidth = 3
    ) +
    # clip = "off" lets layers draw outside the panel area.
    coord_flip(clip = "off") +
    scale_color_manual(values = rep_len(my_cols, n_colors)) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)

  if (show_images) {
    p1 <- p1 + geom_from_path(aes(path = image_url), width = 0.052)
  }

  p1
}

plot.top.artists("followers", 10, "Top 10 EDC artists with the most followers on Spotify", include_images = "true")

Code
# Top 30 by Spotify follower count, without artist images.
plot.top.artists(
  column_name = "followers",
  top = 30,
  plot_title = "Top 30 EDC artists with the most followers on Spotify",
  include_images = "false"
)

Spotify “Popularity”

The “popularity” metric is “track-based and a measure of how many plays a track received and how recent those plays are. An artist’s popularity is calculated from the popularity of all the artist’s tracks.”

Code
# Top 10 by Spotify popularity score, with artist images.
plot.top.artists(
  column_name = "popularity",
  top = 10,
  plot_title = "Top 10 most popular EDC artists according to Spotify",
  include_images = "true"
)

Code
# Top 30 by Spotify popularity score, without artist images.
plot.top.artists(
  column_name = "popularity",
  top = 30,
  plot_title = "Top 30 most popular EDC artists according to Spotify",
  include_images = "false"
)

last.fm Global Listeners

Code
# Top 10 by last.fm global listeners, with artist images.
plot.top.artists(
  column_name = "global_listeners",
  top = 10,
  plot_title = "Top 10 artists with the most listeners on last.fm",
  include_images = "true"
)

Code
# Top 30 by last.fm global listeners, without artist images.
plot.top.artists(
  column_name = "global_listeners",
  top = 30,
  plot_title = "Top 30 artists with the most listeners on last.fm",
  include_images = "false"
)

Genres

Code
# filter(edc_artists, !is.na(genres)) |> nrow()
# n_distinct(edc_artists$genres)

# Flatten each artist's comma-separated genre string into one long vector of
# individual genre labels, dropping artists with no genres.
genre_labels <- unlist(str_split(edc_artists$genres, ","))
genre_labels <- genre_labels[!is.na(genre_labels)]

# One row per (artist, genre) occurrence, trimmed and upper-cased.
all_genres <- tibble(genre = toupper(str_trim(genre_labels)))

#' Horizontal bar-style plot of the most frequent genres.
#'
#' Styled with the global `jam_theme` and `my_cols`.
#'
#' @param all_genres Table with a `genre` column, one row per occurrence.
#' @param column_name Name (string) of the count column to plot; the counts
#'   computed here are stored in `n_genres`.
#' @param plot_title Title shown above the plot.
#' @param top Number of genres to keep after sorting by count (default 30,
#'   which was previously hard-coded).
#' @return A ggplot object.
plot.top.genres <- function(all_genres, column_name, plot_title, top = 30) {

  # Build the column symbol once and unquote it wherever the column is needed.
  value <- sym(column_name)

  # Count rows per genre and keep the `top` most frequent.
  i_plot <- all_genres |>
    dplyr::count(genre, name = "n_genres") |>
    arrange(desc(n_genres)) |>
    head(top)

  # Recycle the palette to cover every plotted genre (and never fewer than one
  # full palette) instead of repeating it a fixed number of times.
  n_colors <- max(nrow(i_plot), length(my_cols))

  p1 <- ggplot(i_plot, aes(reorder(genre, !!value), !!value)) +
    geom_segment(
      aes(
        x = reorder(genre, !!value),
        xend = reorder(genre, !!value),
        y = 0,
        yend = !!value,
        color = genre
      ),
      linewidth = 3
    ) +
    coord_flip(clip = "off") +
    scale_color_manual(values = rep_len(my_cols, n_colors)) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)

  p1
}

# Plot the most frequent Spotify genre labels across the lineup.
plot.top.genres(
  all_genres = all_genres,
  column_name = "n_genres",
  plot_title = "Top 30 most represented Spotify genres"
)

Code
# png(paste0("C:/Users/joseph.mcgirr/Personal/R_fun/spotify/genres/top_30_genres.png"), height = 9, width = 15,units = 'in', res = 1000)
# plot.top.genres(all_genres, "n_genres", "Top 30 most represented Spotify genres")
# dev.off()

# # filter(edc_artists, !is.na(artist_tags)) |> nrow()
# # n_distinct(edc_artists$artist_tags)
# all_genres <- unlist(str_split(edc_artists$artist_tags, ";")) |> 
#   na.omit() |> 
#   as.character() |> 
#   str_trim() |> 
#   toupper() |> 
#   tibble()
# names(all_genres) <- "genre"
# 
# 
# plot.top.genres(all_genres, "n_genres", "Top 30 most represented last.fm artist tags")

All artists in each genre

See here for a table of all artists labeled by each genre.

Code
# Rebuild the flat genre table, this time keeping the labels' original case
# (no toupper()) so they can be compared against `edc_artists$genres`.
genre_labels <- unlist(str_split(edc_artists$genres, ","))
genre_labels <- genre_labels[!is.na(genre_labels)]

all_genres <- tibble(genre = str_trim(genre_labels))

# for(i in sort(unique(all_genres$genre))){
#   
#   dat <- tibble()
#   dat <- filter(edc_artists, genres == i) |> arrange(desc(popularity)) |> select(artist, popularity) |> rename(!!i := "artist")
#   dat <- bind_rows(dat, 
#                                    filter(edc_artists, str_detect(genres, paste0(i,",|,", i, ",|,", i))) |> arrange(desc(popularity)) |> rename(!!i := "artist")) |>
#                   select(!!i) 
#   # print(knitr::kable(dat))
#   # cat("\n")
# }

# For every distinct genre, list the artists tagged with it (most popular
# first) as one comma-separated string.

# Sorted distinct genres, computed once instead of on every iteration.
genre_names <- sort(unique(all_genres$genre))

# Split each artist's genre string into trimmed tokens once. Membership is then
# tested on whole genre names, so "house" no longer also matches
# "electro house", and characters in a genre name are never read as regex
# syntax. Artists with no genres yield a single NA token and match nothing.
artist_genre_tokens <- lapply(str_split(edc_artists$genres, ","), str_trim)

artists_tagged_by_genre <- vapply(
  genre_names,
  function(i_genre) {
    has_genre <- vapply(
      artist_genre_tokens,
      function(tokens) i_genre %in% tokens,
      logical(1)
    )

    i_dat <- edc_artists[has_genre, ] |>
      arrange(desc(popularity)) |>
      pull(artist)

    paste0(i_dat, collapse = ",")
  },
  character(1),
  USE.NAMES = FALSE
)

artists_tagged_by_genre <- tibble(genre = genre_names, artists = artists_tagged_by_genre)

# write.table(artists_tagged_by_genre, "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/artists_in_each_genre.txt", row.names = F, quote = F, sep = "\t")

Notes

Run time

Code
# Elapsed time since `start_time` was recorded.
difftime(Sys.time(), start_time)
Time difference of 30.68522 secs

Session

Code
# Print the R version, platform, and package versions used for this run.
utils::sessionInfo()
R version 4.3.2 (2023-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19045)

Matrix products: default


locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
 [5] purrr_1.0.2     readr_2.1.4     tidyr_1.3.0     tibble_3.2.1   
 [9] tidyverse_2.0.0 plotly_4.10.3   ggplot2_3.4.4   ggpath_1.0.1   

loaded via a namespace (and not attached):
 [1] utf8_1.2.4         generics_0.1.3     stringi_1.8.3      hms_1.1.3         
 [5] digest_0.6.33      magrittr_2.0.3     evaluate_0.23      grid_4.3.2        
 [9] timechange_0.2.0   fastmap_1.1.1      jsonlite_1.8.8     httr_1.4.7        
[13] fansi_1.0.6        viridisLite_0.4.2  scales_1.3.0       lazyeval_0.2.2    
[17] cli_3.6.2          crayon_1.5.2       rlang_1.1.2        bit64_4.0.5       
[21] munsell_0.5.0      withr_2.5.2        cachem_1.0.8       yaml_2.3.8        
[25] parallel_4.3.2     tools_4.3.2        tzdb_0.4.0         memoise_2.0.1     
[29] colorspace_2.1-0   curl_5.2.0         vctrs_0.6.5        R6_2.5.1          
[33] magick_2.8.1       lifecycle_1.0.4    bit_4.0.5          htmlwidgets_1.6.4 
[37] vroom_1.6.5        pkgconfig_2.0.3    pillar_1.9.0       gtable_0.3.4      
[41] Rcpp_1.0.11        data.table_1.14.10 glue_1.6.2         xfun_0.41         
[45] tidyselect_1.2.0   rstudioapi_0.15.0  knitr_1.45         farver_2.1.1      
[49] htmltools_0.5.7    labeling_0.4.3     rmarkdown_2.25     compiler_4.3.2