import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_genres(url):
    """Scrape the genre map at *url* into a DataFrame.

    Every ``<div class="genre">`` element is read; its inline ``style``
    attribute supplies the font size, colour and ``top``/``left`` position.

    Args:
        url: Address of the genre-map page to download.

    Returns:
        A DataFrame with the columns ``genre``, ``font_size``, ``color``
        (the raw colour string from the style), ``colors_rgb``
        (``"rgb(r, g, b)"``), ``top`` and ``left``; or ``None`` if anything
        goes wrong (the error is printed).
    """
    def style_value(style, prop):
        # Text between "<prop>:" and the next ";" in the inline style.
        return style.split(f'{prop}:')[1].split(';')[0].strip()

    try:
        # A timeout stops a stalled connection from hanging the run, and
        # raise_for_status keeps an HTTP error page from being parsed as if
        # it were the genre map.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        genres_objs = []
        for genre in soup.find_all("div", class_="genre"):
            style = genre['style']
            color_str = style_value(style, 'color')

            # color_str is read as "#rrggbb": skip the first character and
            # decode three two-digit hex channels.
            r, g, b = (int(color_str[i:i + 2], 16) for i in (1, 3, 5))

            genres_objs.append({
                "genre": genre.text.replace("»", "").strip(),
                "font_size": style_value(style, 'font-size'),
                "color": color_str,
                "colors_rgb": f"rgb({r}, {g}, {b})",
                "top": style_value(style, 'top'),
                "left": style_value(style, 'left'),
            })

        return pd.DataFrame(genres_objs)

    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(f"An error occurred while scraping genres: {e}")
        return None

url = "https://everynoise.com/engenremap.html"
genres_df = scrape_genres(url)

# scrape_genres returns None on failure, so only touch the frame when the
# scrape actually produced one (previously .head() ran unguarded and raised
# AttributeError on None).
if genres_df is not None:
    genres_df.to_csv("enao_genres.csv", index=False)
    genres_df.head()

# Reload from disk; if the scrape failed this uses whatever enao_genres.csv
# already exists.
genres_df = pd.read_csv("enao_genres.csv")

# The graph export uses the "rgb(r, g, b)" form of the colour.
genres_df['color'] = genres_df['colors_rgb']

enao_graph = genres_df[['genre', 'color', 'font_size', 'left', 'top']]

enao_graph.to_csv("enao_graph.csv", index=False)

print(enao_graph)

                      genre              color font_size    left      top
0                       pop   rgb(173, 136, 7)      160%   787px   4850px
1                       rap   rgb(168, 137, 3)      144%  1070px   5759px
2                      rock  rgb(171, 113, 26)      141%   564px  11449px
3             urbano latino   rgb(189, 144, 2)      134%  1170px   3341px
4                   hip hop   rgb(173, 126, 9)      134%  1085px   6978px
...                     ...                ...       ...     ...      ...
6173     yunnan traditional   rgb(69, 154, 40)      100%   714px  17793px
6174  classical string trio  rgb(25, 173, 130)      100%   381px  21422px
6175         string quintet  rgb(69, 166, 181)      100%   494px  18809px
6176      quartetto d'archi   rgb(52, 164, 95)      100%   430px  18166px
6177        youth orchestra  rgb(38, 145, 172)      100%   175px  20806px

[6178 rows x 5 columns]

def _px_to_int(value):
    """Turn a CSS pixel length such as ``"787px"`` into the integer 787."""
    return int(value.replace("px", ""))


# Start again from the scraped CSV so `color` is still the raw colour string.
genres_df = pd.read_csv("enao_genres.csv")

# Both CSS offsets are stored as "<n>px"; convert them to plain integers.
for css_column in ('left', 'top'):
    genres_df[css_column] = genres_df[css_column].apply(_px_to_int)

# left/top become the x/y coordinates used by the plots below.
genres_df = genres_df.rename(columns={'left': 'x', 'top': 'y'})

df = genres_df[['genre', 'color', 'x', 'y']]

print(df)

                      genre    color     x      y
0                       pop  #ad8807   787   4850
1                       rap  #a88903  1070   5759
2                      rock  #ab711a   564  11449
3             urbano latino  #bd9002  1170   3341
4                   hip hop  #ad7e09  1085   6978
...                     ...      ...   ...    ...
6173     yunnan traditional  #459a28   714  17793
6174  classical string trio  #19ad82   381  21422
6175         string quintet  #45a6b5   494  18809
6176      quartetto d'archi  #34a45f   430  18166
6177        youth orchestra  #2691ac   175  20806

[6178 rows x 4 columns]

import pandas as pd
import plotly.express as px

# Flat 2-D view of the genre map: one marker per genre at (x, y), coloured by
# the `color` column, with the genre name shown on hover.
axis_labels = {'x': 'x',
               'y': 'y'}

fig = px.scatter(
    df,
    x='x',
    y='y',
    color='color',
    hover_name='genre',
    title='Visual Mapping of Genres',
    labels=axis_labels,
)

# Show the plot
fig.show()

import plotly.graph_objs as go
import colorsys
import pandas as pd

def hex_to_hsl(hex_color):
    """Convert a hex colour to ``(hue, saturation, lightness)``.

    Args:
        hex_color: Colour written as ``"#rrggbb"``; the leading ``#`` is
            optional.

    Returns:
        A tuple ``(hue, saturation, lightness)`` with hue in degrees
        (0-360) and saturation/lightness in the range 0-1.

    Raises:
        ValueError: If the string does not contain six hex digits.
    """
    digits = hex_color.lstrip('#')
    red, green, blue = (int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))

    # colorsys works in H-L-S order; reorder so the result matches the
    # H-S-L order promised by the function name.
    hue, lightness, saturation = colorsys.rgb_to_hls(red, green, blue)
    return hue * 360, saturation, lightness

def genre_map_3d(df, hue_rotation, width):
    """Show a 3-D scatter of genres with colour hue as the middle axis.

    Args:
        df: Frame with the columns ``x``, ``y``, ``color`` (hex string) and
            ``genre``.
        hue_rotation: Degrees added to every hue before wrapping at 360.
        width: Figure width in pixels (the height is fixed at 600).

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Plot-space y is the rotated hue; the frame's own y goes on the z axis.
    rotated_hues = [
        (hex_to_hsl(colour)[0] + hue_rotation) % 360 for colour in df['color']
    ]

    markers = go.Scatter3d(
        x=df['x'],
        y=rotated_hues,
        z=df['y'],
        hovertext=df['genre'],
        mode="markers",
        marker=dict(size=3, color=df['color'], opacity=1),
    )

    scene = dict(
        xaxis=dict(title=dict(text="x")),
        yaxis=dict(title=dict(text="Hue angle (°)"), autorange="reversed"),
        zaxis=dict(title=dict(text="y")),
        camera=dict(eye=dict(x=-1.25, y=-1.25, z=1.25)),
    )
    layout = go.Layout(
        scene=scene,
        height=600,
        width=width,
        margin=dict(l=0, r=0, b=0, t=0),
    )

    go.Figure(data=[markers], layout=layout).show()

# Render the whole genre map (hues rotated by 30 degrees, 800 px wide).
genre_map_3d(df, 30, 800)

playlist_df = pd.read_csv("features_df.csv")

# One row per (playlist row, genre). Tokens are stripped after splitting so
# that "rap, hip hop" yields "hip hop" rather than " hip hop", which would
# never match a genre name in the merge below.
# NOTE(review): assumes `genres` is a plain comma-separated string — confirm;
# a later cell also removes brackets and quotes from this column.
expanded_genres = (
    playlist_df['genres']
    .str.split(',')
    .explode()
    .str.strip()
    .rename('genre')
)
expanded_genres = expanded_genres[expanded_genres.notna() & (expanded_genres != '')]
expanded_df = playlist_df.drop(columns='genres').join(expanded_genres)

mapped_df = expanded_df.merge(df, on='genre', how='left')

# Keep only the rows whose genre was found on the map.
mapped_df = mapped_df.dropna(subset=['x', 'y', 'color'])

genre_map_3d(mapped_df, 30, 800)

import sys
import re
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

class GenreProcessingError(Exception):
    """Raised when a genre page cannot be processed.

    Attributes are documented inline: ``genre`` holds the genre identifier
    that was being processed when the error occurred.
    """

    def __init__(self, message, genre, *args, **kwargs):
        # Remember which genre failed so callers can report it.
        self.genre = genre
        super().__init__(message, *args, **kwargs)

def fetch_genre_page(subgenre):
    """Download and parse the Every Noise page for one genre.

    Args:
        subgenre: Genre identifier as it appears in the page URL
            (``engenremap-<subgenre>.html``).

    Returns:
        The page parsed as a ``BeautifulSoup`` document.

    Raises:
        GenreProcessingError: If the request fails, times out or the server
            answers with an HTTP error status.
    """
    genre_page_url = f"http://everynoise.com/engenremap-{subgenre}.html"
    try:
        # Timeout: never hang on a stalled connection. raise_for_status:
        # a 404/500 page must not be parsed as if it were a genre page.
        response = requests.get(genre_page_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as request_err:
        raise GenreProcessingError("Error fetching genre page.", subgenre) from request_err
    return BeautifulSoup(response.text, "html.parser")

def process_genre(genre_div):
    """Collect playlist, artists and related genres for one genre.

    Args:
        genre_div: Parsed element whose ``.text`` is the genre name.

    Returns:
        A dict with the keys ``genre`` (name with ``: ' + » &``, whitespace
        and ``-`` removed), ``playlist`` (href of the first link whose text
        is ``playlist``, or ``None``), ``artists`` / ``artist_weights``
        (from the ``genre scanme`` divs), and ``sim_genres`` /
        ``sim_weights`` / ``opp_genres`` / ``opp_weights`` (from the other
        ``genre`` divs whose ``id`` contains ``nearby`` / ``mirror``).
        Name and weight lists are index-aligned and in page order.

    Raises:
        GenreProcessingError: Propagated from ``fetch_genre_page``.
    """
    def weight_of(div):
        # Last whitespace-separated token of the inline style, minus "%".
        return div['style'].split()[-1].replace('%', '')

    def label_of(div):
        return div.text.replace("»", "").strip()

    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    genre = re.sub(r"[:'+»&\s-]", '', genre_div.text)
    soup2 = fetch_genre_page(genre)

    spotify_link = soup2.find_all("a", string='playlist')
    playlist = spotify_link[0]['href'] if spotify_link else None

    # dict.fromkeys de-duplicates like the former set() did but keeps the
    # page order, so output no longer depends on hash ordering.
    artist_divs = list(dict.fromkeys(soup2.find_all("div", "genre scanme")))
    artist_lookup = set(artist_divs)
    related_divs = [
        div for div in dict.fromkeys(soup2.find_all("div", "genre"))
        if div not in artist_lookup
    ]

    # .get avoids a KeyError on divs that carry no id attribute.
    nearby_divs = [div for div in related_divs if 'nearby' in div.get('id', '')]
    mirror_divs = [div for div in related_divs if 'mirror' in div.get('id', '')]

    return {
        'genre': genre,
        'playlist': playlist,
        'artist_weights': [weight_of(div) for div in artist_divs],
        # No filtering here, so artists stays aligned with artist_weights.
        'artists': [label_of(div) for div in artist_divs],
        'sim_weights': [weight_of(div) for div in nearby_divs],
        'opp_weights': [weight_of(div) for div in mirror_divs],
        'sim_genres': [label_of(div) for div in nearby_divs],
        'opp_genres': [label_of(div) for div in mirror_divs],
    }
    
def process_genres_and_update_csv(filename):
    """Fill in missing genre details in a CSV file, in place.

    Every row whose ``SPOTIFY_URL`` is null is looked up with
    ``process_genre`` (using the row's ``GENRE``), and the result is written
    to the ``SIM_GENRES``, ``SIM_WEIGHTS``, ``OPP_GENRES``, ``OPP_WEIGHTS``,
    ``REL_ARTISTS``, ``ARTIST_WEIGHTS`` and ``SPOTIFY_URL`` columns. Rows
    that raise ``GenreProcessingError`` are reported and skipped.

    Args:
        filename: Path of the CSV to read and overwrite.
    """
    df = pd.read_csv(filename)

    # CSV column -> key in the dict returned by process_genre.
    list_columns = {
        'SIM_GENRES': 'sim_genres',
        'SIM_WEIGHTS': 'sim_weights',
        'OPP_GENRES': 'opp_genres',
        'OPP_WEIGHTS': 'opp_weights',
        'REL_ARTISTS': 'artists',
        'ARTIST_WEIGHTS': 'artist_weights',
    }

    try:
        for idx, row in df.iterrows():
            if not pd.isnull(row['SPOTIFY_URL']):
                continue

            genre_div = BeautifulSoup(f'<div>{row["GENRE"]}</div>', "html.parser")
            try:
                result = process_genre(genre_div)
            except GenreProcessingError as gpe:
                print(f"Error processing genre #{idx} ({gpe.genre}): {gpe}")
                continue

            print(f"Processed genre #{idx}: {result['genre']}")
            for column, key in list_columns.items():
                df.at[idx, column] = ', '.join(result[key])
            df.at[idx, 'SPOTIFY_URL'] = result['playlist']
    finally:
        # Write whatever has been gathered even if the loop is interrupted
        # (Ctrl-C, unexpected error), so a rerun resumes where it stopped.
        df.to_csv(filename, index=False)

filename = "all_genres1.csv"
process_genres_and_update_csv(filename)

# Timeout and status check: never hang, and never parse an error page.
response = requests.get("http://everynoise.com/engenremap.html", timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
all_genre_divs = soup.find_all("div", "genre scanme")

results = []

# NOTE(review): `genres_to_process` is not defined anywhere in the visible
# part of this file — it must be defined before this loop runs.
for index, genre_div in enumerate(all_genre_divs):
    # Same normalisation as process_genre (raw string avoids the invalid
    # "\s" escape in a normal literal).
    genre = re.sub(r"[:'+»&\s-]", '', genre_div.text)

    if genre not in genres_to_process:
        continue

    try:
        result = process_genre(genre_div)
        results.append(result)
        print(f"Processed genre #{index}: {result['genre']}")
    except GenreProcessingError as gpe:
        print(f"Error processing genre #{index} ({gpe.genre}): {gpe}")

import pandas as pd
from tqdm import tqdm
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from pprint import pprint
import requests
import concurrent.futures
import json
import configparser
from urllib.parse import quote
import time

# Credentials and endpoints are read from the [SPOTIFY] section of config.ini.
config = configparser.ConfigParser()
config.read('config.ini')


def _spotify_setting(option):
    """Return one option from the [SPOTIFY] section of the loaded config."""
    return config.get('SPOTIFY', option)


USERNAME = _spotify_setting('username')
PASSWORD = _spotify_setting('password')
CLIENT_ID = _spotify_setting('client_id')
CLIENT_SECRET = _spotify_setting('client_secret')
REDIRECT_URI = _spotify_setting('redirect_uri')
SCOPE = _spotify_setting('scope')
SPOTIFY_API_KEY = _spotify_setting('spotify_api_key')

SPOTIFY_AUTH_URL = "https://accounts.spotify.com/authorize"
SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token"
SPOTIFY_API_BASE_URL = "https://api.spotify.com"
API_VERSION = "v1"
SPOTIFY_API_URL = f"{SPOTIFY_API_BASE_URL}/{API_VERSION}"

# Client-credentials client used by all Spotify calls below.
client_credentials_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

import time
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import json

def get_track_ids_from_playlist(sp, playlist_id):
    """Return the IDs of all tracks in a playlist.

    Follows the paging links so playlists longer than one page are read
    completely, and skips items that have no track object or no track ID
    instead of crashing on them.

    Args:
        sp: Client exposing ``playlist_tracks(playlist_id)`` and
            ``next(page)`` (as ``spotipy.Spotify`` does).
        playlist_id: Playlist identifier passed straight to the client.

    Returns:
        List of track ID strings in playlist order.
    """
    track_ids = []
    page = sp.playlist_tracks(playlist_id)
    while page:
        for item in page['items']:
            track = item.get('track')
            if track and track.get('id'):
                track_ids.append(track['id'])
        # Only ask for another page when the current one links to it.
        page = sp.next(page) if page.get('next') else None
    return track_ids


def get_track_ids_for_playlists(sp, playlists):
    """Fetch track IDs for many playlists, caching them in track_ids.json.

    Previously cached playlists (loaded from ``track_ids.json`` if it
    exists) are not fetched again. Each new playlist is tried twice;
    playlists that still fail, or whose URL has no ``playlist/`` part, are
    reported and left out. The cache file is rewritten at the end, even
    when the run is interrupted.

    Args:
        sp: Spotify client passed on to ``get_track_ids_from_playlist``.
        playlists: Iterable with a length, holding playlist URLs (or None).

    Returns:
        Dict mapping playlist URL to its list of track IDs (``None`` for a
        ``None`` URL).
    """
    track_ids_dict = {}

    try:
        with open('track_ids.json', 'r') as f:
            track_ids_dict = json.load(f)
    except FileNotFoundError:
        pass

    max_attempts = 2  # Attempt once and retry once

    def process_playlist_url(playlist_url):
        if playlist_url is None:
            track_ids_dict[playlist_url] = None
            return
        if playlist_url in track_ids_dict:
            return

        try:
            # Drop any "?si=..." style query so only the bare ID remains.
            playlist_id = playlist_url.split('playlist/')[1].split('?')[0]
        except (IndexError, AttributeError):
            # One malformed entry must not abort the whole run.
            tqdm.write(f"Skipping unrecognised playlist URL: {playlist_url!r}")
            return

        last_error = None
        for attempt in range(max_attempts):
            try:
                track_ids_dict[playlist_url] = get_track_ids_from_playlist(sp, playlist_id)
                return
            except Exception as e:
                last_error = e
                if attempt + 1 < max_attempts:
                    time.sleep(2 ** attempt)
        # Report instead of silently dropping the playlist.
        tqdm.write(f"Giving up on playlist {playlist_url}: {last_error}")

    try:
        with ThreadPoolExecutor() as executor:
            list(tqdm(executor.map(process_playlist_url, playlists), total=len(playlists), desc='Processing playlists'))
    finally:
        # Persist whatever was fetched, even after an interruption.
        with open('track_ids.json', 'w') as f:
            json.dump(track_ids_dict, f)

    return track_ids_dict

# Keep only the string entries of SPOTIFY_URL (anything else, e.g. a missing
# value, is skipped) and fetch the track IDs of each playlist.
# NOTE(review): assumes `df` here is the frame that has a SPOTIFY_URL column
# — confirm; other cells rebind `df` to differently shaped frames.
playlists = [url for url in df['SPOTIFY_URL'].tolist() if isinstance(url, str)]
playlist_track_ids = get_track_ids_for_playlists(sp, playlists)

Processing playlists: 100%|█████████████████| 6091/6091 [02:46<00:00, 36.65it/s]

from ratelimiter import RateLimiter

def get_track_popularity(sp, track_ids):
    """Return a ``{track_id: popularity}`` mapping for the given tracks.

    Args:
        sp: Client exposing ``tracks(track_ids)`` that returns a dict with a
            ``'tracks'`` list (as ``spotipy.Spotify`` does).
        track_ids: Track IDs to look up in a single call.

    Returns:
        Dict keyed by the ``id`` of each returned track. Entries that come
        back as ``None`` (tracks the API could not resolve) are skipped
        rather than raising ``TypeError``.
    """
    tracks = sp.tracks(track_ids)
    return {
        track['id']: track['popularity']
        for track in tracks['tracks']
        if track is not None
    }

def get_audio_features_for_tracks(sp, track_ids_dict):
    """Fetch audio features and popularity for every track, with caching.

    Features already stored in ``audio_features.json`` are reused; only
    their popularity is refreshed. New tracks are fetched in batches of 50
    through a rate limiter (30 calls per second) on six worker threads.
    The cache file is rewritten at the end, even if the run is interrupted.

    Args:
        sp: Spotify client exposing ``audio_features(ids)``; also passed to
            ``get_track_popularity``.
        track_ids_dict: Mapping of playlist URL to a list of track IDs (or
            ``None``).

    Returns:
        Dict mapping track ID to its feature dict, each with an added
        ``'popularity'`` entry.
    """
    audio_features_dict = {}
    # Load existing audio features
    try:
        with open('audio_features.json', 'r') as f:
            audio_features_dict = json.load(f)
    except FileNotFoundError:
        pass

    rate_limiter = RateLimiter(max_calls=30, period=1)
    batch_size = 50
    max_attempts = 10

    def call_with_retry(api_call, ids):
        # Only the API call itself is retried, and the back-off sleep happens
        # outside the rate limiter so it does not count against it.
        last_error = None
        for attempt in range(max_attempts):
            try:
                with rate_limiter:
                    return api_call(ids)
            except Exception as e:
                last_error = e
                if attempt + 1 < max_attempts:
                    time.sleep(2 ** attempt)
        tqdm.write(f"Giving up on a batch of {len(ids)} tracks: {last_error}")
        return None

    def process_track_ids(track_ids):
        # Every batch is handled inside the loop (previously only the last
        # slice was processed, and an empty list raised a NameError).
        for start in range(0, len(track_ids), batch_size):
            batch = track_ids[start:start + batch_size]

            new_features = {}
            unprocessed_batch = [tid for tid in batch if tid not in audio_features_dict]
            if unprocessed_batch:
                features = call_with_retry(sp.audio_features, unprocessed_batch)
                for feature in features or []:
                    # Missing features arrive as None; skipping them avoids
                    # the TypeError that used to trigger ten pointless
                    # retries with exponential back-off.
                    if feature is not None:
                        new_features[feature['id']] = feature

            refresh_ids = [
                tid for tid in batch
                if tid in new_features or tid in audio_features_dict
            ]
            if not refresh_ids:
                continue

            popularity = call_with_retry(
                lambda ids: get_track_popularity(sp, ids), refresh_ids
            )
            if popularity is None:
                continue

            for tid in refresh_ids:
                record = new_features.get(tid) or audio_features_dict[tid]
                # Keep the previous popularity if the lookup did not return one.
                record['popularity'] = popularity.get(tid, record.get('popularity'))
                audio_features_dict[tid] = record

    # De-duplicate IDs shared between playlists (order preserved) so no track
    # is fetched twice or written by two threads.
    all_track_ids = list(dict.fromkeys(
        tid
        for ids in track_ids_dict.values() if ids is not None
        for tid in ids if tid is not None
    ))
    chunks = [all_track_ids[i:i + batch_size] for i in range(0, len(all_track_ids), batch_size)]

    try:
        with ThreadPoolExecutor(max_workers=6) as executor:
            list(tqdm(executor.map(process_track_ids, chunks), total=len(chunks), desc='Processing audio features'))
    finally:
        # Persist progress even when the run is interrupted.
        with open('audio_features.json', 'w') as f:
            json.dump(audio_features_dict, f)

    return audio_features_dict

# Fetch (or load from the audio_features.json cache) the audio features of
# every track found in the playlists above.
audio_features = get_audio_features_for_tracks(sp, playlist_track_ids)

Processing audio features:  26%|██▊        | 3069/11805 [05:25<15:25,  9.43it/s]

KeyboardInterrupt

import pandas as pd
import numpy as np
from tqdm import tqdm

# Group the track IDs of each genre's playlist under the genre name.
genre_track_ids = {}
for _, row in df.iterrows():
    ids_for_playlist = playlist_track_ids.get(row['SPOTIFY_URL'], [])
    if ids_for_playlist is None:
        continue
    genre_track_ids.setdefault(row['GENRE'], []).extend(ids_for_playlist)

# Calculate the average of each audio feature for the tracks in each genre
genre_audio_features_avg = {}
for genre, track_ids in tqdm(genre_track_ids.items(), desc='Processing genres'):
    features_list = [audio_features[tid] for tid in track_ids if tid in audio_features]
    if not features_list:
        # No known track for this genre: mark it so it can be filtered out.
        genre_audio_features_avg[genre] = None
        continue
    features_df = pd.DataFrame(features_list)
    genre_audio_features_avg[genre] = features_df.mean(numeric_only=True).to_dict()

import pandas as pd
import numpy as np


# Genres for which at least one track had audio features.
valid_genres = [genre for genre, features in genre_audio_features_avg.items() if features is not None]
filtered_df = df[df['GENRE'].isin(valid_genres)].copy()

# Take the feature names from the averages themselves (first-seen order)
# instead of relying on `features_df` left over from another cell's last
# loop iteration, which may be stale or undefined.
numeric_features = list(dict.fromkeys(
    name for genre in valid_genres for name in genre_audio_features_avg[genre]
))
for feature_name in numeric_features:
    # name=feature_name binds the current feature into the lambda.
    filtered_df[f'{feature_name}'] = filtered_df['GENRE'].map(
        lambda genre, name=feature_name: genre_audio_features_avg[genre].get(name, np.nan)
    )

filtered_df.to_csv("avg_genres.csv", index=False)

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import re
from matplotlib.colors import LinearSegmentedColormap
import warnings

# Suppress every warning from here on.
# NOTE(review): this presumably also hides pandas' chained-assignment
# warnings, which cells below would otherwise trigger — confirm that
# silencing them is intended.
warnings.filterwarnings('ignore')

def create_scatter_plot(data, plot_title):
    """Draw the normalised genre map as a scatter plot and show it.

    Args:
        data: Frame with the columns ``x_norm``, ``y_norm``, ``color`` and
            ``genre``.
        plot_title: Title placed above the plot.

    Genres listed in the module-level ``genres_to_label`` get a text label.
    The y axis is inverted.
    """
    plt.figure(figsize=(7, 9))
    # Colours come from `data` itself, so the function also works for frames
    # that are not row-for-row identical to the global `enao`.
    plot = sns.scatterplot(x='x_norm', y='y_norm', c=data['color'].tolist(), data=data,
                           legend=None, edgecolor=None, alpha=0.4, s=5)

    labelled = data[data['genre'].isin(genres_to_label)]
    for _, row in labelled.iterrows():
        plot.text(row['x_norm'], row['y_norm'], row['genre'], fontsize=8, color='black')

    plot.set(xlabel='← more atmospheric        more bouncy →',
             ylabel='← more organic        more mechanical →')
    plot.set(title=plot_title)

    plot.invert_yaxis()

    plt.show()

def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2)."""
    dx = x1 - x2
    dy = y1 - y2
    return np.sqrt(dx ** 2 + dy ** 2)

enao = pd.read_csv("enao_graph.csv")
playlist_df = pd.read_csv("features_df.csv")

# "787px" -> 787 (literal replacement, no regex involved).
enao['left'] = pd.to_numeric(enao['left'].str.replace('px', '', regex=False))
enao['top'] = pd.to_numeric(enao['top'].str.replace('px', '', regex=False))

# "rgb(r, g, b)" -> integer channels r, g, b, then a 0-1 float tuple that
# matplotlib accepts as a colour.
rgb_values = enao['color'].apply(lambda text: [int(n) for n in re.findall(r'\d+', text)])
enao[['r', 'g', 'b']] = pd.DataFrame(rgb_values.tolist(), index=enao.index)
enao['color'] = [(r / 255, g / 255, b / 255) for r, g, b in zip(enao['r'], enao['g'], enao['b'])]

# Scale both coordinates to the 0-1 range.
scaler = MinMaxScaler()
enao[['x_norm', 'y_norm']] = scaler.fit_transform(enao[['left', 'top']])

# .copy(): later cells add columns to selected_enao, which must not be a
# view on `enao`.
selected_enao = enao[['genre', 'x_norm', 'y_norm', 'color']].copy()
color_map = LinearSegmentedColormap.from_list("", selected_enao['color'])
genres_to_label = ['rock', 'rap', 'pop', 'pop rock', 'funk', 'jazz', 'focus', 'metal',
                   'folk', 'techno', 'classical']

plot_title = f"Mapping the {len(selected_enao)} Distinct Musical Genres on Spotify,\n"
create_scatter_plot(selected_enao, plot_title)

# Drop rows without any genre text.
playlist_df = playlist_df[playlist_df['genres'].notnull() & (playlist_df['genres'].str.strip() != '')]

# One row per (track, genre). Tokens are stripped so "rap, hip hop" yields
# "hip hop" instead of " hip hop", which would never match a map genre.
playlist_df = playlist_df.assign(genres=playlist_df['genres'].str.split(','))
playlist_df = playlist_df.explode('genres').reset_index(drop=True)
playlist_df['genres'] = playlist_df['genres'].str.strip()

# Only require the map columns to be present; a blanket dropna() would also
# discard rows that merely lack some unrelated feature value.
merged_data = playlist_df.merge(enao[['genre', 'x_norm', 'y_norm', 'color']],
                                left_on='genres', right_on='genre', how='left'
                                ).dropna(subset=['x_norm', 'y_norm', 'color'])

plt.figure(figsize=(7, 9))
plt.scatter(merged_data['x_norm'], merged_data['y_norm'], c=merged_data['color'].tolist(), s=5, alpha=0.4)

genres_to_label = ['rock', 'rap', 'pop', 'pop rock', 'funk', 'jazz', 'focus', 'metal', 'folk', 'techno', 'classical']
# Each genre sits at a single point, so draw its label once rather than once
# per matching playlist row.
label_rows = merged_data[merged_data['genre'].isin(genres_to_label)].drop_duplicates('genre')
for _, row in label_rows.iterrows():
    plt.text(row['x_norm'], row['y_norm'], row['genre'], fontsize=8, color='black')

plt.xlabel('← more atmospheric        more bouncy →')
plt.ylabel('← more organic        more mechanical →')

plt.gca().invert_yaxis()
plt.title("Unique Playlist Genres on the same Plane")
plt.show()

# Work on an independent frame so adding the column never writes to a view.
selected_enao = selected_enao.copy()
labeled_genres = selected_enao[selected_enao['genre'].isin(genres_to_label)]
genre_to_cluster = {g: i for i, g in enumerate(genres_to_label)}

# Assign every genre to its nearest labelled genre. All distances are
# computed in one array operation; argmin returns the first minimum, which
# matches the former row-by-row "strictly smaller" comparison, and no
# strings are written into an integer column any more.
points = selected_enao[['x_norm', 'y_norm']].to_numpy(dtype=float)
anchors = labeled_genres[['x_norm', 'y_norm']].to_numpy(dtype=float)
if len(anchors):
    distances = np.sqrt(((points[:, None, :] - anchors[None, :, :]) ** 2).sum(axis=2))
    closest_names = labeled_genres['genre'].to_numpy()[distances.argmin(axis=1)]
    # The cluster id is the position of the parent genre in genres_to_label.
    selected_enao['cluster'] = pd.Series(closest_names, index=selected_enao.index).map(genre_to_cluster)
else:
    # No labelled genre on the map: nothing to assign.
    selected_enao['cluster'] = np.nan

fig, ax = plt.subplots(figsize=(7, 9))
scatter = ax.scatter(x='x_norm', y='y_norm', c='cluster', cmap='viridis', data=selected_enao, s=5, alpha=0.4)
plt.gca().invert_yaxis()

for _, row in selected_enao[selected_enao['genre'].isin(genres_to_label)].iterrows():
    ax.text(row['x_norm'], row['y_norm'], row['genre'], fontsize=8, color='black', bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.2'))

ax.set(xlabel='← more atmospheric        more bouncy →',
       ylabel='← more organic        more mechanical →')

ax.set_title("Parent Genres displayed as Cluster Centroids")

plt.show()

# NOTE(review): `data` is not defined anywhere in the visible part of this
# file; it presumably holds one row of audio features per `genre` — confirm
# and define it before this cell runs.
combined_df = selected_enao.merge(data, on='genre', how='left')

cluster_to_genre = {v: k for k, v in genre_to_cluster.items()}

# Average only numeric feature columns; the bookkeeping columns are excluded.
# (The list comprehension previously lacked its closing bracket, which made
# the whole file a syntax error.)
excluded_columns = {"cluster", "genre", "x_norm", "y_norm", "color"}
features_columns = [
    col for col in combined_df.select_dtypes(include=np.number).columns
    if col not in excluded_columns
]
cluster_average_features = combined_df.groupby("cluster")[features_columns].mean()

# Name each row from its real cluster id *before* the index is reset, so the
# names stay correct even when some cluster id is absent.
cluster_average_features["cluster_name"] = cluster_average_features.index.map(cluster_to_genre)
cluster_average_features.reset_index(drop=True, inplace=True)

new_columns = ['cluster_name'] + features_columns

cluster_average_features = cluster_average_features[new_columns]

cluster_average_features.head(10)

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
import numpy as np

df = pd.read_csv("avg_genres.csv")

playlist_df = pd.read_csv("features_df.csv")

# Built once: the membership test below used to rebuild the full GENRE list
# for every single playlist genre.
known_genres = set(df['GENRE'])

# Flatten the comma-separated genre strings, strip list/quote punctuation,
# and keep only the genres that exist in avg_genres.csv.
playlist_genres = [genre for genres in playlist_df['genres'] for genre in str(genres).split(',')]
cleaned_genres = [genre.replace('/', '').replace('[', '').replace(']', '').replace('"', '').replace("'", '').strip() for genre in playlist_genres]
cleaned_genres = [genre for genre in cleaned_genres if genre in known_genres]
unique_main_genres = set(cleaned_genres)

genres_dict = df.set_index("GENRE")["SIM_GENRES"].to_dict()

def similar_genres_to_list(similar_genres_str):
    """Split a stored similar-genres string into a list of genre names.

    Square brackets and single/double quotes are removed, the text is split
    on commas and each name is stripped of surrounding whitespace.

    Args:
        similar_genres_str: Text such as ``"['rap', 'trap']"`` or
            ``"rap, trap"``. Non-string values (e.g. NaN from an empty CSV
            cell, or ``None``) are treated as "no similar genres".

    Returns:
        List of non-empty genre names; ``[]`` when there are none.
    """
    if not isinstance(similar_genres_str, str):
        return []

    # One pass removes all four punctuation characters.
    cleaned_str = similar_genres_str.translate(str.maketrans('', '', '[]"\''))
    stripped_names = (name.strip() for name in cleaned_str.split(','))
    # Empty tokens (empty input, trailing commas) are dropped.
    return [name for name in stripped_names if name]


# Parse the stored similar-genre text of every genre into a list.
all_similar_genres = {}
for main_genre, similar_genres in genres_dict.items():
    all_similar_genres[main_genre] = similar_genres_to_list(similar_genres)

# How often each known genre occurs in the playlist, and its share of the
# total number of occurrences.
genre_prevalence = pd.Series(cleaned_genres).value_counts()
prevalence_counts = genre_prevalence.tolist()
total_prevalence = sum(prevalence_counts)
normalized_genre_prevalence = dict(
    zip(genre_prevalence.index, (count / total_prevalence for count in prevalence_counts))
)

def calculate_mode_features(df, features):
    """Return the most frequent value of each requested column.

    Args:
        df: Frame containing the columns named in *features*.
        features: Iterable of column names.

    Returns:
        Dict mapping each feature name to its mode as computed by
        ``scipy.stats.mode``, or NaN when no mode is available.
    """
    mode_result = {}
    for feature in features:
        result = stats.mode(df[feature])
        # Depending on the SciPy version, .mode/.count are length-1 arrays or
        # plain scalars; atleast_1d makes indexing valid for both.
        modes = np.atleast_1d(result.mode)
        counts = np.atleast_1d(result.count)
        mode_result[feature] = modes[0] if modes.size and counts.size else np.nan
    return mode_result

features = ['danceability', 'energy', 'key', 'mode', 'loudness', 'liveness', 'speechiness', 'acousticness', 'instrumentalness', 'valence', 'tempo', 'time_signature']
playlist_mode_features = calculate_mode_features(playlist_df, features)

# .copy(): a column is added below, which must not write into a view of df.
df_common_genres = df[df["GENRE"].isin(genre_prevalence.keys())].copy()

# Cosine similarity between each genre's average features and the playlist's
# most frequent feature values, computed in one call rather than per row.
playlist_vector = [playlist_mode_features[name] for name in features]
if df_common_genres.empty:
    df_common_genres['similarity'] = pd.Series(dtype=float)
else:
    df_common_genres['similarity'] = cosine_similarity(
        df_common_genres[features].to_numpy(), [playlist_vector]
    )[:, 0]

# Count how many of the playlist's genres list each genre as similar.
similar_genres_prevalence = {}
for main_genre in unique_main_genres:
    similar_genres_list = all_similar_genres[main_genre]
    for similar_genre in similar_genres_list:
        similar_genres_prevalence[similar_genre] = similar_genres_prevalence.get(similar_genre, 0) + 1

# Direct occurrences plus the "listed as similar" counts.
final_genre_prevalence = genre_prevalence.copy()
for genre, prevalence in similar_genres_prevalence.items():
    final_genre_prevalence[genre] = final_genre_prevalence.get(genre, 0) + prevalence

# Relative importance of the three signals in the final weight.
original_genre_factor = 0.6
similar_genre_factor = 0.25
similarity_factor = 0.15

# Each signal is normalised so that its values sum to 1.
total_final_prevalence = sum(final_genre_prevalence.tolist())
normalized_final_prevalence = {genre: value / total_final_prevalence for genre, value in final_genre_prevalence.items()}

total_similar_prevalence = sum(similar_genres_prevalence.values())
normalized_similar_prevalence = {genre: value / total_similar_prevalence for genre, value in similar_genres_prevalence.items()}

total_similarity_weights = sum(df_common_genres['similarity'].tolist())
normalized_similarity_weights = {
    genre: value / total_similarity_weights
    for genre, value in zip(df_common_genres['GENRE'], df_common_genres['similarity'])
}

# Only genres that actually occur in the playlist receive a weight.
adjusted_genre_weights = {}
for genre in genre_prevalence.index:
    original_weight = original_genre_factor * normalized_final_prevalence.get(genre, 0)
    similar_weight = similar_genre_factor * normalized_similar_prevalence.get(genre, 0)
    similarity_weight = similarity_factor * normalized_similarity_weights.get(genre, 0)

    adjusted_genre_weights[genre] = original_weight + similar_weight + similarity_weight

sorted_genre_weights = dict(sorted(adjusted_genre_weights.items(), key=lambda item: item[1], reverse=True))
sorted_genre_weights

{'rap': 0.027696915527942113,
 'underground hip hop': 0.026897223014771782,
 'hip hop': 0.019599413231294455,
 'vapor trap': 0.014525900314542208,
 'trap': 0.010688784467408162,
 'dark trap': 0.009147908454904903,
 'alternative hip hop': 0.008455999362671584,
 'plugg': 0.008039504762408793,
 'pluggnb': 0.007025664325277389,
 'melodic rap': 0.005303043265259665,
 'emo rap': 0.00501629780370966,
 'rock': 0.004671676964968901,
 'southern hip hop': 0.004567090493619454,
 'atl hip hop': 0.004539870944342969,
 'psychedelic hip hop': 0.004262073510399586,
 'alternative rock': 0.003917385871820871,
 'east coast hip hop': 0.0038717889755047067,
 'pop rap': 0.003827769264499925,
 'new orleans rap': 0.0037462861774626727,
 'experimental hip hop': 0.003744666214060812,
 'indie rock': 0.003724443718493059,
 'glitchcore': 0.0033673297904365193,
 'conscious hip hop': 0.0032367573230610725,
 'miami hip hop': 0.0031639551528101825,
 'gangster rap': 0.00314014477275242,
 'escape room': 0.0031200752940824753,
 'chicago rap': 0.003067502689769885,
 'pop': 0.002970867216648219,
 'dark plugg': 0.0027642269313056978,
 'permanent wave': 0.0027238086702632137,
 'indie pop': 0.0027009769955032327,
 'new wave': 0.0026107311894828567,
 'art rock': 0.0026002455645387093,
 'hardcore hip hop': 0.0025473629447216777,
 'art pop': 0.0024932088060677093,
 'modern rock': 0.002455150326864235,
 'post-punk': 0.0024448887786291396,
 'classic rock': 0.002396769615017587,
 'indietronica': 0.0023210803088737396,
 'dream pop': 0.0022879329020507252,
 'folk rock': 0.002218729144363207,
 'electronica': 0.0022186749763971475,
 'canadian hip hop': 0.0022167962008356554,
 'noise pop': 0.002214184686532503,
 'boom bap': 0.002208863613537318,
 'uk post-punk': 0.002141429615975468,
 'mellow gold': 0.0021011768761934144,
 'pittsburgh rap': 0.0020914337135944914,
 'aesthetic rap': 0.0020415232529616737,
 'dirty south rap': 0.002040354454883042,
 'album rock': 0.0019985670000517157,
 'alternative dance': 0.0019482483240604574,
 'toronto rap': 0.0019317634953236165,
 'psychedelic rock': 0.0019213998183968222,
 'philly rap': 0.0018712106180780515,
 'drain': 0.0018424169583258188,
 'experimental rock': 0.0018038441468716624,
 'queens hip hop': 0.0017159708099925316,
 'madchester': 0.0017038238591476826,
 'memphis hip hop': 0.0016931573648224373,
 'lo-fi': 0.001678388303350503,
 'dance pop': 0.0016614036159951656,
}

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
from matplotlib.colors import LinearSegmentedColormap


def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2)."""
    delta_x = x1 - x2
    delta_y = y1 - y2
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Centroid of each cluster, computed from the labelled parent genres only.
cluster_centroids = (
    selected_enao[selected_enao['genre'].isin(genres_to_label)]
    .groupby('cluster')[['x_norm', 'y_norm']]
    .mean()
)

# Assign each emphasized genre to the cluster with the minimum distance from its centroid
emphasis_genres = list(sorted_genre_weights.keys())
# .copy(): the cluster column is rewritten below, which must not write into
# a view of selected_enao.
emphasis_genres_df = selected_enao[selected_enao['genre'].isin(emphasis_genres)].copy()

centroids_list = cluster_centroids.values
for idx, row in emphasis_genres_df.iterrows():
    min_distance = float('inf')
    assigned_cluster = -1
    # Pair each centroid with its real cluster id; a positional counter is
    # wrong as soon as one cluster id is missing from cluster_centroids.
    for cluster, centroid in zip(cluster_centroids.index, centroids_list):
        distance = euclidean_distance(row['x_norm'], row['y_norm'], centroid[0], centroid[1])
        if distance < min_distance:
            min_distance = distance
            assigned_cluster = cluster
    emphasis_genres_df.loc[idx, 'cluster'] = assigned_cluster

def create_scatter_plot(data, plot_title, emphasis_genres):
    """Draw the genre map and highlight the emphasized genres.

    Args:
        data: DataFrame with ``genre``, ``x_norm``, ``y_norm`` and ``color``
            columns; every row is drawn as a semi-transparent point.
        plot_title: Title placed above the plot.
        emphasis_genres: Genre names to redraw as larger, black-outlined markers.

    Cluster labels are read from the module-level ``cluster_centroids`` and
    ``cluster_average_features``. The figure is left open; the caller decides
    when to call ``plt.show()``.
    """
    plt.figure(figsize=(10, 12))
    plt.scatter(data["x_norm"], data["y_norm"], c=data["color"], alpha=0.5)

    # Select the highlighted rows from the argument instead of a global frame,
    # and draw them in one call rather than one scatter() per row.
    highlighted = data[data["genre"].isin(emphasis_genres)]
    if not highlighted.empty:
        plt.scatter(
            highlighted["x_norm"],
            highlighted["y_norm"],
            c=highlighted["color"],
            marker="o",
            edgecolors="black",
            s=100,
            linewidths=2,
            alpha=0.9,
        )

    cluster_names = cluster_average_features['cluster_name'].tolist()
    # NOTE(review): assumes the centroid index values are 0-based positions
    # into cluster_names — confirm against how clusters are numbered.
    for idx, row in cluster_centroids.iterrows():
        plt.text(
            row["x_norm"] - 0.02,
            row["y_norm"] - 0.02,
            cluster_names[idx],
            fontsize=9,
            color='black',
            bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.2'),
        )

    plt.xlabel("X Norm")
    plt.ylabel("Y Norm")
    plt.title(plot_title)
    # Flip the y axis so smaller y_norm values are drawn at the top.
    plt.gca().invert_yaxis()
    
create_scatter_plot(selected_enao, "Weighted Playlist Genre Positions with Cosine Similarity Clustering", emphasis_genres)
plt.show()

cluster_names = cluster_average_features['cluster_name'].tolist()

# Detach from any parent frame before adding a column; assigning to a filtered
# slice is what triggers pandas' SettingWithCopyWarning.
emphasis_genres_df = emphasis_genres_df.copy()
emphasis_genres_df['closest_centroid'] = -1

# Record, for every emphasized genre, the index label of its nearest centroid.
for genre_index, row in emphasis_genres_df.iterrows():
    min_distance = float('inf')
    assigned_cluster = -1  # kept when no centroid yields a finite distance
    for cluster_index, centroid in cluster_centroids.iterrows():
        distance = euclidean_distance(row['x_norm'], row['y_norm'], centroid['x_norm'], centroid['y_norm'])
        if distance < min_distance:
            min_distance = distance
            assigned_cluster = cluster_index
    emphasis_genres_df.at[genre_index, 'closest_centroid'] = assigned_cluster

# Sum the genre weights that landed on each cluster.
# NOTE(review): matches centroid labels against positions in cluster_names —
# this presumes clusters are labelled 0..n-1 in the same order; confirm.
cluster_weights = {}

for cluster_index, cluster_name in enumerate(cluster_names):
    in_cluster = emphasis_genres_df['closest_centroid'] == cluster_index
    associated_genres = emphasis_genres_df.loc[in_cluster, 'genre'].values
    total_weight = sum(sorted_genre_weights[genre] for genre in associated_genres)
    cluster_weights[cluster_name] = total_weight

print("Total weights per cluster:")

for cluster_name, total_weight in cluster_weights.items():
    print(f"{cluster_name:<15} : {total_weight:.4f}")

Total weights per cluster:
rock            : 0.0793
rap             : 0.1889
pop             : 0.0901
pop rock        : 0.0542
funk            : 0.1036
jazz            : 0.0053
focus           : 0.0048
metal           : 0.0300
folk            : 0.0233
techno          : 0.0199
classical       : 0.0000

/var/folders/y_/d0wbwn_s1tlbh09zvpsn9bp40000gn/T/ipykernel_23694/2209042183.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  emphasis_genres_df['closest_centroid'] = -1

# For every cluster, list its emphasized genres as (genre, weight) pairs,
# heaviest first, then print the listing.
sorted_genres_per_cluster = {}

for cluster_index, cluster_name in enumerate(cluster_names):
    belongs_here = emphasis_genres_df['closest_centroid'] == cluster_index
    associated_genres = emphasis_genres_df[belongs_here]['genre'].values
    associated_weights = [sorted_genre_weights[genre] for genre in associated_genres]
    sorted_genres_with_weights = list(zip(associated_genres, associated_weights))
    # In-place stable sort; descending by the weight component of each pair.
    sorted_genres_with_weights.sort(key=lambda pair: pair[1], reverse=True)
    sorted_genres_per_cluster[cluster_name] = sorted_genres_with_weights

separator = "----------------------------------------"

print("Genres per cluster (sorted by score):")
print(separator)

for cluster_name, sorted_genres in sorted_genres_per_cluster.items():
    print(f"Cluster {cluster_name}:")
    for genre, weight in sorted_genres:
        print(f"  {genre:<25}: {weight:.4f}")
    print(separator)

Genres per cluster (sorted by score):
----------------------------------------
Cluster rock:
  rock                     : 0.0047
  alternative rock         : 0.0039
  indie rock               : 0.0037
  indie pop                : 0.0027
  art rock                 : 0.0026
  art pop                  : 0.0025
  classic rock             : 0.0024
  mellow gold              : 0.0021
  album rock               : 0.0020
  psychedelic rock         : 0.0019
  experimental rock        : 0.0018
  lo-fi                    : 0.0017
  chamber pop              : 0.0016
  melancholia              : 0.0014
  soft rock                : 0.0014
  zolo                     : 0.0013
  experimental             : 0.0013
  lo-fi indie              : 0.0012
  small room               : 0.0012
  protopunk                : 0.0012
  no wave                  : 0.0011
  slowcore                 : 0.0010
  experimental pop         : 0.0010
  british invasion         : 0.0010
  stomp and holler         : 0.0009
  glam rock                : 0.0009
  symphonic rock           : 0.0009
  progressive rock         : 0.0009
  classic garage rock      : 0.0009
  neo-psychedelic          : 0.0009
  art punk                 : 0.0009
  bedroom pop              : 0.0009
  power pop                : 0.0008
  math rock                : 0.0008
  mandible                 : 0.0008
  lo-fi emo                : 0.0008
  freakbeat                : 0.0007
  beatlesque               : 0.0007
  hardvapour               : 0.0007
  eau claire indie         : 0.0007
  dutch indie              : 0.0007
  canadian pop             : 0.0007
  louisville underground   : 0.0007
  chicago indie            : 0.0007
  norwegian indie          : 0.0007
  baltimore indie          : 0.0007
  western mass indie       : 0.0007
  vancouver indie          : 0.0007
  elephant 6               : 0.0006
  canadian indie           : 0.0006
  ambient pop              : 0.0006
  bristol indie            : 0.0006
  collage pop              : 0.0006
  edmonton indie           : 0.0006
  olympia wa indie         : 0.0006
  acid rock                : 0.0005
  minneapolis indie        : 0.0005
  modern folk rock         : 0.0005
  punk blues               : 0.0005
  viral pop                : 0.0005
  indie psych-pop          : 0.0005
  dutch rock               : 0.0005
  classic uk pop           : 0.0005
  asheville indie          : 0.0005
  shimmer psych            : 0.0005
  modern blues rock        : 0.0005
  austindie                : 0.0005
  san marcos tx indie      : 0.0005
  toronto indie            : 0.0005
  icelandic singer-songwriter: 0.0004
  folktronica              : 0.0004
  canadian indie folk      : 0.0004
  folk-pop                 : 0.0004
  icelandic pop            : 0.0004
  acoustic rock            : 0.0004
  japanese psychedelic     : 0.0004
----------------------------------------
Cluster rap:
  rap                      : 0.0277
  hip hop                  : 0.0196
  vapor trap               : 0.0145
  trap                     : 0.0107
  dark trap                : 0.0091
  plugg                    : 0.0080
  melodic rap              : 0.0053
  southern hip hop         : 0.0046
  atl hip hop              : 0.0045
  east coast hip hop       : 0.0039
  pop rap                  : 0.0038
  new orleans rap          : 0.0037
  miami hip hop            : 0.0032
  gangster rap             : 0.0031
  chicago rap              : 0.0031
  aesthetic rap            : 0.0020
  dirty south rap          : 0.0020
  toronto rap              : 0.0019
  memphis hip hop          : 0.0017
  dfw rap                  : 0.0016
  crunk                    : 0.0015
  canadian trap            : 0.0015
  alabama rap              : 0.0014
  drill                    : 0.0014
  baltimore hip hop        : 0.0013
  tennessee hip hop        : 0.0013
  west coast rap           : 0.0013
  hip pop                  : 0.0013
  deep underground hip hop : 0.0013
  houston rap              : 0.0013
  dmv rap                  : 0.0013
  detroit trap             : 0.0012
  urban contemporary       : 0.0012
  cali rap                 : 0.0012
  indiana hip hop          : 0.0011
  indie soul               : 0.0011
  atl trap                 : 0.0011
  old school atlanta hip hop: 0.0011
  scam rap                 : 0.0011
  uk bass                  : 0.0010
  chicago drill            : 0.0010
  golden age hip hop       : 0.0010
  texas latin rap          : 0.0010
  chicago bop              : 0.0010
  ottawa rap               : 0.0010
  san antonio rap          : 0.0010
  ohio hip hop             : 0.0009
  deep east coast hip hop  : 0.0009
  sad rap                  : 0.0009
  chicano rap              : 0.0009
  bronx hip hop            : 0.0008
  baton rouge rap          : 0.0008
  viral rap                : 0.0008
  g funk                   : 0.0008
  indie pop rap            : 0.0008
  asian american hip hop   : 0.0008
  florida drill            : 0.0008
  kentucky hip hop         : 0.0008
  trap queen               : 0.0008
  norwegian pop rap        : 0.0007
  san diego rap            : 0.0007
  swedish trap             : 0.0007
  minnesota hip hop        : 0.0007
  braindance               : 0.0007
  meme rap                 : 0.0007
  jazz rap                 : 0.0007
  grime                    : 0.0007
  instrumental grime       : 0.0006
  reggae                   : 0.0006
  dub                      : 0.0006
  russian hip hop          : 0.0006
  philly drill             : 0.0005
  roots reggae             : 0.0005
  old school dancehall     : 0.0005
  ghanaian hip hop         : 0.0005
  dancehall queen          : 0.0005
  indonesian hip hop       : 0.0004
  melodic drill            : 0.0004
  mongolian hip hop        : 0.0004
  colombian pop            : 0.0004
  korean trap              : 0.0004
----------------------------------------
Cluster pop:
  pluggnb                  : 0.0070
  emo rap                  : 0.0050
  glitchcore               : 0.0034
  escape room              : 0.0031
  pop                      : 0.0030
  new wave                 : 0.0026
  indietronica             : 0.0023
  electronica              : 0.0022
  alternative dance        : 0.0019
  dance pop                : 0.0017
  r&b                      : 0.0017
  dance rock               : 0.0015
  new rave                 : 0.0015
  electropop               : 0.0015
  neo-synthpop             : 0.0014
  industrial hip hop       : 0.0014
  new wave pop             : 0.0014
  intelligent dance music  : 0.0014
  new romantic             : 0.0013
  wonky                    : 0.0012
  dance-punk               : 0.0012
  slap house               : 0.0012
  russian alt pop          : 0.0012
  hyperpop                 : 0.0011
  synthpop                 : 0.0011
  fluxwork                 : 0.0010
  edm                      : 0.0010
  boy band                 : 0.0010
  electra                  : 0.0010
  french indie pop         : 0.0010
  indie poptimism          : 0.0009
  tropical house           : 0.0009
  pop dance                : 0.0009
  scottish electronic      : 0.0009
  austrian pop             : 0.0009
  downtempo                : 0.0008
  electro house            : 0.0008
  russian electronic       : 0.0008
  funk rock                : 0.0008
  experimental electronic  : 0.0008
  outsider house           : 0.0008
  trip hop                 : 0.0008
  electronic rock          : 0.0008
  australian dance         : 0.0008
  canadian electropop      : 0.0008
  rap rock                 : 0.0008
  glitch pop               : 0.0007
  vapor soul               : 0.0007
  dark synthpop            : 0.0007
  proto-hyperpop           : 0.0007
  la pop                   : 0.0007
  bitpop                   : 0.0007
  hyperpop francais        : 0.0007
  modern alternative pop   : 0.0007
  nz pop                   : 0.0007
  hardcore techno          : 0.0007
  hi-nrg                   : 0.0007
  glitch hop               : 0.0007
  electronic trap          : 0.0007
  australian electropop    : 0.0007
  post-teen pop            : 0.0007
  gabba                    : 0.0007
  aussietronica            : 0.0006
  big beat                 : 0.0006
  emoplugg                 : 0.0006
  british soul             : 0.0005
  vapor twitch             : 0.0005
  substep                  : 0.0005
  social media pop         : 0.0005
  happy hardcore           : 0.0005
  experimental club        : 0.0005
  future funk              : 0.0005
  synthwave                : 0.0005
  sovietwave               : 0.0005
  sped up                  : 0.0005
  terrorcore               : 0.0004
  future bass              : 0.0004
  nightcore                : 0.0004
----------------------------------------
Cluster pop rock:
  permanent wave           : 0.0027
  modern rock              : 0.0025
  post-punk                : 0.0024
  uk post-punk             : 0.0021
  drain                    : 0.0018
  madchester               : 0.0017
  chillwave                : 0.0012
  shimmer pop              : 0.0012
  metropopolis             : 0.0012
  indie garage rock        : 0.0011
  deconstructed club       : 0.0010
  pop rock                 : 0.0010
  garage rock              : 0.0010
  nu metal                 : 0.0009
  sigilkore                : 0.0009
  russian indie            : 0.0009
  grave wave               : 0.0009
  alternative emo          : 0.0009
  uk experimental electronic: 0.0009
  funk metal               : 0.0009
  uk alternative pop       : 0.0008
  vapor pop                : 0.0008
  crank wave               : 0.0008
  modern dream pop         : 0.0008
  egg punk                 : 0.0007
  indie surf               : 0.0007
  dutch indie rock         : 0.0007
  witch house              : 0.0007
  industrial               : 0.0007
  french synthpop          : 0.0007
  japanese dream pop       : 0.0007
  rap metal                : 0.0007
  speedcore                : 0.0007
  dark post-punk           : 0.0007
  atlanta indie            : 0.0007
  la indie                 : 0.0007
  modern alternative rock  : 0.0007
  brooklyn indie           : 0.0006
  australian indie         : 0.0006
  synth punk               : 0.0006
  modern goth              : 0.0006
  uk post-punk revival     : 0.0006
  scottish indie           : 0.0006
  san diego indie          : 0.0006
  russian indie rock       : 0.0006
  russian post-punk        : 0.0006
  norwegian pop            : 0.0005
  bergen indie             : 0.0005
  oxford indie             : 0.0005
  skate punk               : 0.0005
  uptempo hardcore         : 0.0005
  digital hardcore         : 0.0005
  belarusian indie         : 0.0005
  sacramento indie         : 0.0005
  irish indie rock         : 0.0005
  london indie             : 0.0005
  ska punk                 : 0.0005
  wave                     : 0.0005
  vegas indie              : 0.0005
  orlando indie            : 0.0005
  canadian post-punk       : 0.0005
  alternative pop rock     : 0.0004
  candy pop                : 0.0004
  japanese indie rock      : 0.0004
  russian emo              : 0.0004
  scenecore                : 0.0004
----------------------------------------
Cluster funk:
  underground hip hop      : 0.0269
  alternative hip hop      : 0.0085
  psychedelic hip hop      : 0.0043
  experimental hip hop     : 0.0037
  conscious hip hop        : 0.0032
  dark plugg               : 0.0028
  hardcore hip hop         : 0.0025
  canadian hip hop         : 0.0022
  boom bap                 : 0.0022
  pittsburgh rap           : 0.0021
  philly rap               : 0.0019
  queens hip hop           : 0.0017
  alternative r&b          : 0.0016
  detroit hip hop          : 0.0016
  upstate ny rap           : 0.0015
  buffalo hip hop          : 0.0013
  political hip hop        : 0.0013
  nyc rap                  : 0.0013
  new jersey rap           : 0.0013
  indie hip hop            : 0.0013
  virginia hip hop         : 0.0012
  north carolina hip hop   : 0.0011
  uk hip hop               : 0.0011
  harlem hip hop           : 0.0011
  abstract hip hop         : 0.0011
  underground rap          : 0.0010
  swedish gangsta rap      : 0.0009
  wu fam                   : 0.0009
  swedish hip hop          : 0.0009
  afrofuturism             : 0.0009
  sad lo-fi                : 0.0009
  uk alternative hip hop   : 0.0008
  indie r&b                : 0.0008
  indie jazz               : 0.0008
  alt z                    : 0.0008
  swedish underground rap  : 0.0007
  soul flow                : 0.0007
  battle rap               : 0.0007
  uk contemporary r&b      : 0.0007
  trap soul                : 0.0007
  weirdcore                : 0.0007
  nouvelle chanson francaise: 0.0007
  swedish drill            : 0.0007
  boston hip hop           : 0.0007
  portland hip hop         : 0.0007
  experimental r&b         : 0.0006
  swedish trap pop         : 0.0006
  german hip hop           : 0.0006
  chillhop                 : 0.0006
  abstract beats           : 0.0006
  new jersey underground rap: 0.0005
  chill drill              : 0.0005
  sacramento hip hop       : 0.0005
  lo-fi chill              : 0.0005
  birmingham grime         : 0.0005
  chill r&b                : 0.0005
  lo-fi beats              : 0.0005
  plunderphonics           : 0.0005
  reggae fusion            : 0.0005
  lo-fi rap                : 0.0005
  bedroom soul             : 0.0004
  classic belgian pop      : 0.0004
  ska mexicano             : 0.0004
----------------------------------------
Cluster jazz:
  vocal jazz               : 0.0010
  swing                    : 0.0008
  jazz blues               : 0.0007
  torch song               : 0.0007
  jazz                     : 0.0007
  harlem renaissance       : 0.0006
  jump blues               : 0.0004
  modern jazz piano        : 0.0004
----------------------------------------
Cluster focus:
  spectra                  : 0.0009
  ambient                  : 0.0009
  experimental ambient     : 0.0007
  drone                    : 0.0007
  easy listening           : 0.0007
  nordic ambient           : 0.0005
  icelandic experimental   : 0.0005
----------------------------------------
Cluster metal:
  dream pop                : 0.0023
  noise pop                : 0.0022
  noise rock               : 0.0013
  grunge                   : 0.0012
  post-hardcore            : 0.0012
  shoegaze                 : 0.0011
  punk                     : 0.0011
  philly indie             : 0.0011
  alternative metal        : 0.0011
  nu gaze                  : 0.0010
  dreamo                   : 0.0010
  hardcore punk            : 0.0009
  bubblegrunge             : 0.0009
  new isolationism         : 0.0008
  surf punk                : 0.0007
  emo                      : 0.0007
  japanese shoegaze        : 0.0007
  garage pop               : 0.0007
  spacegrunge              : 0.0007
  midwest emo              : 0.0006
  diy emo                  : 0.0006
  california hardcore      : 0.0006
  atlanta punk             : 0.0006
  nintendocore             : 0.0006
  post-rock                : 0.0006
  british post-rock        : 0.0005
  indie shoegaze           : 0.0005
  dc hardcore              : 0.0005
  australian garage punk   : 0.0005
  ethereal wave            : 0.0005
  cosmic post-rock         : 0.0005
  thrash core              : 0.0005
  nyhc                     : 0.0005
  psychedelic punk         : 0.0005
  blackgaze                : 0.0004
  doomgaze                 : 0.0004
  chain punk               : 0.0004
----------------------------------------
Cluster folk:
  folk rock                : 0.0022
  singer-songwriter        : 0.0015
  blues rock               : 0.0014
  country rock             : 0.0013
  adult standards          : 0.0013
  neo soul                 : 0.0013
  rock-and-roll            : 0.0011
  folk                     : 0.0010
  anti-folk                : 0.0010
  merseybeat               : 0.0010
  soul                     : 0.0009
  lounge                   : 0.0008
  outlaw country           : 0.0008
  roots rock               : 0.0007
  classic soul             : 0.0007
  doo-wop                  : 0.0007
  indie folk               : 0.0007
  chanson                  : 0.0006
  rockabilly               : 0.0006
  contemporary jazz        : 0.0006
  arkansas country         : 0.0006
  outsider                 : 0.0006
  cowboy western           : 0.0005
  nashville sound          : 0.0005
  ye ye                    : 0.0005
  experimental vocal       : 0.0004
----------------------------------------
Cluster techno:
  phonk                    : 0.0012
  old school hip hop       : 0.0011
  house                    : 0.0009
  filter house             : 0.0008
  microhouse               : 0.0008
  australian house         : 0.0008
  deep house               : 0.0008
  bassline                 : 0.0008
  float house              : 0.0007
  speed garage             : 0.0007
  bass house               : 0.0007
  classic house            : 0.0007
  classic dubstep          : 0.0007
  footwork                 : 0.0007
  hip house                : 0.0006
  chicago house            : 0.0006
  trap latino              : 0.0006
  electro                  : 0.0005
  swedish techno           : 0.0005
  deep dubstep             : 0.0005
  experimental techno      : 0.0005
  freestyle                : 0.0005
  uk garage                : 0.0005
  latin hip hop            : 0.0005
  moombahton               : 0.0005
  lo-fi house              : 0.0005
  techno rave              : 0.0005
  drift phonk              : 0.0004
  ukg revival              : 0.0004
  italian techno           : 0.0004
  acid techno              : 0.0004
----------------------------------------
Cluster classical:
----------------------------------------

import json

# Persist the per-cluster totals and the ranked genre lists as one JSON document.
data_to_save = dict(
    cluster_weights=cluster_weights,
    sorted_genres_per_cluster=sorted_genres_per_cluster,
)

with open('weighted_rec_genre.json', mode='w') as json_file:
    json.dump(data_to_save, fp=json_file)

import json
import math
from pprint import pprint

# Reload the cluster summary written by the clustering step.
with open("weighted_rec_genre.json", "r") as file:
    data = json.load(file)

# Total weight per cluster ("parent genre").
parent_genre_weights = data['cluster_weights']

# Per-cluster {subgenre: weight}, leaving out the genre the cluster is named after.
subgenre_weights = {
    genre: {name: weight for name, weight in subgenres if name != genre}
    for genre, subgenres in data['sorted_genres_per_cluster'].items()
}

# Sum once; the original re-summed every value for each key.
parent_weight_total = sum(parent_genre_weights.values())
normalized_parent_genre_weights = {k: v / parent_weight_total for k, v in parent_genre_weights.items()}

# Despite the name, this caps how many subgenres are kept per cluster.
total_recommendations = 15
recommendations_per_genre = {k: min(total_recommendations, len(v)) for k, v in subgenre_weights.items()}

# Keep the k heaviest subgenres of every cluster.
selected_subgenres = {}
for parent_genre, k in recommendations_per_genre.items():
    sorted_subgenres = sorted(subgenre_weights[parent_genre].items(), key=lambda x: x[1], reverse=True)
    selected_subgenres[parent_genre] = [subgenre for subgenre, _ in sorted_subgenres[:k]]

# Scale every kept subgenre by its cluster's share of the total weight.
all_selected_subgenres = {}
for parent_genre, subgenres in selected_subgenres.items():
    for subgenre in subgenres:
        all_selected_subgenres[subgenre] = subgenre_weights[parent_genre][subgenre] * normalized_parent_genre_weights[parent_genre]

# Renormalise so the kept subgenre weights sum to 1.
total_weight = sum(all_selected_subgenres.values())
recalculated_subgenre_weights = {k: v / total_weight for k, v in all_selected_subgenres.items()}

recalculated_subgenre_weights_list = list(recalculated_subgenre_weights.items())

sorted_subgenre_weights_list = sorted(recalculated_subgenre_weights_list, key=lambda x: x[1], reverse=True)

max_weight = sorted_subgenre_weights_list[0][1]

# Two pseudo-genres whose weights are fixed fractions of the heaviest real subgenre.
existing_playlist_weight = .1 * max_weight
similar_playlist_weight = .02 * max_weight

sorted_subgenre_weights_list.extend([("existing playlist", existing_playlist_weight), ("similar playlist", similar_playlist_weight)])

# Re-sort so the two pseudo-genres land at their ranked positions.
sorted_subgenre_weights_list = sorted(sorted_subgenre_weights_list, key=lambda x: x[1], reverse=True)

pprint(sorted_subgenre_weights_list)

[('hip hop', 0.1083271936286511),
 ('underground hip hop', 0.08157994251265374),
 ('vapor trap', 0.08028556760522831),
 ('trap', 0.05907758620074612),
 ('dark trap', 0.05056106725222655),
 ('plugg', 0.044434850104867196),
 ('melodic rap', 0.02931025474271084),
 ('alternative hip hop', 0.025647255165149537),
 ('southern hip hop', 0.02524259733612499),
 ('atl hip hop', 0.025092153169753197),
 ('east coast hip hop', 0.021399621972819343),
 ('pop rap', 0.021156322252505355),
 ('new orleans rap', 0.02070596008896615),
 ('pluggnb', 0.018531844339968163),
 ('miami hip hop', 0.017487379771328),
 ('gangster rap', 0.017355778298342544),
 ('chicago rap', 0.016954280922070433),
 ('emo rap', 0.013231666894020198),
 ('psychedelic hip hop', 0.012926974348695705),
 ('experimental hip hop', 0.011357665224561837),
 ('aesthetic rap', 0.011283627836769006),
 ('existing playlist', 0.01083271936286511),
 ('conscious hip hop', 0.009817164998695825),
 ('alternative rock', 0.00908886547563794),
 ('glitchcore', 0.008882125394632113),
 ('indie rock', 0.008641213563481206),
 ('dark plugg', 0.008383968635870029),
 ('escape room', 0.008229933427204255),
 ('hardcore hip hop', 0.007726214801993829),
 ('new wave', 0.006886418390774171),
 ('canadian hip hop', 0.006723597693602885),
 ('boom bap', 0.006699537960171684),
 ('pittsburgh rap', 0.006343370169863323),
 ('indie pop', 0.006266632230822565),
 ('indietronica', 0.0061223959746917),
 ('art rock', 0.006032921676089925),
 ('electronica', 0.005852277791815952),
 ('art pop', 0.00578458190806011),
 ('philly rap', 0.0056754280755317585),
 ('classic rock', 0.005560829930921722),
 ('queens hip hop', 0.005204581898871184),
 ('alternative dance', 0.005138963805485647),
 ('mellow gold', 0.004875014765743852),
 ('alternative r&b', 0.004824615980454196),
 ('detroit hip hop', 0.004779014138927603),
 ('album rock', 0.004636945963935906),
 ('upstate ny rap', 0.004494710933967134),
 ('psychedelic rock', 0.0044579076572322995),
 ('dance pop', 0.004382343330396308),
 ('r&b', 0.004355165797506801),
 ('permanent wave', 0.00432237688475291),
 ('experimental rock', 0.004185162587088411),
 ('dance rock', 0.003985210839633927),
 ('new rave', 0.003956679680374684),
 ('electropop', 0.0039295261071781735),
 ('modern rock', 0.003896046421060121),
 ('lo-fi', 0.003894088048555271),
 ('post-punk', 0.0038797625023776583),
 ('chamber pop', 0.0037965138989998043),
 ('neo-synthpop', 0.003758222781530166),
 ('industrial hip hop', 0.003665867466369484),
 ('uk post-punk', 0.0033982070669902114),
 ('melancholia', 0.003277705176449072),
 ('soft rock', 0.0032573381715387587),
 ('zolo', 0.0029562087560496605),
 ('drain', 0.0029237077331040013),
 ('madchester', 0.002703776129679024),
 ('similar playlist', 0.002166543872573022),
 ('dream pop', 0.0020086085493670526),
 ('noise pop', 0.0019438639512812906),
 ('chillwave', 0.0018923453520084288),
 ('shimmer pop', 0.0018587899989210795),
 ('metropopolis', 0.0018489982137628565),
 ('indie garage rock', 0.0017196290917656217),
 ('deconstructed club', 0.0015667837546723081),
 ('garage rock', 0.0015494705441834427),
 ('folk rock', 0.001509828922202524),
 ('nu metal', 0.0015063446834263704),
 ('sigilkore', 0.001470632068517871),
 ('russian indie', 0.0014636542314747187),
 ('noise rock', 0.001103837882965255),
 ('grunge', 0.0010468550666665882),
 ('post-hardcore', 0.0010375149860486815),
 ('singer-songwriter', 0.0009923942267418837),
 ('shoegaze', 0.0009899194289087591),
 ('punk', 0.0009566499176759198),
 ('philly indie', 0.0009512939577290071),
 ('blues rock', 0.0009482251690658023),
 ('alternative metal', 0.0009234336153902236),
 ('nu gaze', 0.000899772976638852),
 ('country rock', 0.000885567552318827),
 ('adult standards', 0.0008813401818104676),
 ('dreamo', 0.0008757944689318428),
 ('neo soul', 0.0008657222578876254),
 ('hardcore punk', 0.0008004319714852693),
 ('bubblegrunge', 0.0007672709310665287),
 ('new isolationism', 0.0007434665624110494),
 ('rock-and-roll', 0.0007273820462735496),
 ('phonk', 0.0006825397977661009),
 ('anti-folk', 0.0006820178260078819),
 ('merseybeat', 0.0006533733683360235),
 ('old school hip hop', 0.0006494017110962985),
 ('surf punk', 0.0006386132461193048),
 ('soul', 0.0006205299799441827),
 ('lounge', 0.0005650042930108216),
 ('outlaw country', 0.0005393596130150045),
 ('house', 0.0005080282436575835),
 ('roots rock', 0.000506587090985159),
 ('classic soul', 0.000506561053166958),
 ('filter house', 0.0004922924659473024),
 ('microhouse', 0.0004704640268658081),
 ('australian house', 0.00047044456823509394),
 ('deep house', 0.0004704038832208561),
 ('bassline', 0.0004703309506134334),
 ('doo-wop', 0.0004624033749873246),
 ('float house', 0.0004327319845568632),
 ('speed garage', 0.0004326838610058799),
 ('bass house', 0.00043251714534351017),
 ('classic house', 0.00041687156361531964),
 ('classic dubstep', 0.00039498066509960166),
 ('footwork', 0.0003949287475241429),
 ('hip house', 0.0003572548842879106),
 ('vocal jazz', 0.0001526947941426506),
 ('spectra', 0.00012873458747844266),
 ('swing', 0.00012653119687490365),
 ('ambient', 0.00012376303503307945),
 ('jazz blues', 0.00011637988624673114),
 ('torch song', 0.0001061946507479888),
 ('harlem renaissance', 9.606164594883092e-05),
 ('experimental ambient', 9.582251970032664e-05),
 ('drone', 9.571060620727332e-05),
 ('easy listening', 9.216059304176921e-05),
 ('nordic ambient', 7.750729716711836e-05),
 ('icelandic experimental', 6.840914278822261e-05),
 ('jump blues', 6.564547977219157e-05),
 ('modern jazz piano', 6.550536657054768e-05)]

import pickle

# Write the ranked (genre, weight) list to disk in pickle format.
with open("sorted_subgenre_weights_list.pkl", mode="wb") as file:
    pickle.Pickler(file).dump(sorted_subgenre_weights_list)

import requests
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import json
import configparser
from urllib.parse import quote
import time
import warnings
# NOTE(review): this silences every warning from here on, including ones that
# point at real problems; consider narrowing the filter.
warnings.filterwarnings('ignore')

config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the names it did
# parse, so check the result and fail here with a clear message instead of a
# later, less obvious NoSectionError.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini not found or unreadable; a [SPOTIFY] section is required")

USERNAME = config.get('SPOTIFY', 'username')
# NOTE(review): PASSWORD is read but not used anywhere in the visible code.
PASSWORD = config.get('SPOTIFY', 'password')
CLIENT_ID = config.get('SPOTIFY', 'client_id')
CLIENT_SECRET = config.get('SPOTIFY', 'client_secret')
REDIRECT_URI = config.get("SPOTIFY", 'redirect_uri')
SCOPE = config.get('SPOTIFY', 'scope')

# Shared Spotify client used by the fetch helpers in this file.
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=CLIENT_ID,
                                               client_secret=CLIENT_SECRET,
                                               redirect_uri=REDIRECT_URI,
                                               scope=SCOPE,
                                               username=USERNAME))

import time
import pandas as pd
import pickle
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def parse_artist_ids(artist_ids_str):
    """Parse a stringified list of artist IDs into a list of ID strings.

    Single quotes and spaces are removed, the surrounding brackets are
    stripped, and the remainder is split on commas. Empty tokens are dropped,
    so ``"[]"`` yields ``[]`` rather than ``['']``.

    Args:
        artist_ids_str: Text such as ``"['id1', 'id2']"``.

    Returns:
        The non-empty IDs in their original order.
    """
    # Remove quotes/spaces first so brackets hidden behind padding are still stripped.
    cleaned_str = artist_ids_str.replace("'", "").replace(" ", "").strip("[]")
    return [artist_id for artist_id in cleaned_str.split(',') if artist_id]

def fetch_batch_genres(batch):
    """Fetch names and genres for one batch of artist IDs.

    Args:
        batch: Artist IDs looked up with a single ``sp.artists`` call.

    Returns:
        Dict mapping artist ID to ``(name, genres)``, where ``genres`` has the
        marker ``'existing playlist'`` appended.
    """
    artists = sp.artists(batch)
    time.sleep(0.1)  # short pause after each request — presumably to ease rate limiting
    return {
        artist['id']: (artist['name'], artist['genres'] + ['existing playlist'])
        for artist in artists['artists']
        # The Web API can return null entries for IDs it cannot resolve;
        # skip them instead of failing on None['id'].
        if artist is not None
    }

def fetch_similar_artists_genres(artist_id, artist_genres, original_artist_ids):
    """Collect genres for the artists Spotify relates to ``artist_id``.

    Args:
        artist_id: ID of the seed artist.
        artist_genres: Dict of artist ID -> ``(name, genres)``.
        original_artist_ids: Not used by this function; kept so existing
            callers keep working.

    Returns:
        Dict of related-artist ID -> ``(name, genres)``. Each genre list is the
        de-duplicated union of the related artist's genres, the seed artist's
        genres (without ``'existing playlist'``) and, when the related artist
        is already in ``artist_genres``, its recorded genres; the marker
        ``'similar playlist'`` is appended. Returns ``{}`` when ``artist_id``
        is not in ``artist_genres``. The lookup is attempted up to three
        times; entries gathered before a failure are kept.
    """
    retries = 3

    if artist_id not in artist_genres:
        return {}

    # The seed artist's genres do not change between attempts, so compute them once.
    _, seed_genres = artist_genres[artist_id]
    seed_genres = [genre for genre in seed_genres if genre != 'existing playlist']

    result = {}
    for attempt in range(retries):
        try:
            similar_artists = sp.artist_related_artists(artist_id)
            time.sleep(0.1)

            for artist in similar_artists['artists']:
                current_genres = artist['genres'] + seed_genres
                if artist['id'] in artist_genres:
                    # Related artist already known: fold in its recorded genres too.
                    current_genres += artist_genres[artist['id']][1]

                result[artist['id']] = (artist['name'], list(set(current_genres)) + ['similar playlist'])

            break

        except Exception as e:  # KeyError is already covered by Exception
            if attempt == retries - 1:
                print(f"Error fetching similar artists for artist_id {artist_id}: {e} - Skipping after {retries} retries")
            else:
                print(f"Error fetching similar artists for artist_id {artist_id}: {e} - Retrying...")
                time.sleep(1)

    return result

def save_cache(obj, filename):
    """Pickle ``obj`` into the file at ``filename``, overwriting any existing file."""
    with open(filename, mode='wb') as handle:
        pickle.Pickler(handle).dump(obj)

def load_cache(filename):
    """Load a pickled object from ``filename``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object, or None when the file does not exist or does
        not contain a complete pickle (e.g. an empty or truncated file left
        behind by an interrupted write), so callers can rebuild the data.
    """
    # NOTE: unpickling can execute arbitrary code; only point this at cache
    # files written by this notebook.
    try:
        with open(filename, 'rb') as file:
            return pickle.load(file)
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        return None

# Read the playlist features and flatten the per-track artist ID lists into
# one array of unique artist IDs.
playlist_df = pd.read_csv("features_df.csv")

parsed_artist_ids_series = playlist_df['artist_ids'].apply(parse_artist_ids)
artist_ids_df = pd.DataFrame(parsed_artist_ids_series.tolist())
exploded_artist_ids_df = artist_ids_df.stack().reset_index(level=1, drop=True)
artist_ids = exploded_artist_ids_df.unique()

batch_size = 50
artist_ids_batches = [artist_ids[i:i + batch_size] for i in range(0, len(artist_ids), batch_size)]
artist_genres = {}

# Try to load cache
# NOTE(review): the cache is also written before the similar-artist pass
# below, so a run interrupted between the two saves leaves a cache that makes
# the next run skip that pass - confirm this is acceptable.
loaded_artist_genres = load_cache('artist_genres_cache.pkl')
if loaded_artist_genres:
    artist_genres = loaded_artist_genres
else:
    # The context manager shuts the pool down even if a worker raises.
    with ThreadPoolExecutor() as executor:
        batch_genres_results = list(tqdm(executor.map(fetch_batch_genres, artist_ids_batches), total=len(artist_ids_batches)))

    for result in batch_genres_results:
        artist_genres.update(result)

    save_cache(artist_genres, 'artist_genres_cache.pkl')  # Save the cache after fetching original artists

    original_artist_ids = set(exploded_artist_ids_df.to_list())  # Get the original_artist_ids

    with ThreadPoolExecutor() as executor:
        similar_artists_results = list(tqdm(
            executor.map(
                lambda seed_id: fetch_similar_artists_genres(seed_id, artist_genres, original_artist_ids),
                artist_ids,
            ),
            total=len(artist_ids),
        ))

    for result in similar_artists_results:
        artist_genres.update(result)

    save_cache(artist_genres, 'artist_genres_cache.pkl')  # Save the cache after fetching similar artists

# Build the lookup once instead of once per genre of every artist.
allowed_genres = set(dict(sorted_subgenre_weights_list)) | {'existing playlist', 'similar playlist'}

final_artist_genres = {}
for artist_id, (artist_name, genres) in artist_genres.items():
    unique_genres = list(set(genres))
    final_artist_genres[artist_name] = [genre for genre in unique_genres if genre in allowed_genres]

100%|███████████████████████████████████████████| 17/17 [00:01<00:00, 15.50it/s]
100%|█████████████████████████████████████████| 827/827 [00:15<00:00, 54.59it/s]

import pandas as pd

# Genre table read from disk; fetch_related_artists reads its 'GENRE' and
# 'REL_ARTISTS' columns.
all_genres_df = pd.read_csv("all_genres1.csv")
# Keep only the genre names of the (genre, weight) pairs, in their existing order.
genres_to_match = [genre_name for genre_name, _ in sorted_subgenre_weights_list]

def fetch_related_artists(df, genres_to_match):
    """Collect the related artists listed for each requested genre.

    Args:
        df: DataFrame with a 'GENRE' column and a 'REL_ARTISTS' column holding
            comma-separated artist names.
        genres_to_match: Iterable of genre names to look up in 'GENRE'.

    Returns:
        Dict mapping artist name to the list of matched genres, in the order
        they were encountered. A genre is repeated when the artist is listed
        in several matching rows.
    """
    artist_pool = {}

    for genre in genres_to_match:
        rel_artists_column = df.loc[df['GENRE'] == genre, 'REL_ARTISTS']

        for rel_artists in rel_artists_column:
            # A missing cell is NaN (a float), which has no .split().
            if not isinstance(rel_artists, str):
                continue

            for artist in rel_artists.split(','):
                artist = artist.strip()
                if not artist:
                    continue  # stray or trailing comma, not an artist
                artist_pool.setdefault(artist, []).append(genre)

    return artist_pool

related_artists = fetch_related_artists(all_genres_df, genres_to_match)

# Start from the playlist-derived genres and fold in the related artists.
rec_genres = final_artist_genres.copy()

for artist, genres in related_artists.items():
    if artist not in rec_genres:
        # Artist seen only through the genre table: take its genres as-is.
        rec_genres[artist] = genres
        continue

    # Keep the playlist tags at the end of the list, after the de-duplicated
    # real genres.
    custom_genres = [tag for tag in rec_genres[artist] if tag in ['existing playlist', 'similar playlist']]
    unique_genres = list(set(rec_genres[artist] + genres) - set(custom_genres))
    rec_genres[artist] = unique_genres + custom_genres

import os
import pickle
import time
from tqdm import tqdm
import concurrent.futures
import functools
import pandas as pd
import threading
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

def get_artist_id(artist):
    """Search for ``artist`` and return the ID of the first hit, or None.

    Args:
        artist: Query string passed to ``sp.search`` with ``type="artist"``.

    Returns:
        The 'id' of the first item in the search result, or None when the
        search returned no items.
    """
    search_result = sp.search(artist, type="artist", limit=1)
    matches = search_result['artists']['items']

    if not matches:
        return None
    return matches[0]['id']

def retry(retries=3, delay=1):
    """Build a decorator that retries a call on ``spotipy.SpotifyException``.

    Args:
        retries: Number of extra attempts after the first call, so the
            wrapped function runs at most ``retries + 1`` times.
        delay: Seconds to wait between attempts; also the fallback wait when
            a 429 response carries no 'Retry-After' header.

    Returns:
        A decorator. The wrapped function returns the original result, or
        None once every attempt has raised ``spotipy.SpotifyException``.
        Other exception types propagate unchanged.
    """
    def decorator_retry(func):
        @functools.wraps(func)
        def wrapper_retry(*args, **kwargs):
            for attempt in range(retries + 1):
                try:
                    return func(*args, **kwargs)
                except spotipy.SpotifyException as e:
                    is_last_attempt = attempt == retries

                    if e.http_status == 429 and not is_last_attempt:
                        # Rate limited: wait for as long as the response asks.
                        retry_after = int(e.headers.get('Retry-After', delay))
                        print(f"Rate limit exceeded. Retrying in {retry_after} seconds.")
                        time.sleep(retry_after)
                        continue

                    print(f"Error processing function {func.__name__}: {e}")
                    if is_last_attempt:
                        # Nothing left to retry, so do not sleep first.
                        return None
                    time.sleep(delay)
            return None
        return wrapper_retry
    return decorator_retry

def get_artist_top_tracks(artist_id, artist, genres):
    """Fetch an artist's top tracks (country 'US') as a list of row dicts.

    Args:
        artist_id: ID passed to ``sp.artist_top_tracks``.
        artist: Artist name stored in each row.
        genres: Iterable of genre strings, stored joined with ', '.

    Returns:
        List of dicts with the keys 'artist', 'genres', 'track_name',
        'track_id', 'popularity' and 'duration_ms', or None when all three
        attempts failed.
    """
    max_retries = 2

    for attempt in range(max_retries + 1):
        try:
            top_tracks = sp.artist_top_tracks(artist_id, country='US')

            # Build all rows in one expression: if a track is malformed the
            # whole attempt fails, and no partial rows are left behind to be
            # duplicated by the retry.
            return [
                {
                    'artist': artist,
                    'genres': ', '.join(genres),
                    'track_name': track['name'],
                    'track_id': track['id'],
                    'popularity': track['popularity'],
                    'duration_ms': track['duration_ms'],
                }
                for track in top_tracks['tracks']
            ]
        except Exception as e:
            if attempt == max_retries:
                print(f"Error fetching top tracks for artist {artist} with ID {artist_id}: {e}")
                return None
            time.sleep(1)

def get_artist_ids_batch(artists):
    """Look up the IDs of several artists concurrently.

    Args:
        artists: Iterable of artist names, each passed to ``get_artist_id``.

    Returns:
        Dict mapping artist name to ID. Names whose lookup returned a falsy
        value or raised are left out; failures are printed.
    """
    found_ids = {}
    with concurrent.futures.ThreadPoolExecutor() as pool:
        name_by_future = {}
        for name in artists:
            name_by_future[pool.submit(get_artist_id, name)] = name

        for finished in concurrent.futures.as_completed(name_by_future):
            name = name_by_future[finished]
            try:
                found = finished.result()
                if found:
                    found_ids[name] = found
            except Exception as e:
                print(f"Error fetching artist ID for artist {name}: {e}")

    return found_ids

@retry(retries=1)
def process_artist(artist, genres, all_artist_ids, semaphore):
    """Fetch the top tracks of one artist while holding ``semaphore``.

    Args:
        artist: Artist name.
        genres: Genres forwarded to ``get_artist_top_tracks``.
        all_artist_ids: Dict mapping artist name to artist ID.
        semaphore: Semaphore held for the duration of the lookup.

    Returns:
        The result of ``get_artist_top_tracks``, or None when the artist has
        no entry in ``all_artist_ids``.
    """
    with semaphore:
        if artist not in all_artist_ids:
            return None
        return get_artist_top_tracks(all_artist_ids[artist], artist, genres)

def load_cache(cache_name):
    """Load a pickled object from ``cache_name``.

    Args:
        cache_name: Path of the pickle file to read.

    Returns:
        The unpickled object, or None when the file does not exist or does
        not contain a complete pickle (e.g. an empty or truncated file left
        behind by an interrupted write), so callers can rebuild the data.
    """
    # NOTE: unpickling can execute arbitrary code; only point this at cache
    # files written by this notebook.
    try:
        with open(cache_name, "rb") as cache_file:
            return pickle.load(cache_file)
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        return None

def processed_artists(cached_data):
    """Return the set of 'artist' values found in the cached track rows."""
    seen_artists = set()
    for track_row in cached_data:
        seen_artists.add(track_row['artist'])
    return seen_artists

def get_top_tracks(artist_pool):
    """Fetch top tracks for every artist in ``artist_pool``, with caching.

    Rows already stored in "cache_top_tracks_data.pkl" are reused and the
    artists they belong to are not fetched again. Artist IDs are read from
    the module-level ``all_artist_ids`` dict.

    Args:
        artist_pool: Dict mapping artist name to its list of genres.

    Returns:
        DataFrame with one row per track (cached rows plus new rows).
    """
    top_tracks_data = []

    semaphore = threading.BoundedSemaphore(60)

    cache_name = "cache_top_tracks_data.pkl"
    cached_data = load_cache(cache_name)

    if cached_data:
        print("Loading data from cache")
        top_tracks_data.extend(cached_data)
        processed_artists_set = processed_artists(cached_data)
    else:
        processed_artists_set = set()

    def write_cache():
        with open(cache_name, "wb") as cache_file:
            pickle.dump(top_tracks_data, cache_file)

    # True while top_tracks_data holds rows that are not in the cache file.
    has_unsaved_rows = False

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        futures = [executor.submit(process_artist, artist, genres, all_artist_ids, semaphore) for artist, genres in artist_pool.items() if artist not in processed_artists_set]

        cache_counter = len(processed_artists_set)

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Fetching tracks"):
            try:
                track_data = future.result()
                if track_data:
                    top_tracks_data.extend(track_data)
                    has_unsaved_rows = True

                    cache_counter += 1
                    if cache_counter % 100 == 0:
                        write_cache()
                        has_unsaved_rows = False
            except Exception as e:
                print(f"Error processing future: {e}")

    # The periodic save only fires on multiples of 100 artists; without this
    # final flush the tail of every run would be fetched again next time.
    if has_unsaved_rows:
        try:
            write_cache()
        except OSError as e:
            # Keep the fetched data usable even if the cache cannot be written.
            print(f"Error saving cache {cache_name}: {e}")

    top_tracks_df = pd.DataFrame(top_tracks_data)
    return top_tracks_df

cache_file = "artist_ids_cache.pkl"

# Resume from the artist-ID cache when a non-empty cache file exists.
# NOTE: unpickling can execute arbitrary code; only load files written by
# this notebook.
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
    with open(cache_file, "rb") as f:
        all_artist_ids = pickle.load(f)
    print("Loaded artist IDs from cache file.")
else:
    all_artist_ids = {}

batch_size = 50
# Materialise the artist names once instead of rebuilding the list for
# every batch slice.
rec_artist_names = list(rec_genres.keys())
artist_batches = [rec_artist_names[i:i + batch_size] for i in range(0, len(rec_artist_names), batch_size)]

for batch in tqdm(artist_batches, desc="Fetching artist IDs"):
    artists_to_search = [artist for artist in batch if artist not in all_artist_ids]
    if not artists_to_search:
        # Whole batch already cached: nothing to look up or to re-save.
        continue

    batch_artist_ids = get_artist_ids_batch(artists_to_search)

    all_artist_ids.update(batch_artist_ids)

    # Save the cache after processing each batch
    with open(cache_file, "wb") as f:
        pickle.dump(all_artist_ids, f)

Loaded artist IDs from cache file.

Fetching artist IDs:  76%|███████████████▏    | 305/401 [04:35<03:44,  2.34s/it]

Error fetching artist ID for artist 'Buddy Guy': HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5)

Fetching artist IDs:  92%|██████████████████▍ | 370/401 [05:55<01:13,  2.38s/it]

Error fetching artist ID for artist 'Oxossi': HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5)

Fetching artist IDs: 100%|████████████████████| 401/401 [06:32<00:00,  1.02it/s]

# Fetch (or reuse from cache) the top tracks of every artist in rec_genres.
top_tracks_df = get_top_tracks(rec_genres)

Loading data from cache

Fetching tracks: 100%|█████████████████████| 7869/7869 [01:04<00:00, 122.87it/s]

import re
import concurrent.futures
from tqdm import tqdm
import spotipy
from ratelimiter import RateLimiter
import time
from functools import wraps

# Shared limiter allowing at most 50 calls per period of 1
# (presumably seconds - confirm against the ratelimiter package).
rate_limiter = RateLimiter(max_calls=50, period=1)

def retry(retries=3, delay=1):
    """Build a decorator that retries a call on ``spotipy.SpotifyException``.

    Args:
        retries: Number of extra attempts after the first call, so the
            wrapped function runs at most ``retries + 1`` times.
        delay: Seconds to wait between attempts; also the fallback wait when
            a 429 response carries no 'Retry-After' header.

    Returns:
        A decorator. The wrapped function returns the original result, or
        None once every attempt has raised ``spotipy.SpotifyException``.
        Other exception types propagate unchanged.
    """
    def decorator_retry(func):
        @wraps(func)
        def wrapper_retry(*args, **kwargs):
            for attempt in range(retries + 1):
                try:
                    return func(*args, **kwargs)
                except spotipy.SpotifyException as e:
                    is_last_attempt = attempt == retries

                    if e.http_status == 429 and not is_last_attempt:
                        # Rate limited: wait for as long as the response asks.
                        retry_after = int(e.headers.get('Retry-After', delay))
                        print(f"Rate limit exceeded. Retrying in {retry_after} seconds.")
                        time.sleep(retry_after)
                        continue

                    print(f"Error processing function {func.__name__}: {e}")
                    if is_last_attempt:
                        # Nothing left to retry, so do not sleep first.
                        return None
                    time.sleep(delay)
            return None
        return wrapper_retry
    return decorator_retry

@retry(retries=3)
def get_audio_features(track_ids):
    """Return whatever ``sp.audio_features`` yields for ``track_ids``."""
    return sp.audio_features(track_ids)

def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items.

    ``iterable`` must support ``len()`` and slicing.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = start + n
        if stop > total:
            stop = total
        yield iterable[start:stop]

# Load Pickle Cache
def load_cache(cache_name):
    """Load a pickled object from ``cache_name``.

    Args:
        cache_name: Path of the pickle file to read.

    Returns:
        The unpickled object, or None when the file does not exist or does
        not contain a complete pickle (e.g. an empty or truncated file left
        behind by an interrupted write), so callers can rebuild the data.
    """
    # NOTE: unpickling can execute arbitrary code; only point this at cache
    # files written by this notebook.
    try:
        with open(cache_name, "rb") as cache_file:
            return pickle.load(cache_file)
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        return None

# Save Pickle Cache
def save_cache(cache_name, data):
    """Pickle ``data`` into the file ``cache_name``, replacing any existing file."""
    with open(cache_name, mode="wb") as cache_handle:
        pickle.dump(data, cache_handle)

def get_audio_features_df(df):
    """Attach audio features to the tracks in ``df``, using a pickle cache.

    Tracks whose 'track_id' is already in "audio_features_cache.pkl" are not
    fetched again. The remaining IDs are requested in batches of 50, merged
    with ``df`` on the track ID, combined with the cached rows and written
    back to the cache.

    Args:
        df: DataFrame with at least the columns 'artist', 'genres',
            'track_name', 'track_id', 'popularity' and 'duration_ms'.

    Returns:
        DataFrame with the track columns plus the audio feature columns.
    """
    cache_name = "audio_features_cache.pkl"
    cached_data = load_cache(cache_name)

    if cached_data:
        processed_track_ids = {row["track_id"] for row in cached_data}
        df = df[~df["track_id"].isin(processed_track_ids)]

    if df.empty:
        return pd.DataFrame(cached_data)

    def fetch_with_rate_limit(track_id_batch):
        # The limiter counts one call per `with` entry, so it has to be
        # entered around every request; wrapping the submission loop once
        # would register a single call and throttle nothing.
        with rate_limiter:
            return get_audio_features(track_id_batch)

    track_ids = df['track_id'].tolist()
    all_features = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_with_rate_limit, track_id_batch) for track_id_batch in batch(track_ids, n=50)]

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Fetching audio features"):
            result = future.result()
            if result is not None:
                # Individual tracks without features come back as None.
                all_features.extend(r for r in result if r is not None)

    relevant_columns = ['artist', 'genres', 'track_name', 'track_id', 'popularity', 'duration_ms_x', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

    if not all_features:
        # Every request failed: there is no 'id' column to merge on, so fall
        # back to the cached rows (or an empty, correctly shaped frame).
        if cached_data:
            return pd.DataFrame(cached_data)
        output_columns = ['duration_ms' if column == 'duration_ms_x' else column for column in relevant_columns]
        return pd.DataFrame(columns=output_columns)

    audio_features_df = pd.DataFrame(all_features)
    merged_df = pd.merge(df, audio_features_df, left_on='track_id', right_on='id', how='inner')

    # Both frames carry 'duration_ms'; keep the one from df ('_x' suffix).
    # rename() returns a new frame, avoiding an in-place edit of a selection.
    merged_df = merged_df[relevant_columns].rename(columns={'duration_ms_x': 'duration_ms'})

    if cached_data:
        merged_df = pd.concat([pd.DataFrame(cached_data), merged_df], ignore_index=True)

    save_cache(cache_name, merged_df.to_dict("records"))

    return merged_df

# Fetch audio features for the top tracks and keep one row per track ID.
top_feat_df = get_audio_features_df(top_tracks_df).drop_duplicates(subset='track_id')
# Strip single quotes and square brackets from the artist names.
top_feat_df['artist'] = top_feat_df['artist'].str.replace(r"[\'\[\]]", "", regex=True)

Fetching audio features: 100%|██████████████| 3872/3872 [01:05<00:00, 59.17it/s]

import datetime
# Export the feature table under a file name stamped with the current local time.
now = datetime.datetime.now()
timestamp = now.strftime("%Y%m%d_%H%M%S")
file_name = f"tracks_for_rec_{timestamp}.csv"
top_feat_df.to_csv(file_name, index=False)

def get_numeric_input(prompt, min_value, max_value, unit):
    """Prompt until the user enters a number within the inclusive range.

    Args:
        prompt: Text shown by ``input``.
        min_value: Smallest accepted value.
        max_value: Largest accepted value.
        unit: Suffix appended to both bounds in the out-of-range message.

    Returns:
        The entered value as a float.
    """
    while True:
        try:
            value = float(input(prompt))
            if not (min_value <= value <= max_value):
                print(f"Please enter a value between {min_value}{unit} and {max_value}{unit}.")
                continue
            return value
        except ValueError:
            print("Invalid input, please enter a numeric value.")

def get_desired_value(feature_name, feature_info):
    """Ask the user for the desired value and importance of one feature.

    Args:
        feature_name: Key into ``feature_info``.
        feature_info: Dict mapping feature name to a 6-tuple of
            (prompt, min, max, unit, importance prompt, conversion function).

    Returns:
        Tuple of (converted value, importance rescaled from 0-10 to 0-1).
    """
    value_prompt, lower, upper, unit, importance_prompt, convert = feature_info[feature_name]

    print(value_prompt)
    raw_value = get_numeric_input("Choose a value: ", lower, upper, unit)
    raw_importance = get_numeric_input(importance_prompt, 0, 10, "")

    converted_value = convert(raw_value)
    importance_weight = normalize(raw_importance, 0, 10, 0, 1)
    return converted_value, importance_weight

def normalize(value, old_min, old_max, new_min, new_max):
    """Linearly map ``value`` from [old_min, old_max] onto [new_min, new_max]."""
    offset = value - old_min
    new_span = new_max - new_min
    old_span = old_max - old_min
    return ((offset * new_span) / old_span) + new_min

def duration_converter(value):
    """Multiply ``value`` by 1000 (used to turn seconds into milliseconds)."""
    milliseconds_per_second = 1000
    return value * milliseconds_per_second

def identity(x):
    """Return the argument unchanged."""
    return x

def get_feature_info():
    """Describe every feature the user is asked about.

    Returns:
        Dict mapping feature name to a 6-tuple of (prompt, minimum input,
        maximum input, unit suffix, importance prompt, conversion function).
        Every input is on a 1-10 scale; the conversion function rescales it
        to the range used for that feature.
    """
    importance_prompt = "Enter its importance (0 to 10, with 0 being not included): "

    def rescale_to(low, high):
        # Converter mapping the 1-10 user scale onto [low, high].
        return lambda x: normalize(x, 1, 10, low, high)

    def entry(prompt, converter):
        return (prompt, 1, 10, "", importance_prompt, converter)

    feature_info = {}
    feature_info["danceability"] = entry(
        "Danceability level: 1 (not danceable) to 10 (super danceable) (e.g. 1 - Classical, 10 - Electronic Dance)",
        rescale_to(0, 1),
    )
    feature_info["energy"] = entry(
        "Energy level: 1 (low energy) to 10 (high energy) (e.g. 1 - Lullaby, 10 - Heavy Metal)",
        rescale_to(0, 1),
    )
    feature_info["loudness"] = entry(
        "Loudness level: 1 (quiet) to 10 (loud) (e.g. 1 - Quiet instrumental, 10 - Loud rock concert)",
        rescale_to(-60, 0),
    )
    feature_info["speechiness"] = entry(
        "Speechiness level: 1 (purely music) to 10 (talkative) (e.g. 1 - Instrumental, 10 - Podcast)",
        rescale_to(0, 1),
    )
    feature_info["acousticness"] = entry(
        "Acousticness level: 1 (electronic) to 10 (acoustic) (e.g. 1 - Techno, 10 - Acoustic guitar)",
        rescale_to(0, 1),
    )
    feature_info["instrumentalness"] = entry(
        "Instrumentalness level: 1 (vocal) to 10 (instrumental) (e.g. 1 - Pop song, 10 - Orchestra)",
        rescale_to(0, 1),
    )
    feature_info["liveness"] = entry(
        "Liveness level: 1 (studio recording) to 10 (live performance) (e.g. 1 - Studio album, 10 - Live performance)",
        rescale_to(0, 1),
    )
    feature_info["valence"] = entry(
        "Valence level: 1 (sad) to 10 (happy) (e.g. 1 - Sad ballad, 10 - Upbeat pop)",
        rescale_to(0, 1),
    )
    feature_info["tempo"] = entry(
        "Tempo range: 1 (slow) to 10 (fast) (e.g. 1 - Slow ballad, 10 - Fast electronic dance)",
        rescale_to(60, 200),
    )
    feature_info["popularity"] = entry(
        "Popularity range: 1 (low) to 10 (high) (e.g. 1 - Niche artist, 10 - Chart-topper)",
        rescale_to(0, 100),
    )
    feature_info["duration_ms"] = entry(
        "Duration range: 1 (short) to 10 (long) (e.g. 1 - Short jingle, 10 - Long symphony) in seconds",
        lambda x: duration_converter(normalize(x, 1, 10, 60, 600)),
    )
    return feature_info

def get_user_inputs():
    """Prompt for the desired value and importance of every feature.

    Returns:
        Tuple of two dicts keyed by feature name: the converted desired
        values and the importance weights.
    """
    feature_info = get_feature_info()
    desired_values, importance_values = {}, {}
    separator = "=" * 80

    for feature_name in feature_info:
        print(separator)
        desired, weight = get_desired_value(feature_name, feature_info)
        desired_values[feature_name] = desired
        importance_values[feature_name] = weight

    return desired_values, importance_values

# Run the interactive questionnaire, then echo the converted target values
# and the 0-1 importance weights it produced.
desired_values, importance_values = get_user_inputs()
print(desired_values)
print(importance_values)

================================================================================
Danceability level: 1 (not danceable) to 10 (super danceable) (e.g. 1 - Classical, 10 - Electronic Dance)
Choose a value: 7
Enter its importance (0 to 10, with 0 being not included): 6
================================================================================
Energy level: 1 (low energy) to 10 (high energy) (e.g. 1 - Lullaby, 10 - Heavy Metal)
Choose a value: 6
Enter its importance (0 to 10, with 0 being not included): 6
================================================================================
Loudness level: 1 (quiet) to 10 (loud) (e.g. 1 - Quiet instrumental, 10 - Loud rock concert)
Choose a value: 5
Enter its importance (0 to 10, with 0 being not included): 4
================================================================================
Speechiness level: 1 (purely music) to 10 (talkative) (e.g. 1 - Instrumental, 10 - Podcast)
Choose a value: 3
Enter its importance (0 to 10, with 0 being not included): 2
================================================================================
Acousticness level: 1 (electronic) to 10 (acoustic) (e.g. 1 - Techno, 10 - Acoustic guitar)
Choose a value: 4
Enter its importance (0 to 10, with 0 being not included): 6
================================================================================
Instrumentalness level: 1 (vocal) to 10 (instrumental) (e.g. 1 - Pop song, 10 - Orchestra)
Choose a value: 5
Enter its importance (0 to 10, with 0 being not included): 5
================================================================================
Liveness level: 1 (studio recording) to 10 (live performance) (e.g. 1 - Studio album, 10 - Live performance)
Choose a value: 5
Enter its importance (0 to 10, with 0 being not included): 0
================================================================================
Valence level: 1 (sad) to 10 (happy) (e.g. 1 - Sad ballad, 10 - Upbeat pop)
Choose a value: 6
Enter its importance (0 to 10, with 0 being not included): 4
================================================================================
Tempo range: 1 (slow) to 10 (fast) (e.g. 1 - Slow ballad, 10 - Fast electronic dance)
Choose a value: 7
Enter its importance (0 to 10, with 0 being not included): 6
================================================================================
Popularity range: 1 (low) to 10 (high) (e.g. 1 - Niche artist, 10 - Chart-topper)
Choose a value: 3
Enter its importance (0 to 10, with 0 being not included): 3
================================================================================
Duration range: 1 (short) to 10 (long) (e.g. 1 - Short jingle, 10 - Long symphony) in seconds
Choose a value: 2
Enter its importance (0 to 10, with 0 being not included): 0
{'danceability': 0.6666666666666666, 'energy': 0.5555555555555556, 'loudness': -33.33333333333333, 'speechiness': 0.2222222222222222, 'acousticness': 0.3333333333333333, 'instrumentalness': 0.4444444444444444, 'liveness': 0.4444444444444444, 'valence': 0.5555555555555556, 'tempo': 153.33333333333331, 'popularity': 22.22222222222222, 'duration_ms': 120000.0}
{'danceability': 0.6, 'energy': 0.6, 'loudness': 0.4, 'speechiness': 0.2, 'acousticness': 0.6, 'instrumentalness': 0.5, 'liveness': 0.0, 'valence': 0.4, 'tempo': 0.6, 'popularity': 0.3, 'duration_ms': 0.0}

import numpy as np
import pandas as pd
import pickle


# Load the pickled subgenre weights; later code converts this object with
# dict(...), i.e. treats it as (genre, weight) pairs.
# NOTE(review): pickle.load can execute arbitrary code - only load files
# produced by this notebook.
with open("sorted_subgenre_weights_list.pkl", "rb") as file:
    sorted_subgenre_weights_list = pickle.load(file)
    
def continuous_similarity(x, y, sigma=0.1):
    """Gaussian similarity of ``x`` and ``y``.

    Returns 1.0 when the values are equal and decays towards 0 as their
    difference grows relative to ``sigma``.
    """
    squared_gap = (x - y) ** 2
    return np.exp(-squared_gap / (2 * sigma ** 2))

def categorical_similarity(x, y):
    """Return 1 when ``x`` equals ``y``, otherwise 0."""
    if x == y:
        return 1
    return 0

def compute_similarity_scores(row, desired_values, sorted_subgenre_weights_dict, importance_values):
    """Score one track against the desired feature values and genre weights.

    Args:
        row: Mapping (e.g. a DataFrame row) with a 'genres' string of
            comma-separated genres and one entry per scored feature.
        desired_values: Dict mapping feature name to the target value.
        sorted_subgenre_weights_dict: Dict mapping genre name to its weight.
        importance_values: Dict mapping feature name to its importance.

    Returns:
        Tuple ``(similarity, avg_genre_weight)``. ``similarity`` is the
        importance-weighted mean of the per-feature similarities (0.0 when
        the importances sum to zero). ``avg_genre_weight`` is the summed
        weight of the row's known genres divided by the number of genres
        listed on the row.
    """
    genres_text = row['genres']
    # A missing genres cell is NaN rather than a string; treat it as "no genres".
    if isinstance(genres_text, str):
        genre_list = genres_text.replace(', ', ',').split(',')
    else:
        genre_list = []

    genre_weight_sum = sum(sorted_subgenre_weights_dict.get(genre, 0) for genre in genre_list)
    avg_genre_weight = genre_weight_sum / len(genre_list) if genre_list else 0.0

    total_importance = sum(importance_values.values())
    if not total_importance:
        # Nothing was marked as important; avoid dividing by zero.
        return (0.0, avg_genre_weight)

    # NOTE(review): continuous_similarity's default sigma of 0.1 suits
    # features on a 0-1 scale; features such as tempo, loudness, popularity
    # and duration_ms only score when almost exactly equal - confirm whether
    # they should be rescaled first.
    similarity_sum = 0

    for feature, value in desired_values.items():
        if feature in ('key', 'mode'):
            continue

        importance = importance_values[feature]
        if not importance:
            # Contributes nothing; skipping also keeps a NaN feature value
            # from poisoning the sum through NaN * 0.
            continue

        # The desired value is already in the row's unit (duration_ms is in
        # milliseconds on both sides), so it is compared as-is. Previously
        # the desired duration was divided by 1000 and compared in seconds
        # against a millisecond column.
        similarity = continuous_similarity(value, row[feature])
        similarity_sum += similarity * importance

    return (similarity_sum / total_importance, avg_genre_weight)

# Genre -> weight lookup built from the (genre, weight) pairs.
sorted_subgenre_weights_dict = dict(sorted_subgenre_weights_list)

# compute_similarity_scores returns a (similarity, avg_genre_weight) tuple per
# row; zip(*...) transposes those tuples into one sequence per new column.
top_feat_df['similarity_score'], top_feat_df['avg_genre_weight'] = zip(*top_feat_df.apply(compute_similarity_scores, axis=1, args=(desired_values, sorted_subgenre_weights_dict, importance_values)))
# Blend the two scores: 40% feature similarity, 60% genre weight.
top_feat_df['combined_score'] = (0.4 * top_feat_df['similarity_score'] + 0.6 * top_feat_df['avg_genre_weight'])

# Best matches first.
recommendations = top_feat_df.sort_values(by='combined_score', ascending=False)

recommendations.head(10)

import datetime
import os

# Write the recommendations to "recommendations_<date>_<n>.csv", using the
# first counter value (starting at 1) whose file does not exist yet.
date_string = datetime.datetime.now().strftime('%Y-%m-%d')
base_filename = f"recommendations_{date_string}"

counter = 1
filename = f"{base_filename}_{counter}.csv"
while os.path.isfile(filename):
    counter += 1
    filename = f"{base_filename}_{counter}.csv"

recommendations.to_csv(filename, index=False)

	cluster_name	danceability	energy	key	mode	loudness	liveness	speechiness	acousticness	instrumentalness	valence	tempo	time_signature
0	rock	0.514328	0.571577	5.232563	0.683387	-9.194510	0.186441	0.053367	0.373361	0.165249	0.468150	121.625909	3.881404
1	rap	0.726660	0.653825	5.424918	0.537899	-7.627332	0.175790	0.150779	0.235586	0.071881	0.587986	121.230331	3.978006
2	pop	0.619228	0.721724	5.367366	0.569265	-7.273153	0.196571	0.075010	0.185341	0.219561	0.504339	126.876020	3.957070
3	pop rock	0.505332	0.751019	5.337628	0.644359	-6.759692	0.200557	0.064284	0.145079	0.144414	0.478098	128.659863	3.943759
4	funk	0.663443	0.625277	5.358117	0.596341	-8.198260	0.181769	0.117377	0.367142	0.076347	0.629963	119.775582	3.893258
5	jazz	0.550349	0.344087	5.118756	0.685035	-14.305447	0.212364	0.189669	0.796859	0.339704	0.534094	113.075549	3.760160
6	focus	0.331941	0.306911	4.975790	0.649005	-17.144856	0.186372	0.065677	0.737767	0.590312	0.245744	107.533687	3.724647
7	metal	0.358209	0.832417	5.296676	0.626721	-6.552993	0.224121	0.084712	0.058208	0.341393	0.298757	126.984561	3.883732
8	folk	0.540885	0.463688	5.173792	0.671722	-11.230435	0.189666	0.069790	0.624869	0.242565	0.568838	117.822266	3.789748
9	techno	0.714764	0.750461	5.604626	0.540528	-8.248362	0.173021	0.085216	0.077328	0.537798	0.454211	129.477237	3.978701

	artist	genres	track_name	track_id	popularity	duration_ms	danceability	energy	key	loudness	mode	speechiness	acousticness	instrumentalness	liveness	valence	tempo	similarity_score	avg_genre_weight	combined_score
59083	Wintertime	miami hip hop, underground hip hop, dark trap,...	Me & Ben	4vrzpvXILOhfFaMbFaKFiZ	33	165120	0.582	0.532	9	-14.215	1	0.2070	0.360000	0.539000	0.0736	0.531	124.930	0.592254	0.046416	0.264751
233131	Deergod	similar playlist	Dom P Showers	4hDhcgOPbNTgl92lkp8cMw	53	137534	0.668	0.552	11	-12.660	0	0.2950	0.331000	0.471000	0.6400	0.490	93.977	0.656716	0.002167	0.263986
110340	Archy Marshall	similar playlist	Ammi Ammi	3RPacQGcu4qN4A1QYAIfUq	33	209839	0.609	0.573	11	-11.871	1	0.2470	0.359000	0.423000	0.5960	0.492	109.800	0.640244	0.002167	0.257398
188568	Joyner Lucas	hip hop, pop rap	I Love	30z4LVkScpeNhwHFIB8Ewa	66	210197	0.671	0.565	11	-7.401	0	0.1600	0.274000	0.000065	0.3700	0.561	150.014	0.539086	0.064742	0.254479
59042	ilyTOMMY	underground hip hop, aesthetic rap, similar pl...	Tunnel of Love	1K4OfBFaLleSUaUVjp1l2X	57	142364	0.683	0.636	7	-12.096	0	0.0978	0.302000	0.498000	0.0972	0.615	160.012	0.585260	0.031677	0.253110
136925	Pistola	vapor trap, similar playlist	CA$H	3oow7zvWHMzjPPPyuy8KKw	11	221142	0.734	0.444	5	-13.371	0	0.1890	0.297000	0.436000	0.0820	0.501	139.976	0.570047	0.041226	0.252754
188695	KeithCharles Spacebar	underground hip hop	Ask, If You Really Wanna Know (feat. Mother Ma...	5Gg3yWgOAP305g7lBem7YK	9	168737	0.707	0.540	10	-10.744	0	0.2260	0.154000	0.355000	0.0945	0.493	165.010	0.507146	0.081580	0.251806
52332	Cordae	pop rap, trap, hip hop, underground hip hop, v...	Chronicles (feat. H.E.R. & Lil Durk)	6oOJL3xj8zRz6URS0SwlXC	63	212076	0.678	0.527	4	-9.038	0	0.3010	0.300000	0.000000	0.1190	0.583	76.847	0.540869	0.058766	0.251607
233616	DJ Twin Glocks	vapor trap	They Know Us (Feat. Sean Kingston, Lil Bibby &...	7a9XHEhlccgTzRcCJpMEOG	35	218498	0.725	0.498	5	-8.081	1	0.2050	0.274000	0.000003	0.1700	0.556	150.034	0.503519	0.080286	0.249579
199880	1of1	aesthetic rap	Chateau	6gClLxbq32u8bhukZ6hBaG	38	94675	0.690	0.555	7	-8.517	1	0.2020	0.356000	0.524000	0.3730	0.445	81.019	0.606208	0.011284	0.249253

Botify Grime - Playlist Song Recommendations¶

Web Scraping Tool for Music Genres¶

Genre Scatter Plot Visualization¶

3D Genre Map Visualization with Hue Rotation¶

Combining and Visualizing Merged DataFrames¶

In-Depth 'Every Noise' Web Scraping¶

Extracting Track IDs from Every Noise Playlists¶

Retrieving Audio Features and Popularity with Spotipy and RateLimiter¶

Averaging Audio Features for Genres¶

Detailed Analysis and Visualization of Genre Clusters and Audio Feature Averages¶

Genre Weight Calculation from Playlist with Similar Genres and Feature Similarity¶

Visualizing Weighted Playlist Genres with Cosine Similarity Clustering¶

Weighted Genre Clusters Based on Emphasized Genres¶

Displaying Sorted Genres per Cluster¶

Generating a Weighted List of Parent and Subgenres from Playlist Data Considering Similarity¶

Generating Customized Artist Genres with Playlist Associations¶

Fetching Audio Features for Top Tracks and Merging with Existing Data¶

Interactive User Input for Desired Feature Values¶

	genre	font_size	color	colors_rgb	top	left
0	pop	160%	#ad8807	rgb(173, 136, 7)	4850px	787px
1	rap	144%	#a88903	rgb(168, 137, 3)	5759px	1070px
2	rock	141%	#ab711a	rgb(171, 113, 26)	11449px	564px
3	urbano latino	134%	#bd9002	rgb(189, 144, 2)	3341px	1170px
4	hip hop	134%	#ad7e09	rgb(173, 126, 9)	6978px	1085px

Botify Grime - Playlist Song Recommendations¶

Web Scraping Tool for Music Genres¶

Genre Scatter Plot Visualization¶

3D Genre Map Visualization with Hue Rotation¶

Combining and Visualizing Merged DataFrames¶

In-Depth 'Every Noise' Web Scraping¶

Extracting Track IDs from Every Noise Playlists¶

Retrieving Audio Features and Popularity with Spotipy and RateLimiter¶

Averaging Audio Features for Genres¶

Detailed Analysis and Visualization of Genre Clusters and Audio Feature Averages¶

Genre Weight Calculation from Playlist with Similar Genres and Feature Similarity¶

Visualizing Weighted Playlist Genres with Cosine Similarity Clustering¶

Weighted Genre Clusters Based on Emphasized Genres¶

Displaying Sorted Genres per Cluster¶

Generating a Weighted List of Parent and Subgenres from Playlist Data Considering Similarity¶

Generating Customized Artist Genres with Playlist Associations¶

Expanding Personalized Artist Genres with Additional Related Artists¶

Fetching Top Tracks of Related Artists¶

Fetching Audio Features for Top Tracks and Merging with Existing Data¶

Interactive User Input for Desired Feature Values¶