Spaces:

alanahmet
/

nlp_based_song_recommender

Sleeping

App Files Files Community

alanahmet committed on Jun 2, 2023

Commit

68d64b6

1 Parent(s): e4acbfd

docstring added

Browse files

Files changed (2) hide show

app.py +22 -28
spotify_music_recommender.py +99 -99

app.py CHANGED Viewed

@@ -9,6 +9,14 @@ if "song_init" not in st.session_state:
 def song_page(name, year):
     song_uri = smr.find_song_uri(name, year)
     formatted_song_uri = song_uri.split(':')[-1]
     uri_link = f'https://open.spotify.com/embed/track/{formatted_song_uri}?utm_source=generator'
@@ -30,39 +38,29 @@ def spr_sidebar():
         st.session_state.app_mode = 'Results'
     elif menu == 'About':
         st.session_state.app_mode = 'About'
-    # elif menu == 'How It Works':
-    #     st.session_state.app_mode = 'How It Works'
 def home_page():
-    # App layout
     st.title("Spotify Music Recommender")
-    # Song input section
-    # st.subheader("")
     col1, col2 = st.columns(2)
     song_input = col1.text_input("Enter a song:")
     year_input = col2.text_input("Enter the year:")
-    # Button section
-    # st.subheader("")
     col3, col4 = st.columns(2)
     find_song_button = col3.button("Find Song")
     find_random_song_button = col4.button("Random Song")
-    # Critic input section
     st.subheader("Song Review")
     critic_input = st.text_input("")
-    # Prediction button
     predict_button = st.button("Start Prediction")
     if find_song_button:
         song_page(song_input, year_input)
     elif find_random_song_button:
         find_random_song()
-    elif song_input == "" and year_input == "" and not st.session_state.song_init:
         st.session_state.song_init = True
         find_random_song()
@@ -75,28 +73,28 @@ def home_page():
                 song_cluster_pipeline, data, number_cols = smr.get_model_values(
                     data_path, file_path, cluster_labels)
                 user_critic_text = critic_input
-                rec_splitted = smr.get_recommendation_array(
                     song_input, year_input, number_cols, user_critic_text)
-                res = smr.recommend_gpt(
-                    rec_splitted, data, song_cluster_pipeline, 15)
-                st.session_state.song_uris = smr.get_rec_song_uri(res)
                 st.write("You can access recommended song at result page")
             except:
                 st.write("An error occured please try again")
-# def text_field(label, columns=None, **input_params):
-#     c1, c2 = st.columns(columns or [1, 4])
-#     # Display field name with some alignment
-#     c1.markdown("##")
-#     c1.markdown(label)
-#     # Sets a default key parameter to avoid duplicate key errors
-#     input_params.setdefault("key", label)
-#     # Forward text input parameters
-#     return c2.text_input("", **input_params)
 def find_random_song():
@@ -171,10 +169,6 @@ def main():
         result_page()
     if st.session_state.app_mode == 'About':
         About_page()
-    # if st.session_state.app_mode == 'How It Works':
-    #     examples_page()
-# Run main()
 if __name__ == '__main__':
     main()

 def song_page(name, year):
+    """
+    Displays the Spotify song with the given name and year using an iframe.
+    Args:
+        name (str): The name of the song.
+        year (str): The year of the song.
+    """
     song_uri = smr.find_song_uri(name, year)
     formatted_song_uri = song_uri.split(':')[-1]
     uri_link = f'https://open.spotify.com/embed/track/{formatted_song_uri}?utm_source=generator'
         st.session_state.app_mode = 'Results'
     elif menu == 'About':
         st.session_state.app_mode = 'About'
 def home_page():
     st.title("Spotify Music Recommender")
     col1, col2 = st.columns(2)
     song_input = col1.text_input("Enter a song:")
     year_input = col2.text_input("Enter the year:")
     col3, col4 = st.columns(2)
     find_song_button = col3.button("Find Song")
     find_random_song_button = col4.button("Random Song")
     st.subheader("Song Review")
     critic_input = st.text_input("")
     predict_button = st.button("Start Prediction")
     if find_song_button:
         song_page(song_input, year_input)
     elif find_random_song_button:
         find_random_song()
+    elif not st.session_state.song_init:
         st.session_state.song_init = True
         find_random_song()
                 song_cluster_pipeline, data, number_cols = smr.get_model_values(
                     data_path, file_path, cluster_labels)
                 user_critic_text = critic_input
+                raw_recommendation_array = smr.get_recommendation_array(
                     song_input, year_input, number_cols, user_critic_text)
+                result = smr.format_chatgpt_recommendations (
+                    raw_recommendation_array, data, song_cluster_pipeline, 15)
+                st.session_state.song_uris = smr.get_recommendation_song_uri(result)
                 st.write("You can access recommended song at result page")
             except:
                 st.write("An error occured please try again")
def text_field(label, columns=None, **input_params):
    """
    Renders a labelled text input laid out in two Streamlit columns.
    Args:
        label (str): Text shown in the left column; also used as the default widget key.
        columns (list, optional): Column spec passed to st.columns. Defaults to [1, 4].
        **input_params: Extra keyword arguments forwarded to text_input.
    Returns:
        Whatever the right-hand column's text_input call returns.
    """
    layout = columns if columns else [1, 4]
    label_col, input_col = st.columns(layout)

    # The "##" line is emitted before the label so it lines up with the input box.
    label_col.markdown("##")
    label_col.markdown(label)

    # Fall back to the label as widget key so repeated fields do not collide.
    if "key" not in input_params:
        input_params["key"] = label

    return input_col.text_input("", **input_params)
 def find_random_song():
         result_page()
     if st.session_state.app_mode == 'About':
         About_page()
 if __name__ == '__main__':
     main()

spotify_music_recommender.py CHANGED Viewed

@@ -1,13 +1,3 @@
-#!/usr/bin/env python
-# coding: utf-8
-# # **Import Libraries**
-# In[22]:
-# import os
-# import difflib
 import numpy as np
 import pandas as pd
 import openai
@@ -29,10 +19,17 @@ from collections import defaultdict
 import warnings
 warnings.filterwarnings("ignore")
-# In[23]:
-def get_pipeline_data_number_cols():
     data = pd.read_csv("data/data.csv")
     song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
@@ -41,18 +38,37 @@ def get_pipeline_data_number_cols():
                                       ], verbose=False)
     X = data.select_dtypes(np.number)
-    number_cols = list(X.columns)
     song_cluster_pipeline.fit(X)
     song_cluster_labels = song_cluster_pipeline.predict(X)
     data['cluster_label'] = song_cluster_labels
-    return song_cluster_pipeline, data, number_cols
-# In[24]:
 def find_song(name, year):
     if os.path.isfile(".\secret_keys.py"):
         import secret_keys
         sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
@@ -69,7 +85,6 @@ def find_song(name, year):
     results = results['tracks']['items'][0]
     track_id = results['id']
     audio_features = sp.audio_features(track_id)[0]
     song_data['name'] = [name]
     song_data['year'] = [year]
     song_data['explicit'] = [int(results['explicit'])]
@@ -83,6 +98,17 @@ def find_song(name, year):
 def find_song_uri(name, year):
     # Create a Spotify client object.
     if os.path.isfile(".\secret_keys.py"):
         import secret_keys
@@ -91,26 +117,23 @@ def find_song_uri(name, year):
     else:
         client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
             client_id=os.environ.get("client_id"), client_secret=os.environ.get("client_secret")))
-    # Get the name of the song you want to get the ID for.
-    song_name = name
-    # Call the `search` method with the song name.
     results = client.search(q='track: {} year: {}'.format(name, year), limit=1)
-    # Get the first result.
     track = results['tracks']['items'][0]
-    # The Spotify ID of the song will be in the `id` property.
     song_id = track['uri']
     return song_id
-def format_song(song_data, number_cols):
-    list_song_data = song_data[number_cols].values.tolist()[0]
-    list_song_data = '[' + ', '.join([str(num)
-                                     for num in list_song_data]) + ']'
-    return list_song_data
 def get_response(text):
     if os.path.isfile(".\secret_keys.py"):
         import secret_keys
         openai.api_key = secret_keys.openai_api_key
@@ -130,13 +153,10 @@ def get_response(text):
     return response.choices[0].get("text")
-# In[25]:
-def get_text(user_critic, list_song_data):
-    init_text = "I want you to act as a song recommender. I will provide you songs data with following format f future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] \
-     values and user critic about the given song. And you will provide an array based on user critic.You must change at least 3 features. Do not write any explanations or other words, just return an array that include changes in future_columns\
-    and here is the describe values of future_columns  \n\
     valence	year	acousticness	danceability	duration_ms	energy	explicit	instrumentalness	key	liveness	loudness	mode	popularity	speechiness	tempo \n \
     count	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653 \n \
     mean	0.528587211	1976.787241	0.502114764	0.537395535	230948.3107	0.482388835	0.084575132	0.167009581	5.199844128	0.205838655	-11.46799004	0.706902311	31.43179434	0.098393262	116.8615896 \n \
@@ -148,112 +168,92 @@ def get_text(user_critic, list_song_data):
     max	1	2020	0.996	0.988	5403500	1	1	1	11	1	3.855	1	100	0.97	243.507"
     # init_last = "\n\n start with only typing random  future_columns values in given range as a array"
-    # user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
-    user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>]  format"
-    user_last = "\n\n start with the adjust following future_columns based on user_critic. "
     # example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
-    # feature_col_starter = "future_columns="
-    real_features = "future_columns=" + list_song_data
-    # init_input = init_text + init_last
     # test_input = init_text + user_last + user_critic + example_features + user_critic_last
     real_input = init_text + user_last + \
-        user_critic + real_features + user_critic_last
     return real_input
-# In[26]:
-def format_gpt_output(rec_splitted):
-    formatted = rec_splitted[3:-1].split(",")
     list_song_data = [float(i) for i in formatted]
     return list_song_data
-# In[27]:
-def recommend_gpt(song_list, spotify_data, song_cluster_pipeline, n_songs=15):
-    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
                    'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
     metadata_cols = ['name', 'year', 'artists']
     song_center = np.array(song_list)
     scaler = song_cluster_pipeline.steps[0][1]
-    scaled_data = scaler.transform(spotify_data[number_cols])
     scaled_song_center = scaler.transform(song_center.reshape(1, -1))
     distances = cdist(scaled_song_center, scaled_data, 'cosine')
     index = list(np.argsort(distances)[:, :n_songs][0])
     rec_songs = spotify_data.iloc[index]
     # rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
     return rec_songs[metadata_cols].to_dict(orient='records')
-# In[28]:
-def get_rec_song_uri(res):
     song_spotipy_info = []
     for song in res:
         song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
     return song_spotipy_info
-# In[30]:
-def get_recommendation_array(song_name, song_year, number_cols, user_critic_text):
     song_data = find_song(song_name, song_year)
-    list_song_data = format_song(song_data, number_cols)
     user_critic = "\n \"user_critic=" + user_critic_text
-    recommendation = get_response(get_text(user_critic, list_song_data))
-    rec_splitted = format_gpt_output(recommendation)
-    return rec_splitted
-# In[34]:
 def get_random_song():
     data = pd.read_csv("data/data.csv")
     sample = data.sample(n=1)
     return sample.name, sample.year
-def get_model_values(data_path, file_path, cluster_path):
-    data_path = data_path
-    file_path = file_path
-    cluster_path = cluster_path
-    # Load the pipeline from the pickle file
-    with open(file_path, 'rb') as file:
-        loaded_pipeline = pickle.load(file)
-    data = pd.read_csv(data_path)
-    labels = pd.read_csv(cluster_path)
-    data["cluster_label"] = labels["cluster_label"]
-    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
-                   'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
-    return loaded_pipeline, data, number_cols
 def control():
-    # song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
     data_path = "data/data.csv"
     file_path = "data/pipeline.pkl"
     cluster_labels = "data/cluster_labels.csv"
-    song_cluster_pipeline, data, number_cols = get_model_values(
         data_path, file_path, cluster_labels)
     user_critic_text = "it was dull and very loud"
     song_name = "Poem of a Killer"
     song_year = 2022
-    rec_splitted = get_recommendation_array(
-        song_name, song_year, number_cols, user_critic_text)
-    res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
-    print(res)
-    print(get_rec_song_uri(res))

 import numpy as np
 import pandas as pd
 import openai
 import warnings
 warnings.filterwarnings("ignore")
+def feature_get_pipeline_data_column_names():
+    """
+    Reads data from a CSV file, performs K-means clustering on numeric columns,
+    and assigns cluster labels to the data.
+    Returns:
+    - song_cluster_pipeline: Pipeline object containing the scaler and K-means model.
+    - data: DataFrame with the original data and cluster labels.
+    - feature_column_names: List of column names containing numeric values.
+    """
     data = pd.read_csv("data/data.csv")
     song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
                                       ], verbose=False)
     X = data.select_dtypes(np.number)
+    feature_column_names = list(X.columns)
     song_cluster_pipeline.fit(X)
     song_cluster_labels = song_cluster_pipeline.predict(X)
     data['cluster_label'] = song_cluster_labels
+    return song_cluster_pipeline, data, feature_column_names
def get_model_values(data_path, file_path, cluster_path):
    """
    Loads the pickled pipeline, the song data and the stored cluster labels.
    Args:
    - data_path: Path of the CSV file holding the song data.
    - file_path: Path of the pickle file holding the pipeline object.
    - cluster_path: Path of the CSV file with a "cluster_label" column.
    Returns:
    - The unpickled pipeline object.
    - DataFrame read from data_path with a "cluster_label" column attached.
    - List of the feature column names used by the recommender.
    """
    # The pickle is read first, exactly as before, so a missing model file fails early.
    with open(file_path, 'rb') as pipeline_file:
        pipeline = pickle.load(pipeline_file)

    songs = pd.read_csv(data_path)
    songs["cluster_label"] = pd.read_csv(cluster_path)["cluster_label"]

    feature_names = [
        'valence', 'year', 'acousticness', 'danceability', 'duration_ms',
        'energy', 'explicit', 'instrumentalness', 'key', 'liveness',
        'loudness', 'mode', 'popularity', 'speechiness', 'tempo',
    ]
    return pipeline, songs, feature_names
 def find_song(name, year):
+    """
+    Finds a song on Spotify based on the song name and year.
+    Args:
+    - name: Name of the song.
+    - year: Year of the song.
+    Returns:
+    - DataFrame containing the song's data.
+    """
     if os.path.isfile(".\secret_keys.py"):
         import secret_keys
         sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
     results = results['tracks']['items'][0]
     track_id = results['id']
     audio_features = sp.audio_features(track_id)[0]
     song_data['name'] = [name]
     song_data['year'] = [year]
     song_data['explicit'] = [int(results['explicit'])]
 def find_song_uri(name, year):
+    """
+    Finds the Spotify URI of a song based on the song name and year.
+    Args:
+    - name: Name of the song.
+    - year: Year of the song.
+    Returns:
+    - Spotify URI of the song.
+    """
     # Create a Spotify client object.
     if os.path.isfile(".\secret_keys.py"):
         import secret_keys
     else:
         client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
             client_id=os.environ.get("client_id"), client_secret=os.environ.get("client_secret")))
     results = client.search(q='track: {} year: {}'.format(name, year), limit=1)
     track = results['tracks']['items'][0]
     song_id = track['uri']
     return song_id
 def get_response(text):
+    """
+    Retrieves a response using OpenAI's GPT-3 language model.
+    Args:
+    - input_text: The input text for the model.
+    Returns:
+    - Generated response as a string.
+    """
     if os.path.isfile(".\secret_keys.py"):
         import secret_keys
         openai.api_key = secret_keys.openai_api_key
     return response.choices[0].get("text")
+def get_finetune_text(user_critic, list_song_data):
+    init_text = "I want you to act as a song recommender. I will provide you songs data with following format future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] \
+     values and user critic about the given song. And you will change given array values based on user critic and return result array. Do not write any explanations or other words, just return an array that include changes in future_columns\
+    and here is the np.describe() values of future_columns  \n\
     valence	year	acousticness	danceability	duration_ms	energy	explicit	instrumentalness	key	liveness	loudness	mode	popularity	speechiness	tempo \n \
     count	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653 \n \
     mean	0.528587211	1976.787241	0.502114764	0.537395535	230948.3107	0.482388835	0.084575132	0.167009581	5.199844128	0.205838655	-11.46799004	0.706902311	31.43179434	0.098393262	116.8615896 \n \
     max	1	2020	0.996	0.988	5403500	1	1	1	11	1	3.855	1	100	0.97	243.507"
     # init_last = "\n\n start with only typing random  future_columns values in given range as a array"
+    # user_critic_example = "\n \"user_critic=it was too old and loud but i like the energy\" "
     # example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
     # test_input = init_text + user_last + user_critic + example_features + user_critic_last
+    user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>]  format"
+    user_last = "\n\n start with the adjust following future_columns based on user_critic. "
+    features = "future_columns=" + list_song_data
     real_input = init_text + user_last + \
+        user_critic + features + user_critic_last
     return real_input
def format_gpt_output(raw_recommendation_array):
    """
    Parses the model's textual array reply into a list of floats.
    The numbers are taken from between the first "[" and the last "]", so
    replies such as "\\n\\n[1, 2]", "future_columns=[1, 2]" or a reply with
    trailing whitespace are all accepted.
    Args:
    - raw_recommendation_array: Raw text returned by the language model.
    Returns:
    - List of floats, in the order they appear in the reply.
    Raises:
    - ValueError: If any comma-separated item is not a valid number.
    """
    open_idx = raw_recommendation_array.find("[")
    close_idx = raw_recommendation_array.rfind("]")

    if 0 <= open_idx < close_idx:
        inner = raw_recommendation_array[open_idx + 1:close_idx]
    else:
        # No bracket pair found: keep the historical fixed-offset slice so
        # inputs that used to parse keep parsing the same way.
        inner = raw_recommendation_array[3:-1]

    return [float(item) for item in inner.split(",")]
def format_song_string(song_data, feature_column_names):
    """
    Renders the first row of the selected feature columns as "[v1, v2, ...]".
    Args:
    - song_data: DataFrame holding at least one row of song features.
    - feature_column_names: Columns to include, in output order.
    Returns:
    - String with the row's values joined by ", " and wrapped in brackets.
    """
    first_row = song_data[feature_column_names].values.tolist()[0]
    return '[' + ', '.join(map(str, first_row)) + ']'
def format_chatgpt_recommendations(song_list, spotify_data, song_cluster_pipeline, n_songs=15):
    """
    Finds the dataset songs closest to an adjusted feature vector.
    The vector and the dataset are scaled with the first step of the
    pipeline, then ranked by cosine distance.
    Args:
    - song_list: Sequence of feature values, one per feature column
      (valence, year, acousticness, ..., tempo).
    - spotify_data: DataFrame containing the feature columns plus
      'name', 'year' and 'artists'.
    - song_cluster_pipeline: Pipeline whose first step exposes transform().
    - n_songs: Maximum number of songs to return.
    Returns:
    - List of dicts with the keys 'name', 'year' and 'artists',
      ordered from closest to farthest.
    Raises:
    - ValueError: If song_list does not hold one value per feature column.
    """
    feature_column_names = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
                            'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
    metadata_cols = ['name', 'year', 'artists']

    song_center = np.array(song_list)
    # The model may return too few or too many values; fail with a clear
    # message instead of an opaque shape error further down.
    if song_center.size != len(feature_column_names):
        raise ValueError(
            "expected {} feature values, got {}".format(
                len(feature_column_names), song_center.size))

    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[feature_column_names])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))

    distances = cdist(scaled_song_center, scaled_data, 'cosine')
    # distances has a single row (one query vector); take its n closest indices.
    index = list(np.argsort(distances)[0, :n_songs])

    rec_songs = spotify_data.iloc[index]
    return rec_songs[metadata_cols].to_dict(orient='records')
def get_recommendation_song_uri(res):
    """
    Looks up the Spotify URI of every recommended song.
    Args:
    - res: Iterable of dicts, each with a "name" and a "year" entry.
    Returns:
    - List with one find_song_uri() result per song, in input order.
    """
    return [find_song_uri(entry["name"], entry["year"]) for entry in res]
def get_recommendation_array(song_name, song_year, feature_column_names, user_critic_text):
    """
    Asks the language model to adjust a song's features based on a user review.
    Args:
    - song_name: Name of the song to look up.
    - song_year: Year of the song.
    - feature_column_names: Feature columns to send to the model, in order.
    - user_critic_text: The user's free-text review of the song.
    Returns:
    - List of floats parsed from the model's reply.
    """
    song_data = find_song(song_name, song_year)
    list_song_data = format_song_string(song_data, feature_column_names)

    # Close the quote and add a trailing space: the prompt builder appends
    # "future_columns=..." right after this fragment, and without a terminator
    # the review text runs straight into it ("...very loudfuture_columns=[").
    user_critic = f'\n "user_critic={user_critic_text}" '

    recommendation = get_response(get_finetune_text(user_critic, list_song_data))
    return format_gpt_output(recommendation)
 def get_random_song():
     """
     Picks one random row from the song dataset.
     Returns:
     - The "name" and "year" columns of the one-row sample, as a tuple.
     """
     picked = pd.read_csv("data/data.csv").sample(n=1)
     return picked.name, picked.year
 def control():
     """
     Runs the whole recommendation flow once with fixed sample inputs and
     prints the recommended songs together with their Spotify URIs.
     """
     # The pipeline could instead be rebuilt from scratch with
     # feature_get_pipeline_data_column_names(); here the stored artifacts are loaded.
     pipeline, songs, feature_names = get_model_values(
         "data/data.csv", "data/pipeline.pkl", "data/cluster_labels.csv")

     adjusted_features = get_recommendation_array(
         "Poem of a Killer", 2022, feature_names, "it was dull and very loud")

     recommendations = format_chatgpt_recommendations(adjusted_features, songs, pipeline)
     uris = get_recommendation_song_uri(recommendations)
     print(recommendations, uris)
+if __name__ == "__main__":
+    control()