# NOTE: scraped Hugging Face Spaces page header said "Runtime error" — the
# app below crashed at runtime on the Space; script content follows.
#Import libraries
#pip install pandas torch transformers datasets scikit-learn
import torch

#Set device: prefer CUDA, fall back to Apple's MPS backend, else CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')  # CUDA GPU
elif torch.backends.mps.is_available():
    device = torch.device('mps')  # Apple GPU
else:
    device = torch.device("cpu")
print('Using device:', device)

#Additional Info when using cuda
if device.type == 'cuda':
    gib = 1024 ** 3  # bytes per GiB
    print("Device name: ", torch.cuda.get_device_name(0))
    print("Device properties:", torch.cuda.get_device_properties(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0) / gib, 1), 'GB')
    print('Cached: ', round(torch.cuda.memory_reserved(0) / gib, 1), 'GB')
#ncf_model.to(device)
#Load dataset
import pandas as pd
import numpy as np
from datasets import load_dataset
from sklearn.model_selection import train_test_split

review_dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023","raw_review_All_Beauty",trust_remote_code=True)
# The reviews for this dataset live in the 'full' split.
reviews_df = pd.DataFrame(review_dataset['full'])

# Dense 0..N-1 indices for every distinct user and item (embedding rows).
user_map = {user: idx for idx, user in enumerate(reviews_df["user_id"].unique())}
item_map = {asin: idx for idx, asin in enumerate(reviews_df["parent_asin"].unique())}

# Item metadata (titles, image URLs) — also in the 'full' split.
meta_dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023","raw_meta_All_Beauty")
meta_df = pd.DataFrame(meta_dataset['full'])

#Split data
reviews_df["user_idx"] = reviews_df["user_id"].map(user_map)
reviews_df["item_idx"] = reviews_df["parent_asin"].map(item_map)
# 80/20 train-test split, seeded for reproducibility.
train, test = train_test_split(reviews_df, test_size=0.2, random_state=42)
#NCF model | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
class NCF(nn.Module):
    """Neural Collaborative Filtering model.

    Learns user and item embeddings and scores a (user, item) pair by
    passing the concatenated embeddings through an MLP. The final sigmoid
    squashes the score into (0, 1); the training code normalizes 1-5 star
    ratings by /5.0 to match this range.
    """

    def __init__(self, num_users, num_items, embedding_dim=32, hidden_dims=(64, 32), dropout_rate=0.5):
        """
        Args:
            num_users: number of distinct users (embedding rows).
            num_items: number of distinct items (embedding rows).
            embedding_dim: size of each user/item embedding vector.
            hidden_dims: widths of the hidden MLP layers. Default is a
                tuple (not a list) to avoid the mutable-default pitfall.
            dropout_rate: dropout probability applied before the output head.
        """
        super().__init__()
        # Embedding lookup tables
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        # MLP over the concatenated user+item embeddings
        input_dim = embedding_dim * 2
        layers = []
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            input_dim = hidden_dim
        self.mlp = nn.Sequential(*layers)
        # Final scalar prediction head
        self.output = nn.Linear(hidden_dims[-1], 1)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, user_idx, item_idx):
        """Return predicted normalized ratings in (0, 1), shape (batch, 1).

        Args:
            user_idx: LongTensor of user indices, shape (batch,).
            item_idx: LongTensor of item indices, shape (batch,).
        """
        user_emb = self.user_embedding(user_idx)
        item_emb = self.item_embedding(item_idx)
        # Concatenate and pass through MLP
        x = torch.cat([user_emb, item_emb], dim=-1)
        x = self.mlp(x)
        x = self.dropout(x)
        # Sigmoid keeps the prediction in (0, 1)
        return torch.sigmoid(self.output(x))
#prepare dataloader
from torch.utils.data import Dataset, DataLoader

class ReviewsDataset(Dataset):
    """Torch Dataset over the (user_idx, item_idx, rating) review columns."""

    def __init__(self, data):
        # Pull the columns out once as numpy arrays for cheap row access.
        self.user_idx = data["user_idx"].values
        self.item_idx = data["item_idx"].values
        self.rating = data["rating"].values

    def __len__(self):
        # One sample per review row.
        return len(self.rating)

    def __getitem__(self, idx):
        # Cast each field to the dtype the embedding / loss layers expect.
        user = torch.tensor(self.user_idx[idx], dtype=torch.long)
        item = torch.tensor(self.item_idx[idx], dtype=torch.long)
        rating = torch.tensor(self.rating[idx], dtype=torch.float)
        return {"user_idx": user, "item_idx": item, "rating": rating}
# Create DataLoaders — shuffle only the training data; keep evaluation order fixed.
batch_size = 256
train_dataset = ReviewsDataset(train)
test_dataset = ReviewsDataset(test)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
#train
# Per-epoch training/evaluation history. NOTE: the "*_acc" keys actually
# store MAE values, not accuracies.
results = {key: [] for key in ("train_loss", "train_acc", "test_loss", "test_acc")}
def train_model(model, train_loader, test_loader, epochs=10, lr=0.001, lr_decay_step=5, lr_decay_gamma=0.1):
    """Train `model` with MSE on ratings normalized to (0, 1].

    Records per-epoch train loss/MAE in the module-level `results` dict and
    runs `evaluate_model` each epoch so the train/test curves stay the same
    length for plotting.

    Args:
        model: NCF-style module taking (user_idx, item_idx) batches.
        train_loader / test_loader: DataLoaders yielding dict batches with
            "user_idx", "item_idx" and "rating" keys.
        epochs: number of training epochs.
        lr: initial Adam learning rate.
        lr_decay_step / lr_decay_gamma: StepLR schedule parameters.
    """
    # Pick the best available device instead of assuming CUDA — the original
    # hard-coded "cuda" and crashed on CPU/MPS-only hosts.
    run_device = torch.device(
        "cuda" if torch.cuda.is_available()
        else "mps" if torch.backends.mps.is_available()
        else "cpu"
    )
    model.to(run_device)
    criterion = nn.MSELoss()
    # weight_decay adds L2 regularization on the embeddings/MLP weights.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_decay_step, gamma=lr_decay_gamma)
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_mae = 0
        for batch in train_loader:
            user_idx = batch["user_idx"].to(run_device)
            item_idx = batch["item_idx"].to(run_device)
            ratings = batch["rating"].to(run_device)
            optimizer.zero_grad()
            predictions = model(user_idx, item_idx).squeeze()
            loss = criterion(predictions, ratings / 5.0)  # Normalize ratings
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_mae += torch.abs(predictions - (ratings / 5.0)).sum().item()
        avg_train_loss = train_loss / len(train_loader)
        avg_train_mae = train_mae / len(train_loader.dataset)
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Train MAE: {avg_train_mae:.4f}")
        results["train_loss"].append(avg_train_loss)
        results["train_acc"].append(avg_train_mae)
        scheduler.step()
        # Evaluate every epoch so results["test_*"] aligns with the train curves.
        evaluate_model(model, test_loader)
def evaluate_model(model, test_loader):
    """Evaluate `model` on `test_loader`; print and record avg loss and MAE.

    Appends the averages to the module-level `results` dict
    ("test_loss" / "test_acc" — the latter actually stores MAE).
    """
    # Run on whatever device the model already lives on instead of
    # hard-coding "cuda" (crashes on machines without an NVIDIA GPU).
    run_device = next(model.parameters()).device
    model.eval()
    criterion = nn.MSELoss()  # hoisted: was re-created for every batch
    test_loss = 0
    test_mae = 0
    with torch.no_grad():
        for batch in test_loader:
            user_idx = batch["user_idx"].to(run_device)
            item_idx = batch["item_idx"].to(run_device)
            ratings = batch["rating"].to(run_device)
            predictions = model(user_idx, item_idx).squeeze()
            loss = criterion(predictions, ratings / 5.0)
            test_loss += loss.item()
            test_mae += torch.abs(predictions - (ratings / 5.0)).sum().item()
    avg_test_loss = test_loss / len(test_loader)
    avg_test_mae = test_mae / len(test_loader.dataset)
    print(f"Test Loss: {avg_test_loss:.4f}, Test MAE: {avg_test_mae:.4f}")
    results["test_loss"].append(avg_test_loss)
    results["test_acc"].append(avg_test_mae)
# Vocabulary sizes for the embedding tables
num_users, num_items = len(user_map), len(item_map)
# Initialize model
ncf_model = NCF(num_users=num_users, num_items=num_items, embedding_dim=32, hidden_dims=[64, 32])
# Train the model
train_model(ncf_model, train_loader, test_loader, epochs=10, lr=1e-4)
#Plot
import matplotlib.pyplot as plt

# Plot loss curves of a model
def plot_loss_curves(results):
    """Plot train/test loss (left) and train/test MAE (right) over epochs."""
    epochs = range(len(results["train_loss"]))
    plt.figure(figsize=(15, 7))
    # Left panel: MSE loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, results["train_loss"], label="train_loss")
    plt.plot(epochs, results["test_loss"], label="test_loss")
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.legend()
    # Right panel: labelled "accuracy", but the stored metric is MAE
    plt.subplot(1, 2, 2)
    plt.plot(epochs, results["train_acc"], label="train_accuracy")
    plt.plot(epochs, results["test_acc"], label="test_accuracy")
    plt.title("Accuracy")
    plt.xlabel("Epochs")
    plt.legend()

plot_loss_curves(results)
#Recommendations
# Example recommendation for a user
user_id = "AHZM3GVSTF4MCGO67QFLXCNIXSIQ"
user_index = user_map[user_id]

def recommend(model, user_idx, item_indices, k=10):
    """Return the parent_asins of the top-k scored items for one user.

    Args:
        model: trained NCF model.
        user_idx: integer user index (a value from `user_map`).
        item_indices: candidate item indices to score.
        k: number of items to return.
    """
    model.eval()
    # Score on the device the model lives on — was hard-coded to "cuda",
    # which crashes on CPU-only hosts.
    run_device = next(model.parameters()).device
    user_tensor = torch.tensor([user_idx] * len(item_indices)).to(run_device)
    item_tensor = torch.tensor(item_indices).to(run_device)
    with torch.no_grad():
        predictions = model(user_tensor, item_tensor).squeeze()
    top_k_items = torch.topk(predictions, k=k).indices.cpu().numpy()
    # Build the index->asin list ONCE; the original rebuilt list(item_map.keys())
    # for every top-k hit (O(n) each time).
    asins = list(item_map.keys())
    return [asins[i] for i in top_k_items]

item_indices = list(range(len(item_map)))
recommendations = recommend(ncf_model, user_index, item_indices)
print("Recommended items:", recommendations)
# import matplotlib.pyplot as plt | |
# from PIL import Image | |
# import requests | |
# from io import BytesIO | |
# def fetch_item_images_from_df(asins, meta_df): | |
# items_with_images = [] | |
# for asin in asins: | |
# row = meta_df[meta_df["parent_asin"] == asin] | |
# if not row.empty: | |
# images = row["images"].iloc[0] | |
# if images: # Check if images are available | |
# #print(images["large"][0]) | |
# items_with_images.append((asin, images["large"][0], row["title"])) | |
# return items_with_images | |
# def display_items(title, items): | |
# print(items) | |
# plt.figure(figsize=(15, 5)) | |
# plt.suptitle(title, fontsize=16) | |
# for idx, (asin, image_urls, title) in enumerate(items): | |
# if image_urls: # Only display if images are available | |
# try: | |
# response = requests.get(image_urls) | |
# img = Image.open(BytesIO(response.content)) | |
# plt.subplot(1, len(items), idx + 1) | |
# plt.imshow(img) | |
# plt.axis("off") | |
# plt.title(title) | |
# except Exception as e: | |
# print(f"Could not fetch image for ASIN {asin}: {e}") | |
# plt.tight_layout() | |
# plt.show() | |
# # Fetch ASINs for bought items | |
# #user_id = "user_1" | |
# bought_asins = reviews_df[reviews_df["user_id"] == user_id]["asin"].tolist() | |
# # Fetch images for recommended items | |
# recommended_asins = recommend(ncf_model, user_index, list(range(len(item_map)))) | |
# bought_items = fetch_item_images_from_df(bought_asins, meta_df) | |
# recommended_items = fetch_item_images_from_df(recommended_asins, meta_df) | |
# # Display images | |
# display_items("User Bought Items", bought_items) | |
# display_items("Recommended Items", recommended_items) | |
import gradio as gr | |
import torch | |
from PIL import Image | |
import requests | |
from io import BytesIO | |
# Function to fetch item images from the DataFrame
def fetch_item_images_from_df(asins, meta_df):
    """Look up a large-image URL and title for each ASIN in `meta_df`.

    Args:
        asins: iterable of parent_asin strings.
        meta_df: metadata DataFrame with "parent_asin", "images" and "title"
            columns; "images" appears to hold dicts keyed by size, e.g.
            {"large": [url, ...]} — TODO confirm against the dataset schema.

    Returns:
        List of [image_url, title] pairs. ASINs missing from `meta_df`, or
        with no large image, are silently skipped.
    """
    items_with_images = []
    for asin in asins:
        row = meta_df[meta_df["parent_asin"] == asin]
        if row.empty:
            continue
        images = row["images"].iloc[0]
        # Guard both "no images value" and "dict present but 'large' list
        # empty/absent" — the original checked only `if images:` and then
        # indexed images["large"][0], raising KeyError/IndexError for items
        # without a large image.
        if images and images.get("large"):
            items_with_images.append([images["large"][0], row["title"].iloc[0]])
    return items_with_images
# Function to recommend and fetch images for bought and recommended items
def recommend_and_display(user_id):
    """Return ([bought items], [recommended items]) as [image_url, title] pairs."""
    user_index = user_map.get(user_id)
    # Unknown user id -> nothing to show.
    if user_index is None:
        return [], []
    # Items the user actually reviewed ("bought"), with their images.
    bought_asins = reviews_df[reviews_df["user_id"] == user_id]["parent_asin"].tolist()
    bought_items = fetch_item_images_from_df(bought_asins, meta_df)
    # Model-scored top items over the full catalog, with their images.
    recommended_asins = recommend(ncf_model, user_index, list(range(len(item_map))))
    recommended_items = fetch_item_images_from_df(recommended_asins, meta_df)
    return bought_items, recommended_items
# Gradio function to display the recommendations
def gradio_interface(user_id):
    """Thin Gradio wrapper: forwards the (bought, recommended) pair unchanged."""
    return recommend_and_display(user_id)
# Gradio Interface | |
interface = gr.Interface( | |
fn=gradio_interface, | |
inputs=gr.Textbox(label="Enter User ID"), | |
outputs=[ | |
gr.Gallery(label="Bought Items"), | |
gr.Gallery(label="Recommended Items") | |
], | |
title="Amazon Recommender", | |
description="Enter a User ID to see images of bought and recommended items.", | |
live=True | |
) | |
# Launch Gradio Interface | |
interface.launch(share=True) | |