Spaces:
Runtime error
Runtime error
__copyright__ = "Copyright (C) 2023 Ali Mustapha" | |
__license__ = "GPL-3.0-or-later" | |
import pandas as pd | |
from pydriller import Repository | |
import plotly.graph_objects as go | |
from get_gender import GenderPredictor | |
class CommitInfo:
    """Collect per-commit author metadata for a Git repository.

    On construction the repository identified by ``repo_url`` is traversed
    with PyDriller and the result is stored in ``self.df``, a DataFrame with
    the columns ``Author``, ``Committer_Date`` (UTC), ``Author_Timezone``
    and ``Year``.
    """

    # Fields extracted from every commit. Declared up front so a repository
    # without commits still produces a DataFrame with the expected columns.
    _COMMIT_COLUMNS = ('Author', 'Committer_Date', 'Author_Timezone')

    def __init__(self, repo_url):
        """Store *repo_url* and eagerly load the commit table into ``self.df``."""
        self.repo_url = repo_url
        self.df = self.get_commit_info()

    @staticmethod
    def _records_to_frame(records):
        """Turn a list of per-commit dicts into the normalized DataFrame.

        ``Committer_Date`` is converted to timezone-aware UTC timestamps and
        a ``Year`` column is derived from it. An empty *records* list yields
        an empty frame that still has all four columns.
        """
        df = pd.DataFrame(records, columns=list(CommitInfo._COMMIT_COLUMNS))
        # utc=True lets commits recorded with different UTC offsets share a
        # single datetime dtype.
        df["Committer_Date"] = pd.to_datetime(df["Committer_Date"], utc=True)
        df["Year"] = df["Committer_Date"].dt.year
        return df

    def get_commit_info(self):
        """Traverse the repository and return one row per commit.

        Returns:
            A DataFrame with columns ``Author``, ``Committer_Date`` (UTC),
            ``Author_Timezone`` and ``Year``.
        """
        # NOTE(review): PyDriller documents author_timezone as an offset in
        # seconds -- confirm before interpreting the column.
        commit_data = [
            {
                'Author': commit.author.name,
                'Committer_Date': commit.committer_date,
                'Author_Timezone': commit.author_timezone,
            }
            for commit in Repository(self.repo_url).traverse_commits()
        ]
        return self._records_to_frame(commit_data)

    def get_first_commit_dates(self):
        """Return the commit table and each author's earliest commit.

        Returns:
            A tuple ``(self.df, first_commit_dates)`` where
            ``first_commit_dates`` has one row per author with the columns
            ``Author``, ``First_Commit_Date`` and ``Author_Timezone``.
        """
        # Sort chronologically first so that 'first' picks the timezone of
        # the same commit that 'min' reports, regardless of the row order in
        # self.df. A stable sort keeps the original order among ties.
        chronological = self.df.sort_values('Committer_Date', kind='stable')
        first_commit_dates = chronological.groupby('Author').agg({
            'Committer_Date': 'min',
            'Author_Timezone': 'first',
        }).reset_index()
        # Rename the columns for clarity.
        first_commit_dates.columns = ['Author', 'First_Commit_Date', 'Author_Timezone']
        return self.df, first_commit_dates