__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"

import pandas as pd
from pydriller import Repository
import plotly.graph_objects as go
from get_gender import GenderPredictor


class CommitInfo:
    """Collect per-commit author metadata from a repository into a DataFrame.

    The repository is traversed once, at construction time, and the result is
    kept in ``self.df`` with the columns ``Author``, ``Committer_Date``,
    ``Author_Timezone`` and ``Year``.
    """

    # Columns taken from each commit. Passing them explicitly to the DataFrame
    # constructor keeps the schema stable even when no commit is yielded.
    _COLUMNS = ('Author', 'Committer_Date', 'Author_Timezone')

    def __init__(self, repo_url):
        """Store *repo_url* and immediately load its commit history.

        Args:
            repo_url: Repository location, passed unchanged to
                ``pydriller.Repository``.
        """
        self.repo_url = repo_url
        self.df = self.get_commit_info()

    def get_commit_info(self) -> pd.DataFrame:
        """Traverse the repository and return one row per commit.

        Returns:
            A DataFrame with the columns ``Author`` (author name),
            ``Committer_Date`` (converted to timezone-aware UTC timestamps),
            ``Author_Timezone`` (as reported by PyDriller) and ``Year``
            (year of the UTC-normalised ``Committer_Date``). When the
            traversal yields no commit the frame is empty but still has
            these four columns.
        """
        commit_data = [
            {
                'Author': commit.author.name,
                'Committer_Date': commit.committer_date,
                'Author_Timezone': commit.author_timezone,
            }
            for commit in Repository(self.repo_url).traverse_commits()
        ]

        # Explicit columns: without them an empty history would give a
        # column-less frame and the lookups below would raise KeyError.
        df = pd.DataFrame(commit_data, columns=list(self._COLUMNS))

        # Commits carry differing UTC offsets; normalise everything to UTC so
        # the column has a single datetime dtype and values are comparable.
        df["Committer_Date"] = pd.to_datetime(df["Committer_Date"], utc=True)

        # The year is taken from the UTC-normalised timestamp.
        df["Year"] = df["Committer_Date"].dt.year
        return df

    def get_first_commit_dates(self) -> "tuple[pd.DataFrame, pd.DataFrame]":
        """Return the full commit frame and each author's earliest commit.

        Returns:
            A ``(df, first_commit_dates)`` tuple. ``df`` is ``self.df``
            unchanged. ``first_commit_dates`` has one row per author with the
            columns ``Author``, ``First_Commit_Date`` and ``Author_Timezone``,
            where the timezone is taken from that same earliest commit.
        """
        # Sort by date first so that "first row per author" is the author's
        # earliest commit. Aggregating the date with 'min' and the timezone
        # with 'first' independently could pair the earliest date with the
        # timezone of a different commit when traversal order is not
        # chronological. The stable sort keeps traversal order among ties.
        chronological = self.df.sort_values('Committer_Date', kind='stable')

        first_commit_dates = (
            chronological
            .groupby('Author')[['Committer_Date', 'Author_Timezone']]
            .first()
            .reset_index()
            .rename(columns={'Committer_Date': 'First_Commit_Date'})
        )
        return self.df, first_commit_dates