import pandas as pd
from pydriller import Repository
import plotly.graph_objects as go
from get_gender import GenderPredictor


class CommitInfo:
    """Collect per-commit author metadata from a git repository.

    On construction the repository is traversed once with PyDriller and the
    result is stored in ``self.df`` (one row per commit).

    Attributes:
        repo_url: Path or URL handed to ``pydriller.Repository``.
        df: DataFrame with columns ``Author``, ``Committer_Date`` (UTC),
            ``Author_Timezone`` and ``Year``.
    """

    # Declared explicitly so an empty repository still yields a frame that
    # has the expected columns instead of raising KeyError further down.
    _COMMIT_COLUMNS = ["Author", "Committer_Date", "Author_Timezone"]

    def __init__(self, repo_url):
        self.repo_url = repo_url
        self.df = self.get_commit_info()

    def get_commit_info(self):
        """Traverse the repository and return one row per commit.

        Returns:
            pandas.DataFrame with columns ``Author``, ``Committer_Date``
            (converted to timezone-aware UTC timestamps),
            ``Author_Timezone`` (copied verbatim from PyDriller) and
            ``Year`` (calendar year of ``Committer_Date`` in UTC).
        """
        commit_data = [
            {
                'Author': commit.author.name,
                'Committer_Date': commit.committer_date,
                'Author_Timezone': commit.author_timezone,
            }
            for commit in Repository(self.repo_url).traverse_commits()
        ]
        df = pd.DataFrame(commit_data, columns=self._COMMIT_COLUMNS)

        # Commits carry mixed UTC offsets; normalise to UTC so the column has
        # a single datetime dtype and the .dt accessor works.
        df["Committer_Date"] = pd.to_datetime(df["Committer_Date"], utc=True)
        df["Year"] = df["Committer_Date"].dt.year
        return df

    def get_first_commit_dates(self):
        """Return the full commit frame and each author's earliest commit.

        The date and the timezone both come from the *same* commit: the one
        with the smallest ``Committer_Date`` for that author. Ties are
        resolved in favour of the commit that was traversed first.

        Returns:
            A ``(df, first_commit_dates)`` tuple. ``df`` is ``self.df``
            unchanged. ``first_commit_dates`` has one row per author, sorted
            by author name, with columns ``Author``, ``First_Commit_Date``
            and ``Author_Timezone``.
        """
        earliest = (
            self.df
            # Rows without an author name are left out of the per-author table.
            .dropna(subset=["Author"])
            # Stable sort keeps traversal order among equal timestamps.
            .sort_values("Committer_Date", kind="mergesort")
            .drop_duplicates(subset="Author", keep="first")
            .sort_values("Author")
        )
        first_commit_dates = (
            earliest[["Author", "Committer_Date", "Author_Timezone"]]
            .rename(columns={"Committer_Date": "First_Commit_Date"})
            .reset_index(drop=True)
        )
        return self.df, first_commit_dates


def main():
    """Example usage: predict contributor genders and build a pie chart."""
    modelpath = "saved_model/bestmodel.tf"
    gender_predictor = GenderPredictor(modelpath)
    repo_url = 'https://github.com/Amstf/DDoS-Attacks-Detection-Using-Adversarial-Neural-Network'
    commit_info = CommitInfo(repo_url)

    # Full commit table plus one row per author (earliest commit).
    df, first_commit_dates = commit_info.get_first_commit_dates()

    # NOTE(review): predict_gender presumably returns a (gender, confidence)
    # pair -- confirm against get_gender.GenderPredictor.
    first_commit_dates[['Predicted_Gender', 'Confidence']] = first_commit_dates['Author'].apply(
        lambda name: pd.Series(gender_predictor.predict_gender(name))
    )
    merged_df = df.merge(
        first_commit_dates[["Author", "Predicted_Gender", "Confidence"]],
        on=["Author"],
    )

    # Commits per (Year, Predicted_Gender). Computed for further analysis;
    # nothing below consumes it yet.
    gender_counts = merged_df.groupby(['Year', 'Predicted_Gender']).size().reset_index(name='Count')

    # Labels and values must come from the same value_counts() result:
    # value_counts() orders by frequency, unique() by first appearance, so
    # mixing the two attaches counts to the wrong slice.
    counts = first_commit_dates['Predicted_Gender'].value_counts()
    pie_labels = counts.index.tolist()
    pie_values = counts.tolist()

    # Colour by label name rather than by position so that a given gender
    # always gets the same colour. Label spelling is assumed to match what
    # GenderPredictor emits (compared case-insensitively) -- TODO confirm.
    gender_colors = {"male": "blue", "female": "pink", "unknown": "red"}
    slice_colors = [
        gender_colors.get(str(label).casefold(), "gray") for label in pie_labels
    ]

    fig = go.Figure(
        data=[go.Pie(labels=pie_labels, values=pie_values, marker=dict(colors=slice_colors))]
    )

    # HTML fragment of the chart (no <html>/<body> wrapper), ready to embed.
    chart_html = fig.to_html(full_html=False)
    print(first_commit_dates)


# Example usage:
if __name__ == "__main__":
    main()