File size: 1,642 Bytes
d181dd1
 
 
c75a8b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5603359
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"

import pandas as pd
from pydriller import Repository
import plotly.graph_objects as go
from get_gender import GenderPredictor

class CommitInfo:
    """Collect per-commit author metadata of a Git repository in a DataFrame.

    The repository is traversed once, at construction time. The result is
    stored in ``self.df`` with the columns ``Author``, ``Committer_Date``
    (converted to UTC), ``Author_Timezone`` and ``Year``.
    """

    # Columns captured for every commit. Declared up front so the DataFrame
    # keeps this schema even when the traversal yields no commits at all.
    _COMMIT_COLUMNS = ['Author', 'Committer_Date', 'Author_Timezone']

    def __init__(self, repo_url):
        """Store ``repo_url`` and immediately load its commit history.

        Args:
            repo_url: Repository location handed unchanged to
                ``pydriller.Repository``.
        """
        self.repo_url = repo_url
        self.df = self.get_commit_info()

    def get_commit_info(self) -> pd.DataFrame:
        """Traverse the repository and return one row per commit.

        Returns:
            A DataFrame with the columns ``Author``, ``Committer_Date``
            (timezone-aware, UTC), ``Author_Timezone`` (the value PyDriller
            reports for the commit) and ``Year`` (year of the UTC committer
            date). The frame is empty, but still has all four columns, when
            the repository yields no commits.
        """
        commit_data = [
            {
                'Author': commit.author.name,
                'Committer_Date': commit.committer_date,
                'Author_Timezone': commit.author_timezone,
            }
            for commit in Repository(self.repo_url).traverse_commits()
        ]

        # Passing the column list explicitly keeps the schema stable for an
        # empty history; without it the column lookups below raise KeyError.
        df = pd.DataFrame(commit_data, columns=self._COMMIT_COLUMNS)

        # Commits carry differing UTC offsets; normalising to UTC gives the
        # column a single datetime dtype so the ``.dt`` accessor works.
        df["Committer_Date"] = pd.to_datetime(df["Committer_Date"], utc=True)

        # Year of the (UTC) committer date.
        df["Year"] = df["Committer_Date"].dt.year

        return df

    def get_first_commit_dates(self):
        """Return the commit table together with each author's first commit.

        Returns:
            A tuple ``(df, first_commit_dates)``. ``df`` is ``self.df``
            itself (not a copy). ``first_commit_dates`` has one row per
            author, ordered by author name, with the columns ``Author``,
            ``First_Commit_Date`` (earliest committer date) and
            ``Author_Timezone`` (first non-null timezone in date order,
            i.e. the one belonging to that earliest commit when present).
        """
        # Order rows chronologically before grouping so that 'first' picks the
        # timezone of the earliest commit rather than of whichever commit
        # happened to be traversed first. mergesort is stable, so commits with
        # identical dates keep their traversal order.
        chronological = self.df.sort_values('Committer_Date', kind='mergesort')

        first_commit_dates = chronological.groupby('Author').agg({
            'Committer_Date': 'min',
            'Author_Timezone': 'first',
        }).reset_index()

        # Rename the columns for clarity
        first_commit_dates.columns = ['Author', 'First_Commit_Date', 'Author_Timezone']

        return self.df, first_commit_dates