56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from scipy.interpolate import make_interp_spline, BSpline
|
|
from sklearn.linear_model import LinearRegression
|
|
import os
|
|
|
|
def draw(name):
    """Plot weekly commit counts for one repository and mark its releases.

    Reads ``<name>_release_data.csv`` and ``<name>_commit_data.csv`` from the
    ``data`` directory under the current working directory, buckets commits
    into week numbers, draws the per-week commit count (smoothed with a cubic
    spline when enough points exist) and adds a dashed red vertical line,
    labelled with the tag, for every release whose tag has no ``-`` and whose
    third dot-separated component is ``0``. The figure is shown with
    ``plt.show()``.

    Args:
        name: Prefix of the two CSV files to load.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        ValueError: If a CSV file does not have the expected number of
            columns (3 for commits, 5 for releases).
    """
    seconds_per_week = 7 * 24 * 60 * 60

    # Build the path from components instead of the literal '.\data': the
    # backslash is an invalid escape sequence and only works as a separator
    # on Windows.
    data_dir = os.path.join('.', 'data')
    release_file = os.path.join(data_dir, name + '_release_data.csv')
    commit_file = os.path.join(data_dir, name + '_commit_data.csv')

    # Both files are read as headerless CSVs; column names are assigned here.
    commit_df = pd.read_csv(commit_file, header=None)
    release_df = pd.read_csv(release_file, header=None)
    commit_df.columns = ['commit_hash', 'commit_created_at', 'commit_sec']
    release_df.columns = [
        'Title', 'Tag', 'Author', 'Rel_Created_At', 'release_sec',
    ]

    # Convert the second counts into 1-based week numbers (kept as floats).
    # NOTE(review): the reference point of *_sec is not visible here.
    release_df['week'] = release_df['release_sec'] // seconds_per_week + 1.0
    commit_df['week'] = commit_df['commit_sec'] // seconds_per_week + 1.0

    # Only keep commits from the week of the earliest release onwards; the
    # minimum is taken before the tag filter below, so it considers every
    # release in the file.
    first_release_week = release_df['week'].min()
    commit_df = commit_df[commit_df['week'] >= first_release_week]
    commit_grouped = commit_df.groupby('week').size().reset_index(name='count')

    # Keep releases whose tag contains no '-' and whose third dot-separated
    # component is '0'. Casting to str and passing na=False keeps the mask
    # purely boolean even when a tag is missing or was parsed as a number.
    tags = release_df['Tag'].astype(str)
    has_dash = tags.str.contains('-', regex=False, na=False)
    third_part = tags.str.split('.').str[2]
    release_df = release_df[~has_dash & (third_part == '0')]

    # Plot the commit counts per week.
    weeks = commit_grouped['week'].to_numpy()
    counts = commit_grouped['count'].to_numpy()
    if len(weeks) >= 4:
        # A cubic (k=3) interpolating spline needs at least k + 1 points.
        xnew = np.linspace(weeks.min(), weeks.max(), 300)
        spl = make_interp_spline(weeks, counts, k=3)
        plt.plot(xnew, spl(xnew))
    elif len(weeks) > 0:
        # Too few points to fit the spline: draw the raw counts instead.
        plt.plot(weeks, counts, marker='o')

    # Mark each selected release with a dashed line and its tag at y = 0.
    for week, tag in zip(release_df['week'], release_df['Tag']):
        plt.axvline(x=week, color='r', linestyle='--')
        plt.text(week, 0, tag)

    plt.show()
|
|
|
|
if __name__ == '__main__':
    # Draw one plot per entry in the 'Name' column of repo_data.csv,
    # printing each name before its plot is produced.
    repo_names = pd.read_csv('repo_data.csv')['Name']
    for repo_name in repo_names:
        print(repo_name)
        draw(repo_name)
|