# release_analysis/plt.py
#
# 56 lines
# 2.1 KiB
# Python

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline, BSpline
from sklearn.linear_model import LinearRegression
import os
def draw(name):
    """Plot weekly commit counts for one repository and mark its releases.

    Reads ``<name>_commit_data.csv`` and ``<name>_release_data.csv`` from the
    ``data`` directory under the current working directory. Both files are
    read without a header row; column names are assigned here.

    The ``commit_sec`` / ``release_sec`` columns are treated as a number of
    seconds and bucketed into 1-based week numbers. Commits from weeks before
    the earliest release week are dropped. Only releases whose tag contains
    no ``-`` and whose third dot-separated component is ``'0'`` are drawn.

    Args:
        name: Repository name used as the prefix of the two CSV file names.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    seconds_per_week = 7 * 24 * 60 * 60
    spline_degree = 3

    # Build the path from components so it works on every platform and does
    # not rely on the invalid escape sequence in '.\data'.
    data_dir = os.path.join('.', 'data')
    release_file = os.path.join(data_dir, name + '_release_data.csv')
    commit_file = os.path.join(data_dir, name + '_commit_data.csv')

    # read commit.csv and release.csv (neither file has a header row)
    commit_df = pd.read_csv(commit_file, header=None)
    release_df = pd.read_csv(release_file, header=None)

    # set column names
    commit_df.columns = ['commit_hash', 'commit_created_at', 'commit_sec']
    release_df.columns = ['Title', 'Tag', 'Author', 'Rel_Created_At', 'release_sec']

    # convert release_sec to a 1-based week number
    release_df['week'] = release_df['release_sec'] // seconds_per_week + 1.0

    # The first release week is taken over ALL releases, i.e. before the tag
    # filter below is applied.
    first_release_week = release_df['week'].min()

    # convert commit_sec to week number, drop pre-release weeks, count per week
    commit_df['week'] = commit_df['commit_sec'] // seconds_per_week + 1.0
    commit_df = commit_df[commit_df['week'] >= first_release_week]
    commit_grouped = commit_df.groupby('week').size().reset_index(name='count')

    # Keep tags without '-' whose third dot-separated component is '0'.
    # na=False keeps the mask boolean when a Tag cell is missing; such rows
    # are then rejected by the component test (NaN == '0' is False).
    has_dash = release_df['Tag'].str.contains('-', na=False)
    third_is_zero = release_df['Tag'].str.split('.').str[2] == '0'
    release_df = release_df[~has_dash & third_is_zero]

    # plot the commit changes
    weeks = commit_grouped['week']
    counts = commit_grouped['count']
    if len(commit_grouped) > spline_degree:
        # A degree-k interpolating spline needs at least k + 1 data points.
        xnew = np.linspace(weeks.min(), weeks.max(), 300)
        spl = make_interp_spline(weeks, counts, k=spline_degree)
        plt.plot(xnew, spl(xnew))
    else:
        # Too few points to fit the spline: draw the raw weekly counts.
        plt.plot(weeks, counts, marker='o')

    # add a vertical line and a tag label for each selected release week
    for week, tag in zip(release_df['week'], release_df['Tag']):
        plt.axvline(x=week, color='r', linestyle='--')
        plt.text(week, 0, tag)

    # show the plot
    plt.show()
if __name__ == '__main__':
    # Script entry point: plot every repository listed in the 'Name' column
    # of repo_data.csv, echoing each name before its plot is drawn.
    repo_names = pd.read_csv('repo_data.csv')['Name']
    for repo_name in repo_names:
        print(repo_name)
        draw(repo_name)