github_mongoDB_dataprocess/fork_analysis.py

65 lines
2.0 KiB
Python

from github import Github
import ast
import os
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
def fork_analysis():
    """Plot the combined distribution of the ``number`` column across all spreadsheets.

    Every file in ``./fork_issue_number`` is loaded with ``pandas.read_excel``,
    the occurrences of each distinct value in its ``number`` column are
    counted, and the counts are summed over all files. The combined tally is
    printed and then shown as a bar chart whose x axis is labelled
    ``new_issue_number`` and whose y axis is labelled
    ``fork_repository_number``.

    Returns:
        None. The function prints the tally and opens a matplotlib window.
    """
    # Local import so the block does not depend on a new file-level import.
    from collections import Counter

    path = './fork_issue_number'
    totals = Counter()
    for filename in os.listdir(path):
        df = pd.read_excel(os.path.join(path, filename))
        # Add this file's tallies to the running totals. A plain dict merge
        # ({**cnt, **counts}) would replace the total of a value that already
        # appeared in an earlier file instead of adding to it.
        totals.update(df['number'].value_counts().to_dict())

    # Back to a plain dict so the printed form stays a dict literal.
    cnt = dict(totals)
    print(cnt)

    matplotlib.rcParams['font.family'] = 'SimHei'  # or any other installed font with Chinese glyphs
    matplotlib.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
    # Keys are stringified so the bars are drawn as categories in tally order.
    plt.bar([str(i) for i in cnt], list(cnt.values()), color='skyblue')
    plt.xlabel('new_issue_number')
    plt.ylabel('fork_repository_number')
    # plt.title('the')
    plt.show()
# fork_analysis()
# Returns the list of forks of the upstream and downstream projects of numpy and its forks.
def find_fork_names(token):
    """Collect the ``repo_name`` of every spreadsheet row whose ``number`` is positive.

    The list of repositories is read from
    ``./numpy_refer_repo_name(addNumpyFork).txt``, which must contain a Python
    list literal of repository names. For each entry the spreadsheet
    ``./fork_issue_number/<repo with its first '/' replaced by '_'>_fork_repo_issues.xlsx``
    is loaded, and the ``repo_name`` values of the rows with ``number > 0``
    are appended to the result.

    Args:
        token: GitHub access token. Kept so existing callers keep working; it
            is not used, because everything is read from local files.

    Returns:
        list: the selected ``repo_name`` values, in the order the repositories
        are listed and the rows appear; duplicates are not removed.
    """
    with open('./numpy_refer_repo_name(addNumpyFork).txt', 'r', encoding='utf-8', newline='') as f:
        # literal_eval only accepts Python literals, so the file content is
        # parsed without being executed.
        repo_list = ast.literal_eval(f.read())

    res = []
    for repo in repo_list:
        # Only the first '/' is replaced; any later one stays in the file name.
        path = './fork_issue_number/' + repo.replace('/', '_', 1) + '_fork_repo_issues.xlsx'
        df = pd.read_excel(path)
        res.extend(df.loc[df['number'] > 0, 'repo_name'].tolist())
    return res
# path = './fork_issue_number'
# filelist = os.listdir(path)
# cnt = 0
# for filename in filelist:
# # if 'numpy_numpy_fork' not in filename:
# if 'numpy_numpy_fork' in filename:
# continue
# df = pd.read_excel(path+'/'+filename)
# filtered_repos = df[df['number'] > 0]['repo_name'].tolist()
#
# if filtered_repos:
# print('\n',filename)
# for repo in filtered_repos:
# repository = g.get_repo(repo)
# issues = repository.get_issues(state='all')
# cnt += issues.totalCount
# print(cnt,end=' ')
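# token = os.environ["GITHUB_TOKEN"]  # read the token from the environment; never commit a personal access token (revoke the one that was committed here)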
# find_fork_names(token)