89 lines
3.2 KiB
Python
89 lines
3.2 KiB
Python
|
||
|
||
import ast
import os
import time
from datetime import datetime

import pymongo
from github import Github

from fork_analysis import find_fork_names
|
||
|
||
def get_timeline_event(repository_name, db, clt, access_token):
    """Store the timeline events of every issue of a repository in MongoDB.

    For each issue returned by ``repository.get_issues(state='all')`` the
    issue's timeline is fetched and the raw payload of every event is
    inserted as one document, with the owning issue number added under the
    ``issue_number`` key. Items are not filtered on ``issue.pull_request``.

    Args:
        repository_name: Full repository name, e.g. ``'numpy/numpy'``.
        db: Name of the MongoDB database to write to.
        clt: Name of the collection inside ``db``.
        access_token: GitHub access token used to authenticate API calls.

    Failures while processing a single issue are appended to
    ``get_numpy_timeline_err.txt`` and the loop moves on to the next issue.
    Errors raised while connecting or while paging through the issue list
    itself are not caught here and propagate to the caller.
    """
    # Connect to GitHub.
    g = Github(access_token)
    repository = g.get_repo(repository_name)

    # Connect to MongoDB. This function is called once per repository, so
    # the client must be closed explicitly or connections pile up.
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    try:
        collection = client[db][clt]

        # Fetch the repository's issues (every state).
        issues = repository.get_issues(state='all')
        print(issues.totalCount)

        for issue in issues:
            try:
                for event in issue.get_timeline():
                    raw_data = event.raw_data
                    raw_data['issue_number'] = issue.number
                    collection.insert_one(raw_data)
            except Exception as e:
                # Best effort: one failing issue must not abort the whole
                # crawl, so record it and continue. Each record ends with a
                # newline so entries do not run together in the log.
                with open('get_numpy_timeline_err.txt', 'a', encoding='utf-8', newline='') as f:
                    f.write(str(repository_name) + ' ' + str(issue.number)
                            + f"发生的错误类型是:{type(e)}" + f"错误信息是:{e}" + "\n")
    finally:
        client.close()

    # Report when this repository finished.
    now = datetime.now()
    print("当前时间是:", now.strftime("%Y-%m-%d %H:%M:%S"))
|
||
|
||
def get_issue_and_pr(repository_name, db, clt, access_token):
    """Store the raw data of every issue of a repository in MongoDB.

    Each item returned by ``repository.get_issues(state='all')`` is inserted
    as one document built from its ``raw_data``. Items are not filtered on
    ``issue.pull_request``.

    Args:
        repository_name: Full repository name, e.g. ``'numpy/numpy'``.
        db: Name of the MongoDB database to write to.
        clt: Name of the collection inside ``db``.
        access_token: GitHub access token used to authenticate API calls.

    Failures while processing a single issue are appended to
    ``get_numpy_err.txt`` and the loop moves on to the next issue. Errors
    raised while connecting or while paging through the issue list itself
    are not caught here and propagate to the caller.
    """
    # Connect to GitHub.
    g = Github(access_token)
    repository = g.get_repo(repository_name)

    # Connect to MongoDB. This function is called once per repository, so
    # the client must be closed explicitly or connections pile up.
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    try:
        collection = client[db][clt]

        # Fetch the repository's issues (every state).
        issues = repository.get_issues(state='all')
        print(issues.totalCount)

        # Walk every issue and store its raw data in MongoDB.
        for issue in issues:
            try:
                collection.insert_one(issue.raw_data)
            except Exception as e:
                # Best effort: record the failure and continue. The
                # repository name is logged (as the timeline crawler does)
                # because issue numbers alone are ambiguous across
                # repositories, and each record ends with a newline.
                with open('get_numpy_err.txt', 'a', encoding='utf-8', newline='') as f:
                    f.write(str(repository_name) + ' ' + str(issue.number)
                            + f"发生的错误类型是:{type(e)}" + f"错误信息是:{e}" + "\n")
    finally:
        client.close()

    # Report when this repository finished.
    now = datetime.now()
    print("当前时间是:", now.strftime("%Y-%m-%d %H:%M:%S"))
|
||
|
||
# SECURITY: the GitHub access token must never be committed to source
# control. It is read from the GITHUB_TOKEN environment variable instead.
# The token that was previously hard-coded here should be treated as leaked
# and revoked.
token = os.environ.get("GITHUB_TOKEN")
if not token:
    raise SystemExit("Set the GITHUB_TOKEN environment variable to a GitHub access token.")

# Earlier crawl stages, kept for reference (currently disabled):
#
# numpy
# get_timeline_event('numpy/numpy', 'numpy_db', 'issue_timeline_new', token)
# get_issue_and_pr('numpy/numpy', 'numpy_db', 'issue_pr', token)
#
# forks of numpy
# repo_list = ['seberg/numpy', 'hpyproject/numpy-hpy', 'xman/numpy-posit', 'Mukulikaa/numpy', 'George-Bassilious/numpy', 'plctlab/numpy', 'lvcarlosja/numpy']
# for repos in repo_list:
#     get_timeline_event(repos, 'numpy_db', str(repos).replace('/','_')+'_timeline', token)
#     get_issue_and_pr(repos, 'numpy_db', str(repos).replace('/','_')+'_issue&pr', token)
#
# upstream/downstream projects of numpy and of numpy's forks: 2048+1

# Forks of numpy's upstream/downstream projects: 1587 (none of the forks of
# the upstream/downstream projects of numpy's forks met the criteria).
repo_list = find_fork_names(token)
for repos in repo_list:
    # One collection per repository, named after the repository with '/'
    # replaced by '_'.
    get_timeline_event(repos, 'fork_timeline', str(repos).replace('/', '_'), token)
    # get_issue_and_pr(repos, 'fork_db', str(repos).replace('/','_'), token)

# TODO: add a module that stores the ids of repositories already downloaded
# and, for each newly added repository, checks whether its id already exists
# (the original note is cut off after "if it exists").
|