# 2264217 issue nodes
# 3276401 pr nodes
# 3582 repo nodes
# 953922 user nodes

# Unify the repo names used in the timeline database and the issue database (done)
# Walk the issue database and store (in a hash) the issues that satisfy the time cutoff (done)
# Decide which timeline events to track and which field identifies the developer
#   (the pairs actually queried are summarised right after the imports below)
# Walk the timeline database, find the developers who participated in the above issues, and store them in a hash
# Store the relationship information
# Import into neo4j (a hedged loading sketch follows get_repo_json below)

import pymongo
import collections
import ast
import json
from github import Github
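
# For reference: the (timeline event type, user field) pairs that get_user_json()
# and the commented-out calls in __main__ actually query. This constant is a new,
# purely illustrative summary and is not referenced anywhere else in the script.
TRACKED_EVENTS = [
    ('mentioned', 'actor'),
    ('labeled', 'actor'),
    ('assigned', 'actor'),
    ('assigned', 'assignee'),
    ('commented', 'user'),
    ('closed', 'actor'),
]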

def get_issues_and_users():

    # Initialization
    issue_set = collections.defaultdict(set)
    pr_set = collections.defaultdict(set)
    user_set = dict()
    # Database connection
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    # numpy and its forks
    db = client['numpy_db']
    for collection_name in db.list_collection_names():
        if 'issue&pr' in collection_name:
            name = collection_name[:collection_name.find('issue&pr') - 1]
            collection = db[collection_name]
            # pr
            issues = collection.find(
                {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": True}},
                {'number': 1, 'user': 1, '_id': 0})
            for issue in issues:
                pr_set[name].add(issue['number'])
                user_set[issue['user']['id']] = issue['user']['login']
            # issue
            issues = collection.find(
                {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": False}},
                {'number': 1, 'user': 1, '_id': 0})
            for issue in issues:
                issue_set[name].add(issue['number'])
                user_set[issue['user']['id']] = issue['user']['login']
    # Upstream/downstream repos of numpy and its forks
    with open('./numpy_refer_repo_name(addNumpyFork).txt', 'r', encoding='utf-8', newline='') as f:
        repo_list = set(ast.literal_eval(f.read()))
    db = client['reference_db']
    for collection_name in db.list_collection_names():
        # Skip repositories in the database that are not upstream/downstream
        if collection_name.replace('_', '/', 1) not in repo_list:
            continue
        collection = db[collection_name]
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": True}},
            {'number': 1, 'user': 1, '_id': 0})
        for issue in issues:
            pr_set[collection_name].add(issue['number'])
            user_set[issue['user']['id']] = issue['user']['login']

        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": False}},
            {'number': 1, 'user': 1, '_id': 0})
        for issue in issues:
            issue_set[collection_name].add(issue['number'])
            user_set[issue['user']['id']] = issue['user']['login']
    # Forks of the upstream/downstream repos
    db = client['fork_db']
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": True}},
            {'number': 1, 'user': 1, '_id': 0})
        for issue in issues:
            pr_set[collection_name].add(issue['number'])
            user_set[issue['user']['id']] = issue['user']['login']
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": False}},
            {'number': 1, 'user': 1, '_id': 0})
        for issue in issues:
            issue_set[collection_name].add(issue['number'])
            user_set[issue['user']['id']] = issue['user']['login']

    print('issue finished')
    return issue_set, pr_set, user_set
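
# Minimal usage sketch (hypothetical keys and values, shown only to illustrate the
# shapes of the three returned containers; actual keys are MongoDB collection names):
#   issue_set, pr_set, user_set = get_issues_and_users()
#   issue_set -> {'numpy_numpy': {123, 456}, ...}   # collection name -> set of issue numbers
#   pr_set    -> {'numpy_numpy': {789}, ...}        # collection name -> set of PR numbers
#   user_set  -> {583231: 'octocat', ...}           # GitHub user id -> login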

def get_all_users(issue_set, pr_set, user_set, event_type, user):

    # e.g. event_type='mentioned', user='actor'
    # Database connection
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    # numpy and its forks
    db = client['numpy_db']
    for collection_name in db.list_collection_names():
        if '_timeline' in collection_name:
            name = collection_name[:collection_name.find('_timeline')]
            collection = db[collection_name]
            # An event is kept if: its issue was created before March 2024, the event type
            # matches, and the event itself happened before March 2024
            events = collection.find({"issue_number": {"$in": list(issue_set[name]) + list(pr_set[name])},
                                      "created_at": {"$lt": "2024-03-01T00:00:00Z"},
                                      'event': event_type}, {user: 1, '_id': 0})
            # Some events are missing the user object
            for event in events:
                try:
                    user_set[event[user]['id']] = event[user]['login']
                except Exception:
                    pass
    # Upstream/downstream repos of numpy and its forks
    with open('./numpy_refer_repo_name(addNumpyFork).txt', 'r', encoding='utf-8', newline='') as f:
        repo_list = set(ast.literal_eval(f.read()))
    db = client['reference_timeline']
    for collection_name in db.list_collection_names():
        # Skip repositories in the database that are not upstream/downstream
        if collection_name.replace('_', '/', 1) not in repo_list:
            continue
        collection = db[collection_name]
        events = collection.find({"issue_number": {"$in": list(issue_set[collection_name]) + list(pr_set[collection_name])},
                                  "created_at": {"$lt": "2024-03-01T00:00:00Z"},
                                  'event': event_type}, {user: 1, '_id': 0})
        for event in events:
            try:
                user_set[event[user]['id']] = event[user]['login']
            except Exception:
                pass
    # Forks of the upstream/downstream repos
    db = client['fork_reference']
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        events = collection.find({"issue_number": {"$in": list(issue_set[collection_name]) + list(pr_set[collection_name])},
                                  "created_at": {"$lt": "2024-03-01T00:00:00Z"},
                                  'event': event_type}, {user: 1, '_id': 0})
        for event in events:
            try:
                user_set[event[user]['id']] = event[user]['login']
            except Exception:
                pass

    print(event_type, 'finished')
    return user_set

def get_event_user_json(issue_set, pr_set, user_set, event_type, user, tmp, cnt, size):

    # e.g. event_type='mentioned', user='actor'
    # Database connection
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    # numpy and its forks
    db = client['numpy_db']
    for collection_name in db.list_collection_names():
        if '_timeline' in collection_name:
            name = collection_name[:collection_name.find('_timeline')]
            collection = db[collection_name]
            # An event is kept if: its issue was created before March 2024, the event type
            # matches, and the event itself happened before March 2024
            events = collection.find({"issue_number": {"$in": list(issue_set[name]) + list(pr_set[name])},
                                      "created_at": {"$lt": "2024-03-01T00:00:00Z"},
                                      'event': event_type}, {user: 1, '_id': 0})
            # Some events are missing the user object
            for event in events:
                try:
                    if event[user]['id'] in user_set:
                        cnt += 1
                        del user_set[event[user]['id']]
                        # tmp.append(event[user])
                        tmp.append({"login": event[user]['login'], "id": event[user]['id']})
                        if not cnt % size:
                            with open('./json/entity/user' + str(cnt // size) + '.json', 'w') as f:
                                json.dump(tmp, f, indent=4)
                            tmp = []
                except Exception:
                    pass
    # Upstream/downstream repos of numpy and its forks
    with open('./numpy_refer_repo_name(addNumpyFork).txt', 'r', encoding='utf-8', newline='') as f:
        repo_list = set(ast.literal_eval(f.read()))
    db = client['reference_timeline']
    for collection_name in db.list_collection_names():
        # Skip repositories in the database that are not upstream/downstream
        if collection_name.replace('_', '/', 1) not in repo_list:
            continue
        collection = db[collection_name]
        events = collection.find({"issue_number": {"$in": list(issue_set[collection_name]) + list(pr_set[collection_name])},
                                  "created_at": {"$lt": "2024-03-01T00:00:00Z"},
                                  'event': event_type}, {user: 1, '_id': 0})
        for event in events:
            try:
                if event[user]['id'] in user_set:
                    cnt += 1
                    del user_set[event[user]['id']]
                    tmp.append({"login": event[user]['login'], "id": event[user]['id']})
                    if not cnt % size:
                        with open('./json/entity/user' + str(cnt // size) + '.json', 'w') as f:
                            json.dump(tmp, f, indent=4)
                        tmp = []
            except Exception:
                pass
    # Forks of the upstream/downstream repos
    db = client['fork_reference']
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        events = collection.find({"issue_number": {"$in": list(issue_set[collection_name]) + list(pr_set[collection_name])},
                                  "created_at": {"$lt": "2024-03-01T00:00:00Z"},
                                  'event': event_type}, {user: 1, '_id': 0})
        for event in events:
            try:
                if event[user]['id'] in user_set:
                    cnt += 1
                    del user_set[event[user]['id']]
                    tmp.append({"login": event[user]['login'], "id": event[user]['id']})
                    if not cnt % size:
                        with open('./json/entity/user' + str(cnt // size) + '.json', 'w') as f:
                            json.dump(tmp, f, indent=4)
                        tmp = []
            except Exception:
                pass

    print(event_type, 'finished')
    return user_set, tmp, cnt

def get_user_json(issue_set, pr_set, size):

    with open('user_id.txt', 'r', encoding='utf-8', newline='') as f:
        user_set = ast.literal_eval(f.read())
    cnt = 0
    tmp = []
    # Database connection
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    # numpy and its forks
    db = client['numpy_db']
    for collection_name in db.list_collection_names():
        if 'issue&pr' in collection_name:
            collection = db[collection_name]
            issues = collection.find(
                {"created_at": {"$lt": "2024-03-01T00:00:00Z"}},
                {'number': 1, 'user': 1, '_id': 0})
            for issue in issues:
                if issue['user']['id'] in user_set:
                    cnt += 1
                    del user_set[issue['user']['id']]
                    tmp.append({"login": issue['user']['login'], "id": issue['user']['id']})
                    if not cnt % size:
                        with open('./json/entity/user' + str(cnt // size) + '.json', 'w') as f:
                            json.dump(tmp, f, indent=4)
                        tmp = []
    # Upstream/downstream repos of numpy and its forks
    with open('./numpy_refer_repo_name(addNumpyFork).txt', 'r', encoding='utf-8', newline='') as f:
        repo_list = set(ast.literal_eval(f.read()))
    db = client['reference_db']
    for collection_name in db.list_collection_names():
        # Skip repositories in the database that are not upstream/downstream
        if collection_name.replace('_', '/', 1) not in repo_list:
            continue
        collection = db[collection_name]
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}},
            {'number': 1, 'user': 1, '_id': 0})
        for issue in issues:
            if issue['user']['id'] in user_set:
                cnt += 1
                del user_set[issue['user']['id']]
                tmp.append({"login": issue['user']['login'], "id": issue['user']['id']})
                if not cnt % size:
                    with open('./json/entity/user' + str(cnt // size) + '.json', 'w') as f:
                        json.dump(tmp, f, indent=4)
                    tmp = []
    # Forks of the upstream/downstream repos
    db = client['fork_db']
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}},
            {'number': 1, 'user': 1, '_id': 0})
        for issue in issues:
            if issue['user']['id'] in user_set:
                cnt += 1
                del user_set[issue['user']['id']]
                tmp.append({"login": issue['user']['login'], "id": issue['user']['id']})
                if not cnt % size:
                    with open('./json/entity/user' + str(cnt // size) + '.json', 'w') as f:
                        json.dump(tmp, f, indent=4)
                    tmp = []
    user_set, tmp, cnt = get_event_user_json(issue_set, pr_set, user_set, 'mentioned', 'actor', tmp, cnt, size)
    user_set, tmp, cnt = get_event_user_json(issue_set, pr_set, user_set, 'labeled', 'actor', tmp, cnt, size)
    user_set, tmp, cnt = get_event_user_json(issue_set, pr_set, user_set, 'assigned', 'actor', tmp, cnt, size)
    user_set, tmp, cnt = get_event_user_json(issue_set, pr_set, user_set, 'assigned', 'assignee', tmp, cnt, size)
    user_set, tmp, cnt = get_event_user_json(issue_set, pr_set, user_set, 'commented', 'user', tmp, cnt, size)
    user_set, tmp, cnt = get_event_user_json(issue_set, pr_set, user_set, 'closed', 'actor', tmp, cnt, size)
    if tmp:
        # Write the remaining users that did not fill a complete chunk
        with open('./json/entity/user' + str(cnt // size + 1) + '.json', 'w') as f:
            json.dump(tmp, f, indent=4)
    if not len(user_set):
        print('get user json finished')
    else:
        print('error occurred')
    return None

def get_issue_pr_json(size):

    # Initialization
    issue_cnt = 0
    pr_cnt = 0
    issue_tmp = []
    pr_tmp = []
    issue_set = collections.defaultdict(set)
    pr_set = collections.defaultdict(set)

    # Database connection
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    # numpy and its forks
    db = client['numpy_db']
    for collection_name in db.list_collection_names():
        if 'issue&pr' in collection_name:
            name = collection_name[:collection_name.find('issue&pr') - 1]
            collection = db[collection_name]
            # pr
            issues = collection.find(
                {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": True}},
                {'user': 0, 'labels': 0, 'assignee': 0, 'assignees': 0, 'repository_url': 0, 'labels_url': 0, 'comments_url': 0, 'events_url': 0,
                 'timeline_url': 0, 'html_url': 0, 'id': 0, 'node_id': 0, 'comments': 0, 'performed_via_github_app': 0, 'closed_by': 0, '_id': 0})
            for issue in issues:
                # Skip duplicate issues/PRs
                if issue['number'] in pr_set[name]:
                    continue
                pr_set[name].add(issue['number'])
                pr_cnt += 1
                issue["name"] = name + "+" + str(issue['number'])
                issue['reactions']['like'] = issue['reactions'].pop('+1')
                issue['reactions']['dislike'] = issue['reactions'].pop('-1')
                issue['reactions'].pop('url')
                pr_tmp.append(issue)
                if not pr_cnt % size:
                    with open('./json/entity/pr' + str(pr_cnt // size) + '.json', 'w') as f:
                        json.dump(pr_tmp, f, indent=4)
                    pr_tmp = []
            # issue
            issues = collection.find(
                {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": False}},
                {'user': 0, 'labels': 0, 'assignee': 0, 'assignees': 0, 'repository_url': 0, 'labels_url': 0, 'comments_url': 0, 'events_url': 0,
                 'timeline_url': 0, 'html_url': 0, 'id': 0, 'node_id': 0, 'comments': 0, 'performed_via_github_app': 0, 'closed_by': 0, '_id': 0})
            for issue in issues:
                if issue['number'] in issue_set[name]:
                    continue
                issue_set[name].add(issue['number'])
                issue_cnt += 1
                issue["name"] = name + "+" + str(issue['number'])
                issue['reactions']['like'] = issue['reactions'].pop('+1')
                issue['reactions']['dislike'] = issue['reactions'].pop('-1')
                issue['reactions'].pop('url')
                issue_tmp.append(issue)
                if not issue_cnt % size:
                    with open('./json/entity/issue' + str(issue_cnt // size) + '.json', 'w') as f:
                        json.dump(issue_tmp, f, indent=4)
                    issue_tmp = []
    # Upstream/downstream repos of numpy and its forks
    with open('./numpy_refer_repo_name(addNumpyFork).txt', 'r', encoding='utf-8', newline='') as f:
        repo_list = set(ast.literal_eval(f.read()))
    db = client['reference_db']
    for collection_name in db.list_collection_names():
        # Skip repositories in the database that are not upstream/downstream
        if collection_name.replace('_', '/', 1) not in repo_list:
            continue
        collection = db[collection_name]
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": True}},
            {'user': 0, 'labels': 0, 'assignee': 0, 'assignees': 0, 'repository_url': 0, 'labels_url': 0, 'comments_url': 0, 'events_url': 0,
             'timeline_url': 0, 'html_url': 0, 'id': 0, 'node_id': 0, 'comments': 0, 'performed_via_github_app': 0, 'closed_by': 0, '_id': 0})
        for issue in issues:
            if issue['number'] in pr_set[collection_name]:
                continue
            pr_set[collection_name].add(issue['number'])
            pr_cnt += 1
            issue["name"] = collection_name + "+" + str(issue['number'])
            issue['reactions']['like'] = issue['reactions'].pop('+1')
            issue['reactions']['dislike'] = issue['reactions'].pop('-1')
            issue['reactions'].pop('url')
            pr_tmp.append(issue)
            if not pr_cnt % size:
                with open('./json/entity/pr' + str(pr_cnt // size) + '.json', 'w') as f:
                    json.dump(pr_tmp, f, indent=4)
                pr_tmp = []
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": False}},
            {'user': 0, 'labels': 0, 'assignee': 0, 'assignees': 0, 'repository_url': 0, 'labels_url': 0, 'comments_url': 0, 'events_url': 0,
             'timeline_url': 0, 'html_url': 0, 'id': 0, 'node_id': 0, 'comments': 0, 'performed_via_github_app': 0, 'closed_by': 0, '_id': 0})
        for issue in issues:
            if issue['number'] in issue_set[collection_name]:
                continue
            issue_set[collection_name].add(issue['number'])
            issue_cnt += 1
            issue["name"] = collection_name + "+" + str(issue['number'])
            issue['reactions']['like'] = issue['reactions'].pop('+1')
            issue['reactions']['dislike'] = issue['reactions'].pop('-1')
            issue['reactions'].pop('url')
            issue_tmp.append(issue)
            if not issue_cnt % size:
                with open('./json/entity/issue' + str(issue_cnt // size) + '.json', 'w') as f:
                    json.dump(issue_tmp, f, indent=4)
                issue_tmp = []
    # Forks of the upstream/downstream repos
    db = client['fork_db']
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": True}},
            {'user': 0, 'labels': 0, 'assignee': 0, 'assignees': 0, 'repository_url': 0, 'labels_url': 0, 'comments_url': 0, 'events_url': 0,
             'timeline_url': 0, 'html_url': 0, 'id': 0, 'node_id': 0, 'comments': 0, 'performed_via_github_app': 0, 'closed_by': 0, '_id': 0})
        for issue in issues:
            if issue['number'] in pr_set[collection_name]:
                continue
            pr_set[collection_name].add(issue['number'])
            pr_cnt += 1
            issue["name"] = collection_name + "+" + str(issue['number'])
            issue['reactions']['like'] = issue['reactions'].pop('+1')
            issue['reactions']['dislike'] = issue['reactions'].pop('-1')
            issue['reactions'].pop('url')
            pr_tmp.append(issue)
            if not pr_cnt % size:
                with open('./json/entity/pr' + str(pr_cnt // size) + '.json', 'w') as f:
                    json.dump(pr_tmp, f, indent=4)
                pr_tmp = []
        issues = collection.find(
            {"created_at": {"$lt": "2024-03-01T00:00:00Z"}, "pull_request": {"$exists": False}},
            {'user': 0, 'labels': 0, 'assignee': 0, 'assignees': 0, 'repository_url': 0, 'labels_url': 0, 'comments_url': 0, 'events_url': 0,
             'timeline_url': 0, 'html_url': 0, 'id': 0, 'node_id': 0, 'comments': 0, 'performed_via_github_app': 0, 'closed_by': 0, '_id': 0})
        for issue in issues:
            if issue['number'] in issue_set[collection_name]:
                continue
            issue_set[collection_name].add(issue['number'])
            issue_cnt += 1
            issue["name"] = collection_name + "+" + str(issue['number'])
            issue['reactions']['like'] = issue['reactions'].pop('+1')
            issue['reactions']['dislike'] = issue['reactions'].pop('-1')
            issue['reactions'].pop('url')
            issue_tmp.append(issue)
            if not issue_cnt % size:
                with open('./json/entity/issue' + str(issue_cnt // size) + '.json', 'w') as f:
                    json.dump(issue_tmp, f, indent=4)
                issue_tmp = []
    # Write the remaining records that did not fill a complete chunk
    if issue_tmp:
        with open('./json/entity/issue' + str(issue_cnt // size + 1) + '.json', 'w') as f:
            json.dump(issue_tmp, f, indent=4)
    if pr_tmp:
        with open('./json/entity/pr' + str(pr_cnt // size + 1) + '.json', 'w') as f:
            json.dump(pr_tmp, f, indent=4)

    print(issue_cnt, pr_cnt)
    print('get issue and pr json finished')
    return None

def get_repo_json(issue_set, pr_set):

    # Repositories that contributed at least one issue or PR
    repo_set = {i for i in issue_set.keys() if issue_set[i]}.union({i for i in pr_set.keys() if pr_set[i]})
    tmp = []
    for i in repo_set:
        tmp.append({"name": i})
    with open('./json/entity/repo.json', 'w') as f:
        json.dump(tmp, f, indent=4)
    return None
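
# The "import into neo4j" step from the TODO list at the top is not implemented in
# this script. The function below is only a minimal, hedged sketch of how the user
# entity files written by get_user_json() could be loaded, assuming a local Neo4j
# instance reachable at bolt://localhost:7687 and the official `neo4j` Python driver;
# the URI, credentials and node label are illustrative assumptions, not part of the
# original pipeline. Relationship files, once produced by the "store the relationship
# information" step, could be loaded the same way with a MERGE on a relationship pattern.
def load_user_json_into_neo4j(uri="bolt://localhost:7687", auth=("neo4j", "password")):
    import glob
    from neo4j import GraphDatabase  # assumed dependency, not used elsewhere in this script

    driver = GraphDatabase.driver(uri, auth=auth)
    cypher = (
        "UNWIND $rows AS row "
        "MERGE (u:User {id: row.id}) "
        "SET u.login = row.login"
    )
    with driver.session() as session:
        # Each user<N>.json chunk holds a list of {"login": ..., "id": ...} records
        for path in sorted(glob.glob('./json/entity/user*.json')):
            with open(path, 'r') as f:
                rows = json.load(f)
            session.run(cypher, rows=rows)
    driver.close()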


if __name__ == "__main__":

    # issue_set, pr_set, user_set = get_issues_and_users()
    # user_set = get_all_users(issue_set, pr_set, user_set, 'mentioned', 'actor')
    # user_set = get_all_users(issue_set, pr_set, user_set, 'labeled', 'actor')
    # user_set = get_all_users(issue_set, pr_set, user_set, 'assigned', 'actor')
    # user_set = get_all_users(issue_set, pr_set, user_set, 'assigned', 'assignee')
    # user_set = get_all_users(issue_set, pr_set, user_set, 'commented', 'user')
    # user_set = get_all_users(issue_set, pr_set, user_set, 'closed', 'actor')
    #
    # with open('user_id.txt', 'w', encoding='utf-8', newline='') as f:
    #     f.write(str(user_set))

    # repo_num = len({i for i in issue_set.keys() if issue_set[i]}.union({i for i in pr_set.keys() if pr_set[i]}))
    # issue_num = sum(len(value) for value in issue_set.values())
    # pr_num = sum(len(value) for value in pr_set.values())
    # user_num = len(user_set)
    # print(repo_num, issue_num, pr_num, user_num)

    # get_user_json(issue_set, pr_set, 200000)
    get_issue_pr_json(25000)
    # get_repo_json(issue_set, pr_set)