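"""Average how long closed issues stayed open, grouped by activity type.

For every closed issue the script collects timestamps of five kinds of
timeline activity (issue/PR links in comments, cross-references, assignments,
mentions, labels) from a local MongoDB and aggregates the open duration per
combination of activity kinds that occurred.
"""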
from pymongo import MongoClient
|
||
from datetime import datetime
|
||
import matplotlib.pyplot as plt
|
||
import re
|
||
import matplotlib
|
||
import numpy as np
|
||
|
||
|
||
def get_time_difference(t_old: datetime, t_new: datetime) -> float:
    """Return the signed number of seconds elapsed from ``t_old`` to ``t_new``.

    Args:
        t_old: The earlier (reference) timestamp.
        t_new: The later timestamp.

    Returns:
        ``(t_new - t_old)`` expressed in seconds; negative when ``t_new``
        lies before ``t_old``.
    """
    return (t_new - t_old).total_seconds()
|
||
|
||
def get_time_difference_list(time_list) -> list:
    """Return the gaps, in seconds, between consecutive timestamps.

    Args:
        time_list: Sequence of ``datetime`` objects. It is not sorted here;
            an out-of-order pair produces a negative gap.

    Returns:
        A list with ``len(time_list) - 1`` floats, where element ``k`` is
        ``time_list[k + 1] - time_list[k]`` in seconds. Empty and
        single-element inputs yield ``[]``.
    """
    # Pair every timestamp with its successor instead of indexing by position.
    return [
        (later - earlier).total_seconds()
        for earlier, later in zip(time_list, time_list[1:])
    ]
|
||
|
||
# Connect to the MongoDB instance on localhost that holds the collected data.
client = MongoClient('localhost', 27017)
db = client['numpy_db']
collection1 = db['issue_pr']            # queried below for the issues themselves
collection2 = db['issue_timeline_new']  # queried later for per-issue timeline events

# Closed issues only: documents carrying a "pull_request" field are excluded.
# Results are ordered by issue number.
# NOTE(review): batch_size(10) presumably keeps each server batch small because
# the per-issue processing is slow - confirm before changing it.
issue_data = (
    collection1.find(
        {"state": "closed", "pull_request": {"$exists": False}},
        {"number": 1, "created_at": 1, "closed_at": 1, "body": 1, "title": 1, "_id": 0},
    )
    .sort([("number", 1)])
    .batch_size(10)
)
|
||
def _zeroed_flag_table():
    """Return a fresh dict mapping every 5-tuple of 0/1 flags to 0.

    The 32 keys are generated with the first flag varying slowest, i.e.
    (0, 0, 0, 0, 0), (0, 0, 0, 0, 1), ..., (1, 1, 1, 1, 1).
    """
    flags = (0, 1)
    return {
        (a, b, c, d, e): 0
        for a in flags
        for b in flags
        for c in flags
        for d in flags
        for e in flags
    }


# Per-combination accumulators. Each key is a tuple of five 0/1 flags; every
# table is an independent dict so updating one never affects the others.
combinations = _zeroed_flag_table()  # summed open time
num = _zeroed_flag_table()           # summed timeline lengths
count = _zeroed_flag_table()         # number of issues counted
|
||
|
||
# Layout of the "created_at" / "closed_at" strings stored in the database.
_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

# Links to GitHub issues or pull requests inside a comment body.
# Compiled once here instead of being rebuilt for every issue.
_LINK_PATTERN = re.compile(
    r'github.com/[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_.]+/issues/[0-9]+|github.com/[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_.]+/pull/[0-9]+'
)

# A timeline whose gaps are all shorter than this many seconds is treated as
# bot-generated.
_BOT_GAP_SECONDS = 10

# Timeline event types read straight from the database, in the order in which
# they appear in the aggregation key (after the leading "link" flag).
_PLAIN_EVENT_TYPES = ("cross-referenced", "assigned", "mentioned", "labeled")


def _parse_timestamp(raw):
    """Parse a stored timestamp string into a naive ``datetime``."""
    return datetime.strptime(raw, _TIMESTAMP_FORMAT)


def _event_times(issue_number, event_name):
    """Return the sorted timestamps of one event type for one issue."""
    # No server-side sort: the result is sorted locally right below.
    cursor = collection2.find(
        {"event": event_name, "issue_number": issue_number},
        {"created_at": 1, "_id": 0},
    )
    return sorted(_parse_timestamp(doc["created_at"]) for doc in cursor)


def _link_times(issue_number):
    """Return one timestamp per issue/PR link found in the issue's comments.

    A comment containing several links contributes its timestamp once per
    link. Comments without a body (missing or ``None``) contribute nothing
    instead of raising ``TypeError`` inside the regex search.
    """
    cursor = collection2.find(
        {"event": "commented", "issue_number": issue_number},
        {"created_at": 1, "body": 1, "_id": 0},
    )
    stamps = []
    for comment in cursor:
        link_count = len(_LINK_PATTERN.findall(comment.get("body") or ""))
        if link_count:
            stamps.extend([_parse_timestamp(comment["created_at"])] * link_count)
    return sorted(stamps)


def _bounded_timeline(event_times, opened_at, closed_at):
    """Return ``[opened_at, *events, closed_at]`` without post-closing entries.

    Every entry later than ``closed_at`` is dropped; this check also covers
    ``opened_at`` itself. ``closed_at`` is always the last element.
    """
    timeline = [moment for moment in [opened_at] + list(event_times) if moment <= closed_at]
    timeline.append(closed_at)
    return timeline


def _looks_automated(timeline):
    """Return True when every gap in ``timeline`` is below the bot threshold."""
    return all(gap < _BOT_GAP_SECONDS for gap in get_time_difference_list(timeline))


for issue in issue_data:
    print(issue['number'], end=' ')
    issue_number = issue['number']

    # Gather the five activity kinds: links in comments, cross-references,
    # assignments, mentions and labels. The order fixes the key layout below.
    raw_timelines = [_link_times(issue_number)]
    raw_timelines.extend(_event_times(issue_number, name) for name in _PLAIN_EVENT_TYPES)

    # Frame each list with the issue's creation and closing timestamps and
    # discard events that happened after the issue was closed.
    opened_at = _parse_timestamp(issue["created_at"])
    closed_at = _parse_timestamp(issue["closed_at"])
    timelines = [_bounded_timeline(stamps, opened_at, closed_at) for stamps in raw_timelines]

    # NOTE(review): a single timeline that looks bot-driven causes the whole
    # issue to be skipped, not just the offending events - confirm intent.
    if any(_looks_automated(timeline) for timeline in timelines):
        continue

    # A timeline longer than its two framing timestamps contains at least one
    # real event of that kind.
    key = tuple(int(len(timeline) > 2) for timeline in timelines)
    link_timeline = timelines[0]
    combinations[key] += get_time_difference(link_timeline[0], link_timeline[-1])
    # NOTE(review): each length includes the two framing timestamps, so this
    # adds 10 on top of the real event count per issue - confirm intent.
    num[key] += sum(len(timeline) for timeline in timelines)
    count[key] += 1
|
||
|
||
|
||
# Print one tab-separated row per flag combination:
#   key, mean open time in days, mean timeline length per issue,
#   days per timeline entry (only when the divisor is non-zero), issue count.
for key in combinations:
    issues_seen = count[key]
    if issues_seen:
        # Convert the accumulated totals into per-issue means; the open time
        # is converted from seconds to days. The tables are updated in place.
        combinations[key] = round(combinations[key] / issues_seen / 3600 / 24, 2)
        num[key] = round(num[key] / issues_seen, 2)

    row = [key, combinations[key], num[key]]
    # Only divide when there is something to divide by; the previous inverted
    # test divided exactly when num[key] was zero and raised ZeroDivisionError.
    if num[key]:
        row.append(round(combinations[key] / num[key], 2))
    row.append(issues_seen)
    print(*row, sep='\t')
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|