ADD file via upload
This commit is contained in:
parent
8507273634
commit
505bcc9221
|
@ -0,0 +1,197 @@
|
|||
import numpy as np
|
||||
import time
|
||||
import argparse
|
||||
import sys
|
||||
from math import *
|
||||
from collections import Counter
|
||||
from calendar import Calendar
|
||||
import datetime
|
||||
from elasticsearch import Elasticsearch
|
||||
# Module-level Elasticsearch client; get_eventLst() issues its searches through it.
# NOTE(review): the endpoint is hard-coded here - consider moving it to configuration.
es = Elasticsearch(['http://106.75.10.84:9200'])
|
||||
|
||||
def get_eventLst(repo, max_pages=1):
    """Fetch issue events of ``repo`` from the ``github_event_raw`` index.

    Hits are requested page by page (100 per request) through the
    module-level ``es`` client, and every hit is reduced to the three fields
    the entropy computation needs.

    Args:
        repo: Repository name matched against ``search_fields.repo``.
        max_pages: Maximum number of result pages to request.  The default of
            1 keeps the single-request behaviour of the original loop, which
            always stopped after its first page.  Pass ``None`` to keep
            paging until the index returns no more hits.

    Returns:
        A list of dicts with the keys ``issue_number``, ``event`` and
        ``create_at``, in the order the search returned them.
    """
    page_size = 100
    # Elasticsearch's ``from`` offset is zero-based; starting at 1 silently
    # dropped the first matching event.
    offset = 0
    eventLst = []
    pages_fetched = 0
    # NOTE(review): with max_pages=None this relies on from/size paging, which
    # Elasticsearch caps via index.max_result_window (10000 by default);
    # deeper result sets would need search_after or the scroll API.
    while max_pages is None or pages_fetched < max_pages:
        query = {
            "query": {"bool": {"must": [{"match": {"search_fields.repo": repo}}]}},
            'from': offset,
            'size': page_size,
        }
        value = es.search(index="github_event_raw", body=query, _source=['data'])
        hits = value['hits']['hits']
        if not hits:
            break
        for hit in hits:
            data = hit['_source']['data']
            eventLst.append({
                'issue_number': data['issue']['number'],
                'event': data['eventType'],
                'create_at': data['createdAt'],
            })
        offset += page_size
        pages_fetched += 1
    return eventLst
|
||||
|
||||
def filterEvent(startTime, endTime):
    """Count events per issue that fall strictly between two points in time.

    Reads the module-level ``eventLst`` and keeps the events whose
    ``create_at`` value is greater than ``startTime`` and less than
    ``endTime`` (both bounds exclusive).

    Args:
        startTime: Lower bound, compared directly against ``create_at``.
        endTime: Upper bound, compared directly against ``create_at``.

    Returns:
        A nested dict ``{issue_number: {event: count}}``.
    """
    issue2eventCount = {}
    for record in eventLst:
        if not (startTime < record['create_at'] < endTime):
            continue
        per_issue = issue2eventCount.setdefault(record['issue_number'], {})
        kind = record['event']
        per_issue[kind] = per_issue.get(kind, 0) + 1
    return issue2eventCount
|
||||
|
||||
def day2timeStamp(string):
    """Convert a date string to an integer timestamp via ``time.mktime``.

    The format is selected by the length of ``string``:

    * 10 characters -> ``%Y-%m-%d``
    * 20 characters -> ``%Y-%m-%dT%H:%M:%SZ``

    Args:
        string: The date or date-time text to convert.

    Returns:
        The integer timestamp, or ``-1`` when the length matches neither
        supported format.
    """
    formats_by_length = {
        10: "%Y-%m-%d",
        20: "%Y-%m-%dT%H:%M:%SZ",
    }
    fmt = formats_by_length.get(len(string))
    if fmt is None:
        return -1
    return int(time.mktime(time.strptime(string, fmt)))
|
||||
|
||||
def lstToEntropy(lst):
    """Return the base-2 Shannon entropy of ``lst`` scaled by its total.

    Every count ``c`` contributes ``-p * log2(p)`` with ``p = c / sum(lst)``;
    the summed entropy is then multiplied by ``sum(lst)``.

    Args:
        lst: A list of numeric counts.

    Returns:
        The scaled entropy.  ``0`` is returned for an empty list and for a
        list whose total is zero.
    """
    # Hoisted out of the loop: the total does not change between iterations.
    total = sum(lst)
    if not total:
        # Nothing to distribute; avoids dividing by zero for all-zero input.
        return 0
    entropy = 0
    for count in lst:
        if not count:
            # 0 * log(0) is taken as 0, so empty categories contribute nothing
            # (log(0) itself would raise a math domain error).
            continue
        p = count / total
        entropy += -p * log(p, 2)
    return entropy * total
|
||||
|
||||
def dataToEntropy(time2issuesEvent):
    """Compute one entropy value per time bucket.

    Args:
        time2issuesEvent: Prepared data shaped as
            ``{time: {issue_number: {event: count}}}``.

    Returns:
        A dict ``{time: entropy}``.  For a bucket with issues, the entropy is
        the sum over its issues of ``lstToEntropy`` applied to that issue's
        event counts with one extra count of 1 appended.  A bucket without
        issues maps to ``0``.
    """
    result = {}
    for bucket, issues in time2issuesEvent.items():
        if not issues:
            result[bucket] = 0
            continue
        # The per-issue values are summed as-is; averaging over the number of
        # issues was present in the original only as disabled code.
        result[bucket] = sum(
            lstToEntropy(list(counts.values()) + [1])
            for counts in issues.values()
        )
    return result
|
||||
|
||||
def timelstToEntropy(timelst):
    """Tie the pipeline together: time axis in, entropy per interval out.

    Every pair of consecutive entries in ``timelst`` forms one interval.  The
    events inside it are counted with ``filterEvent`` and the counts are
    turned into entropy values by ``dataToEntropy``.

    Args:
        timelst: Ordered interval boundaries; ``n`` entries give ``n - 1``
            intervals, each keyed by its start boundary.

    Returns:
        A dict ``{interval_start: entropy}``.
    """
    time2issuesEvent = {
        begin: filterEvent(begin, end)
        for begin, end in zip(timelst, timelst[1:])
    }
    return dataToEntropy(time2issuesEvent)
|
||||
|
||||
#####################################下面是执行函数#################################
|
||||
|
||||
def calEntropy(repo, timelst):
    """Compute how a project's entropy changes over time.

    Examples of ``timelst``:

    * ``['2016-09-29', '2016-09-30']`` computes the entropy produced on
      2016-09-29.
    * ``['2016-09-29', '2016-09-30', '2016-10-01']`` computes the entropy
      produced on 2016-09-29 and on 2016-09-30.

    Args:
        repo: Repository name.  Not read here; the events come from the
            module-level ``eventLst`` used by ``filterEvent``.
        timelst: Ordered list of interval boundaries.

    Returns:
        A tuple ``(x, y)`` of two lists: the interval start times and the
        matching entropy values.
    """
    # A placeholder ``def calEntropy(repo, year): pass`` used to follow this
    # definition and rebound the name, so every call returned None.  It is
    # removed; per-year computation is provided by calEntropyYear().
    result = timelstToEntropy(timelst)
    return (list(result.keys()), list(result.values()))
|
||||
def get_date():
    """Build the time axis used for the entropy computation.

    Returns:
        A sorted list of unique ``YYYY-MM-DD`` strings covering every calendar
        month from January 2015 through December 2053.  Because
        ``Calendar.itermonthdates`` yields whole weeks, the few padding days
        just before and after that range are included as well.
    """
    cal = Calendar()
    unique_days = set()
    for year in range(2015, 2054):
        for month in range(1, 13):
            unique_days.update(str(day) for day in cal.itermonthdates(year, month))
    return sorted(unique_days)
|
||||
|
||||
def get_year(year):
    """Return the daily time axis for one calendar year.

    Args:
        year: The year as an integer.

    Returns:
        A sorted list of ``YYYY-MM-DD`` strings holding every day of ``year``
        followed by January 1st of the next year (the closing boundary of the
        last day).  If today's date is in that list, the list is cut off
        right after today.
    """
    cal = Calendar()
    prefix = str(year)
    days_in_year = set()
    for month in range(1, 13):
        for day in cal.itermonthdates(year, month):
            text = str(day)
            # itermonthdates pads months to whole weeks; keep only this year.
            if text[:4] == prefix:
                days_in_year.add(text)

    dateLst = sorted(days_in_year)
    dateLst.append(str(year + 1) + '-01-01')

    today = str(datetime.date.today())
    if today in dateLst:
        dateLst = dateLst[:dateLst.index(today) + 1]
    return dateLst
|
||||
|
||||
def calEntropyYear(repo, year):
    """Compute the daily entropy values for one year.

    Args:
        repo: Repository name.  Not read here; the events come from the
            module-level ``eventLst`` used by ``filterEvent``.
        year: The year as an integer, passed on to ``get_year``.

    Returns:
        A dict ``{day: entropy}`` keyed by the ``YYYY-MM-DD`` day strings
        produced by ``get_year``.
    """
    per_day = timelstToEntropy(get_year(year))
    return dict(per_day)
|
||||
|
||||
def get_day(day):
    """Return the two-entry time axis that covers a single day.

    Args:
        day: The day as a ``YYYY-MM-DD`` string.

    Returns:
        ``[day, next_day]`` as strings, or ``None`` (after printing an error
        message) when ``day`` sorts after today's date.
    """
    # Only days up to and including today are accepted.
    today = datetime.date.today()
    if str(day) > str(today):
        print('error:day > today_date')
        return None

    start = datetime.date(int(day[:4]), int(day[5:7]), int(day[8:10]))
    following = start + datetime.timedelta(days=1)
    return [str(day), str(following)]
|
||||
|
||||
def calEntropyDay(repo, day):
    """Compute the entropy produced on a single day.

    Args:
        repo: Repository name.  Not read here; the events come from the
            module-level ``eventLst`` used by ``filterEvent``.
        day: The day as a ``YYYY-MM-DD`` string.

    Returns:
        A dict ``{day: entropy}``, or an empty dict when ``get_day`` rejects
        the day.
    """
    timelst = get_day(day)
    if timelst is None:
        # get_day() prints an error and returns None for a day after today;
        # passing that None on used to fail with a TypeError in
        # timelstToEntropy(), so report "no data" instead.
        return {}
    return dict(timelstToEntropy(timelst))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # --- Input -------------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--day",
        default='2023-01-01',
        help="calculate the date of a day. example: 2023-01-01",
        type=str,
    )
    parser.add_argument(
        "-r", "--repo",
        default='Paddle',
        help="repo name",
        type=str,
    )
    args = parser.parse_args()
    repo, day = args.repo, args.day

    # --- Computation -------------------------------------------------------
    # eventLst is a module-level name on purpose: filterEvent() reads it.
    eventLst = get_eventLst(repo)
    result = {
        'data': calEntropyDay(repo, day),
        'repo': repo,
        'day': day,
    }
    print(result)

    # Disabled batch mode, kept for reference: compute the entropy of every
    # day before today and save the mapping to disk.
    ## timelst = get_date()
    ## today_date = datetime.date.today()
    ## index = timelst.index(str(today_date))
    ## timelst = timelst[:index]
    ##
    ## # yields the time list ['2016-09-29', ...] and the entropy list [0, ...]
    ## time,entropy = calEntropy(repo,timelst)
    ## time2entropy = dict(zip(time,entropy))
    ## np.save(repo+'_time2entropy.npy',time2entropy)
|
||||
|
Loading…
Reference in New Issue