ADD file via upload
This commit is contained in:
parent
505bcc9221
commit
aa69e89b93
|
@ -0,0 +1,171 @@
|
|||
import numpy as np
|
||||
import time
|
||||
from math import *
|
||||
from collections import Counter
|
||||
from calendar import Calendar
|
||||
import datetime
|
||||
import pickle
|
||||
from flask import Flask, jsonify
|
||||
|
||||
# Flask application object; the @app.route handlers below register on it and
# the script entry point at the bottom of the file starts it with app.run().
app = Flask(__name__)
|
||||
|
||||
@app.route('/api/xiuos/entropy', methods=['GET'])
def get_dict1():
    """Serve the entropy series stored at ``dictLst[0]``.

    ``dictLst`` is filled by the script entry point in repository order, so
    index 0 holds the result computed for ``xiuos``.

    Returns:
        A JSON response built from ``dictLst[0]``.
    """
    # Build the JSON response explicitly rather than relying on Flask's
    # implicit dict-to-response conversion, which older releases do not have.
    return jsonify(dictLst[0])
|
||||
|
||||
@app.route('/api/openharmony/entropy', methods=['GET'])
def get_dict2():
    """Serve the entropy series stored at ``dictLst[1]``.

    ``dictLst`` is filled by the script entry point in repository order, so
    index 1 holds the result computed for ``openharmony``.

    Returns:
        A JSON response built from ``dictLst[1]``.
    """
    # Build the JSON response explicitly rather than relying on Flask's
    # implicit dict-to-response conversion, which older releases do not have.
    return jsonify(dictLst[1])
|
||||
|
||||
@app.route('/api/openeuler/entropy', methods=['GET'])
def get_dict3():
    """Serve the entropy series stored at ``dictLst[2]``.

    ``dictLst`` is filled by the script entry point in repository order, so
    index 2 holds the result computed for ``openeuler``.

    Returns:
        A JSON response built from ``dictLst[2]``.
    """
    # Build the JSON response explicitly rather than relying on Flask's
    # implicit dict-to-response conversion, which older releases do not have.
    return jsonify(dictLst[2])
|
||||
|
||||
|
||||
#from elasticsearch import Elasticsearch
|
||||
#es = Elasticsearch(['http://106.75.10.84:9200'])
|
||||
|
||||
##def get_eventLst(repo):
|
||||
## ### Fetch all events of the given repo
|
||||
## frm = 1
|
||||
## sz = 100
|
||||
## ## Query sz events starting at offset frm
|
||||
## eventLst = []
|
||||
## while 1:
|
||||
## query={"query":{"bool":{"must":[{"match":{"search_fields.repo":repo}},]}},'from':frm,'size':sz}
|
||||
## value = es.search(index="github_event_raw",body=query,_source=['data'])
|
||||
## if value['hits']['hits'] == []:
|
||||
## break
|
||||
## else:
|
||||
## frm += sz
|
||||
## for i in value['hits']['hits']:
|
||||
## item = {}
|
||||
## item['issue_number'] = i['_source']['data']['issue']['number']
|
||||
## item['event'] = i['_source']['data']['eventType']
|
||||
## item['create_at'] = i['_source']['data']['createdAt']
|
||||
## eventLst.append(item)
|
||||
## break
|
||||
## return eventLst
|
||||
|
||||
def get_eventLst(repo):
    """Load the pickled events stored in ``<repo>.pkl``.

    Args:
        repo: Path prefix of the pickle file; ``'.pkl'`` is appended to it.

    Returns:
        The unpickled object converted to a ``list``.
    """
    pickle_path = repo + '.pkl'
    with open(pickle_path, 'rb') as pickle_file:
        # NOTE(review): pickle executes arbitrary code on load; only feed this
        # files produced by a trusted exporter.
        return list(pickle.load(pickle_file))
|
||||
|
||||
def filterEvent(startTime,endTime):
    """Count events per issue and event type inside an open time window.

    Reads the module-level ``eventLst``; every entry is indexed by the keys
    ``'createdAt'``, ``'issue_number'`` and ``'eventType'``.

    Args:
        startTime: Exclusive lower bound compared against ``createdAt``.
        endTime: Exclusive upper bound compared against ``createdAt``.

    Returns:
        ``{issue_number: {eventType: count}}`` for all events with
        ``startTime < createdAt < endTime``.
    """
    counts_by_issue = {}
    for record in eventLst:
        created = record['createdAt']
        # Both bounds are exclusive.
        if not (created > startTime and created < endTime):
            continue
        per_type = counts_by_issue.setdefault(record['issue_number'], {})
        kind = record['eventType']
        per_type[kind] = per_type.get(kind, 0) + 1
    return counts_by_issue
|
||||
|
||||
def day2timeStamp(string):
    """Convert a date or UTC timestamp string to Unix epoch seconds.

    Args:
        string: Either ``YYYY-MM-DD`` (10 characters) or
            ``YYYY-MM-DDTHH:MM:SSZ`` (20 characters).

    Returns:
        Integer seconds since the epoch, or ``-1`` when the length of
        ``string`` matches neither supported format.

    Raises:
        ValueError: If the length matches a format but the text does not
            parse with it.
    """
    # Function-scope import: the module itself only imports ``Calendar``.
    from calendar import timegm

    formats_by_length = {10: "%Y-%m-%d", 20: "%Y-%m-%dT%H:%M:%SZ"}
    fmt = formats_by_length.get(len(string))
    if fmt is None:
        return -1
    # The trailing "Z" marks UTC, so the parsed struct_time is converted with
    # timegm(). time.mktime() would reinterpret it as local time and shift the
    # result by the host's UTC offset. Date-only strings are anchored to
    # midnight UTC as well so both formats stay comparable with each other.
    return timegm(time.strptime(string, fmt))
|
||||
|
||||
def lstToEntropy(lst):
    """Return the base-2 Shannon entropy of ``lst`` scaled by its total.

    Each entry is treated as a count. With ``p_i = count_i / total`` the
    result is ``total * sum(-p_i * log2(p_i))``.

    Args:
        lst: Sequence of non-negative counts.

    Returns:
        The scaled entropy, or ``0`` when the counts sum to zero (which
        includes the empty list).
    """
    # Summed once up front instead of once per element.
    total = sum(lst)
    if not total:
        return 0
    entropy = 0
    for count in lst:
        if not count:
            # A zero count contributes nothing (0 * log 0 is taken as 0);
            # evaluating log2(0) would raise ValueError.
            continue
        share = count / total
        entropy -= share * log2(share)
    return entropy * total
|
||||
|
||||
def dataToEntropy(time2issuesEvent):
    """Compute one entropy value per time bucket.

    Args:
        time2issuesEvent: ``{time: {issue_number: {event: count}}}``.

    Returns:
        ``{time: entropy}``. For a non-empty bucket the value is the sum of
        ``lstToEntropy`` over every issue, where each issue's event counts
        get an extra ``1`` appended before the call. An empty bucket maps
        to ``0``. The sum is not averaged over the number of issues.
    """
    entropy_by_time = {}
    for bucket_start, issues in time2issuesEvent.items():
        if not issues:
            entropy_by_time[bucket_start] = 0
            continue
        entropy_by_time[bucket_start] = sum(
            lstToEntropy(list(event_counts.values()) + [1])
            for event_counts in issues.values()
        )
    return entropy_by_time
|
||||
|
||||
def timelstToEntropy(timelst):
    """Aggregate step: turn a list of time boundaries into entropy values.

    Every pair of consecutive entries in ``timelst`` forms one window. The
    events of each window are collected with ``filterEvent`` and the windows
    are then scored with ``dataToEntropy``.

    Args:
        timelst: Ordered boundaries; ``n`` entries produce ``n - 1`` windows.

    Returns:
        ``{window_start: entropy}`` as returned by ``dataToEntropy``.
    """
    events_by_window = {
        window_start: filterEvent(window_start, window_end)
        for window_start, window_end in zip(timelst, timelst[1:])
    }
    return dataToEntropy(events_by_window)
|
||||
|
||||
##################################### Entry-point functions below #################################
|
||||
|
||||
def calEntropy(repo,timelst):
    """Compute how the entropy of a project changes over time.

    Examples of how ``timelst`` is interpreted:
        ``['2016-09-29', '2016-09-30']`` yields the entropy produced on
        2016-09-29 only.
        ``['2016-09-29', '2016-09-30', '2016-10-01']`` yields the entropy
        produced on 2016-09-29 and on 2016-09-30.

    Args:
        repo: Project name (str).
        timelst: Ordered window boundaries, see the examples above.

    Returns:
        ``(x, y)``: two lists holding the window start times and the
        matching entropy values.
    """
    # NOTE(review): ``repo`` is not used here. The events come from the
    # module-level ``eventLst`` that ``filterEvent`` reads, so the caller
    # must have loaded the right repository beforehand.
    result = timelstToEntropy(timelst)
    return (list(result.keys()), list(result.values()))


# NOTE: an empty ``def calEntropy(repo, year): pass`` stub used to follow
# here. It rebound the name and made the implementation above unreachable.
# A per-year variant needs its own function name.
|
||||
def get_date(start_year=2015, end_year=None):
    """Build the time axis used for the entropy calculation.

    Args:
        start_year: First calendar year to cover.
        end_year: Last calendar year to cover (inclusive). ``None`` means
            the current year, but never earlier than 2024, so the axis keeps
            growing instead of stopping at a fixed year.

    Returns:
        Sorted list of unique ``YYYY-MM-DD`` strings. Because whole calendar
        weeks are emitted for every month, the list also contains the few
        days of the adjacent years that complete the first and last week.
    """
    if end_year is None:
        end_year = max(2024, datetime.date.today().year)
    month_calendar = Calendar()
    unique_days = set()
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # itermonthdates() pads each month to full weeks, so neighbouring
            # months overlap; the set removes those duplicates.
            unique_days.update(
                str(day) for day in month_calendar.itermonthdates(year, month)
            )
    # ISO date strings sort chronologically.
    return sorted(unique_days)
|
||||
|
||||
if __name__ == '__main__':
    # Order matters: the route handlers index ``dictLst`` by position
    # (0 = xiuos, 1 = openharmony, 2 = openeuler).
    repos = ['xiuos', 'openharmony', 'openeuler']

    # Module-level results served by the API.
    dictLst = []

    # Neither value depends on the repository, so compute them once.
    today = str(datetime.date.today())
    all_dates = get_date()

    for repo in repos:
        # filterEvent() reads the module-level ``eventLst``, so it has to be
        # rebound before the entropy of each repository is computed.
        eventLst = get_eventLst(repo)

        if eventLst:
            min_date = min(event['createdAt'] for event in eventLst)
            dateLst = [day for day in all_dates if min_date < day < today]
        else:
            # No events: keep an empty series instead of crashing on min([]),
            # so the positions in ``dictLst`` still line up with the routes.
            dateLst = []

        # One boundary every 7 days, starting at index 5 of the filtered axis.
        timelst = dateLst[5::7]

        result = timelstToEntropy(timelst)
        formatted_result = {key: round(value, 2) for key, value in result.items()}
        print(repo)
        print(formatted_result)
        dictLst.append(formatted_result)

    # NOTE(review): host '0.0.0.0' makes the server listen on every network
    # interface; confirm that this exposure is intended.
    app.run(host='0.0.0.0', port=5000)
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue