172 lines
5.2 KiB
Python
172 lines
5.2 KiB
Python
import numpy as np
|
||
import time
|
||
from math import *
|
||
from collections import Counter
|
||
from calendar import Calendar
|
||
import datetime
|
||
import pickle
|
||
from flask import Flask, jsonify
|
||
|
||
app = Flask(__name__)
|
||
|
||
@app.route('/api/xiuos/entropy', methods=['GET'])
def get_dict1():
    """Return the entropy mapping stored at index 0 of the global ``dictLst``.

    The script's start-up code appends one mapping per repository in the
    order ``['xiuos', 'openharmony', 'openeuler']``, so index 0 holds the
    xiuos series.  Any lookup error propagates to Flask if ``dictLst`` has
    not been populated.
    """
    xiuos_entropy = dictLst[0]
    return xiuos_entropy
|
||
|
||
@app.route('/api/openharmony/entropy', methods=['GET'])
def get_dict2():
    """Return the entropy mapping stored at index 1 of the global ``dictLst``.

    The script's start-up code appends one mapping per repository in the
    order ``['xiuos', 'openharmony', 'openeuler']``, so index 1 holds the
    openharmony series.  Any lookup error propagates to Flask if ``dictLst``
    has not been populated.
    """
    openharmony_entropy = dictLst[1]
    return openharmony_entropy
|
||
|
||
@app.route('/api/openeuler/entropy', methods=['GET'])
def get_dict3():
    """Return the entropy mapping stored at index 2 of the global ``dictLst``.

    The script's start-up code appends one mapping per repository in the
    order ``['xiuos', 'openharmony', 'openeuler']``, so index 2 holds the
    openeuler series.  Any lookup error propagates to Flask if ``dictLst``
    has not been populated.
    """
    openeuler_entropy = dictLst[2]
    return openeuler_entropy
|
||
|
||
|
||
#from elasticsearch import Elasticsearch
|
||
#es = Elasticsearch(['http://106.75.10.84:9200'])
|
||
|
||
##def get_eventLst(repo):
|
||
## ### Fetch all events for the given repo
|
||
## frm = 1
|
||
## sz = 100
|
||
## ## Query sz events starting at offset frm
|
||
## eventLst = []
|
||
## while 1:
|
||
## query={"query":{"bool":{"must":[{"match":{"search_fields.repo":repo}},]}},'from':frm,'size':sz}
|
||
## value = es.search(index="github_event_raw",body=query,_source=['data'])
|
||
## if value['hits']['hits'] == []:
|
||
## break
|
||
## else:
|
||
## frm += sz
|
||
## for i in value['hits']['hits']:
|
||
## item = {}
|
||
## item['issue_number'] = i['_source']['data']['issue']['number']
|
||
## item['event'] = i['_source']['data']['eventType']
|
||
## item['create_at'] = i['_source']['data']['createdAt']
|
||
## eventLst.append(item)
|
||
## break
|
||
## return eventLst
|
||
|
||
def get_eventLst(repo):
    """Load the pickled event collection for *repo* and return it as a list.

    Args:
        repo: Base name of the pickle file; ``'.pkl'`` is appended to it and
            the resulting path is opened in binary mode.

    Returns:
        A ``list`` built from the unpickled object.

    Raises:
        OSError: If the pickle file cannot be opened.
    """
    pickle_path = repo + '.pkl'
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file, so this must only ever be pointed at trusted, locally produced
    # dumps.
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return list(loaded)
|
||
|
||
def filterEvent(startTime,endTime):
    """Count events per issue and event type inside a time window.

    Reads the module-level ``eventLst``.  An event is kept only when its
    ``'createdAt'`` value is strictly greater than ``startTime`` and strictly
    less than ``endTime``.

    Args:
        startTime: Exclusive lower bound, compared directly against
            ``event['createdAt']``.
        endTime: Exclusive upper bound, compared the same way.

    Returns:
        ``{issue_number: {eventType: count}}`` for the events in the window.
    """
    issue2eventCount = {}
    for event in eventLst:
        created = event['createdAt']
        # Guard clause: skip everything on or outside the window bounds.
        if created <= startTime or created >= endTime:
            continue
        type_counts = issue2eventCount.setdefault(event['issue_number'], {})
        event_type = event['eventType']
        type_counts[event_type] = type_counts.get(event_type, 0) + 1
    return issue2eventCount
|
||
|
||
def day2timeStamp(string):
    """Convert a date or date-time string to an integer timestamp.

    The parse format is selected purely by the length of *string*:
    10 characters -> ``"%Y-%m-%d"``, 20 characters ->
    ``"%Y-%m-%dT%H:%M:%SZ"``.

    Args:
        string: The text to convert.

    Returns:
        ``int(time.mktime(...))`` of the parsed value, or ``-1`` when the
        length matches neither format.

    Raises:
        ValueError: If the length matches but the text does not parse.
    """
    formats_by_length = {
        10: "%Y-%m-%d",
        20: "%Y-%m-%dT%H:%M:%SZ",
    }
    fmt = formats_by_length.get(len(string))
    if fmt is None:
        return -1
    # NOTE(review): time.mktime interprets the parsed fields as local time,
    # even though the 20-character format ends in a literal "Z" -- confirm
    # that callers expect local-time semantics.
    parsed = time.strptime(string, fmt)
    return int(time.mktime(parsed))
|
||
|
||
# Entropy formula.
def lstToEntropy(lst):
    """Return the base-2 Shannon entropy of the counts in *lst*, scaled by their total.

    Each count is normalised to ``p = count / total`` and the result is
    ``total * -sum(p * log2(p))``.

    Args:
        lst: Sequence of non-negative counts.

    Returns:
        The scaled entropy.  An empty list or a list whose total is zero
        yields ``0`` instead of raising.
    """
    # Hoisted: the total is loop-invariant and was previously recomputed for
    # every element.
    total = sum(lst)
    if not total:
        return 0
    result = 0
    for count in lst:
        # By convention 0 * log(0) contributes nothing; skipping zero counts
        # also avoids a math domain error from log(0).
        if not count:
            continue
        p = count / total
        result += -p * log(p, 2)
    return result * total
|
||
|
||
# For the prepared entropy input, compute the entropy of every time bucket
# and return the x/y pairs, i.e. time and entropy.
# input  time2issuesEvent: {time: {issue_number: {event: count}}}
# output {x: y}
def dataToEntropy(time2issuesEvent):
    """Compute one entropy value per time bucket.

    For every bucket the per-issue entropies are added up.  Each issue's
    entropy is ``lstToEntropy`` of its event counts with an extra count of 1
    appended (presumably a baseline/smoothing term -- confirm with the
    author).  A bucket with no issues gets ``0``.

    Args:
        time2issuesEvent: ``{time: {issue_number: {event: count}}}``.

    Returns:
        ``{time: entropy}`` with the same keys as the input.
    """
    entropy_by_time = {}
    for bucket, issue2event2count in time2issuesEvent.items():
        if not issue2event2count:
            entropy_by_time[bucket] = 0
            continue
        bucket_entropy = 0
        for event2count in issue2event2count.values():
            counts = list(event2count.values())
            counts.append(1)
            bucket_entropy += lstToEntropy(counts)
        # A per-issue average (dividing by the number of issues) was tried
        # here previously and left disabled; the plain sum is what is stored.
        entropy_by_time[bucket] = bucket_entropy
    return entropy_by_time
|
||
|
||
# Aggregation step.
def timelstToEntropy(timelst):
    """Compute the entropy of every interval delimited by consecutive entries of *timelst*.

    Each interval ``(timelst[i], timelst[i + 1])`` is passed to
    ``filterEvent`` and stored under its start value; the collected mapping
    is then handed to ``dataToEntropy``.

    Args:
        timelst: Ordered list of interval boundaries.

    Returns:
        The mapping produced by ``dataToEntropy``, keyed by interval start.
    """
    time2issuesEvent = {}
    for start, end in zip(timelst, timelst[1:]):
        time2issuesEvent[start] = filterEvent(start, end)
    return dataToEntropy(time2issuesEvent)
|
||
|
||
##################################### Entry-point functions below #################################

# Compute how a project's excitation entropy changes over time.
# input  repo: str, timelst: list of interval boundaries
# output (x, y): (lst, lst)
# timelst=['2016-09-29', '2016-09-30'] computes the entropy produced on 2016-09-29.
# timelst=['2016-09-29', '2016-09-30', '2016-10-01'] computes the entropy
# produced on each of the two days 2016-09-29 and 2016-09-30.
def calEntropy(repo,timelst):
    """Return the entropy series for *timelst* as parallel x/y lists.

    Args:
        repo: Accepted for interface compatibility but not used here; the
            events are whatever the module-level ``eventLst`` currently
            holds (read by ``filterEvent`` via ``timelstToEntropy``).
        timelst: Ordered list of interval boundaries.

    Returns:
        ``(x, y)`` where ``x`` lists the interval start values and ``y`` the
        matching entropy values.
    """
    result = timelstToEntropy(timelst)
    x = list(result.keys())
    y = list(result.values())
    return (x, y)

# NOTE: an empty stub ``def calEntropy(repo, year): pass`` used to follow this
# definition.  Because it re-bound the same name, the implementation above was
# unreachable and every call returned None, so the stub has been removed.
|
||
## Build the time axis used for the entropy computation.
def get_date(start_year=2015, end_year=None):
    """Return every calendar date covering the requested years, sorted ascending.

    Args:
        start_year: First year to include (defaults to 2015, the previously
            hard-coded value).
        end_year: First year to exclude.  When omitted it defaults to the
            year after the current one, so the axis always reaches today
            instead of stopping at the previously hard-coded 2025 bound.

    Returns:
        A sorted list of unique ``'YYYY-MM-DD'`` strings.  Because
        ``Calendar.itermonthdates`` yields whole weeks, the list also
        contains the few days that pad the first and last week of the range.
    """
    if end_year is None:
        end_year = datetime.date.today().year + 1
    cal = Calendar()
    unique_dates = set()
    for year in range(start_year, end_year):
        for month in range(1, 13):
            # Adjacent months overlap in their padding weeks; the set
            # removes those duplicates.
            unique_dates.update(str(day) for day in cal.itermonthdates(year, month))
    return sorted(unique_dates)
|
||
|
||
if __name__ == '__main__':
    # Script entry point: compute one entropy series per repository, keep
    # them in ``dictLst`` (read by the route handlers), then start the Flask
    # server.
    repos = ['xiuos', 'openharmony', 'openeuler']

    # ``dictLst`` and ``eventLst`` are module-level names.  At module scope a
    # plain assignment already creates a global, so no ``global`` statement
    # is needed.  ``eventLst`` is re-bound per repository because
    # ``filterEvent`` reads it.
    dictLst = []
    for repo in repos:
        eventLst = get_eventLst(repo)
        today = str(datetime.date.today())
        min_date = min(event['createdAt'] for event in eventLst)

        # Keep only the dates strictly between the earliest event timestamp
        # and today.
        candidate_dates = [day for day in get_date() if min_date < day < today]
        # Take every 7th date starting at index 5 as an interval boundary.
        timelst = candidate_dates[5::7]

        result = timelstToEntropy(timelst)
        formatted_result = {key: round(value, 2) for key, value in result.items()}
        print(repo)
        print(formatted_result)
        dictLst.append(formatted_result)

    app.run(host='0.0.0.0', port=5000)
|
||
|
||
|
||
|
||
|
||
|