calEntropy/calEntropyAPI.py

172 lines
5.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import calendar
import datetime
import pickle
import time
from calendar import Calendar
from collections import Counter
from math import *

import numpy as np
from flask import Flask, jsonify
# Flask application object; the entropy endpoints defined below register
# their routes on it and the __main__ block starts it with app.run().
app = Flask(__name__)
@app.route('/api/xiuos/entropy', methods=['GET'])
def get_dict1():
    """Serve the pre-computed entropy series of the ``xiuos`` repository.

    ``dictLst`` is filled by the ``__main__`` block before the server is
    started; index 0 is the first entry of its ``repos`` list (``xiuos``).

    Returns:
        A JSON response mapping window start date to rounded entropy.
    """
    # Build the JSON response explicitly: a bare dict return value is only
    # converted automatically by Flask 1.1 and newer.
    return jsonify(dictLst[0])
@app.route('/api/openharmony/entropy', methods=['GET'])
def get_dict2():
    """Serve the pre-computed entropy series of the ``openharmony`` repository.

    ``dictLst`` is filled by the ``__main__`` block before the server is
    started; index 1 is the second entry of its ``repos`` list
    (``openharmony``).

    Returns:
        A JSON response mapping window start date to rounded entropy.
    """
    # Build the JSON response explicitly: a bare dict return value is only
    # converted automatically by Flask 1.1 and newer.
    return jsonify(dictLst[1])
@app.route('/api/openeuler/entropy', methods=['GET'])
def get_dict3():
    """Serve the pre-computed entropy series of the ``openeuler`` repository.

    ``dictLst`` is filled by the ``__main__`` block before the server is
    started; index 2 is the third entry of its ``repos`` list (``openeuler``).

    Returns:
        A JSON response mapping window start date to rounded entropy.
    """
    # Build the JSON response explicitly: a bare dict return value is only
    # converted automatically by Flask 1.1 and newer.
    return jsonify(dictLst[2])
#from elasticsearch import Elasticsearch
#es = Elasticsearch(['http://106.75.10.84:9200'])
##def get_eventLst(repo):
## ###Fetch all events of repo
## frm = 1
## sz = 100
## ##Query sz events starting at offset frm
## eventLst = []
## while 1:
## query={"query":{"bool":{"must":[{"match":{"search_fields.repo":repo}},]}},'from':frm,'size':sz}
## value = es.search(index="github_event_raw",body=query,_source=['data'])
## if value['hits']['hits'] == []:
## break
## else:
## frm += sz
## for i in value['hits']['hits']:
## item = {}
## item['issue_number'] = i['_source']['data']['issue']['number']
## item['event'] = i['_source']['data']['eventType']
## item['create_at'] = i['_source']['data']['createdAt']
## eventLst.append(item)
## break
## return eventLst
def get_eventLst(repo):
    """Load the stored events of ``repo`` from ``<repo>.pkl``.

    Args:
        repo: Path prefix of the pickle file; ``'.pkl'`` is appended to it.

    Returns:
        The unpickled object converted to a ``list``.

    Raises:
        OSError: If the pickle file cannot be opened.
    """
    pickle_path = repo + '.pkl'
    # NOTE: unpickling can execute arbitrary code, so the .pkl files must
    # come from a trusted source.
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return list(loaded)
def filterEvent(startTime, endTime):
    """Count events per issue and event type inside an open time window.

    Reads the module-level ``eventLst``. An event is kept when its
    ``'createdAt'`` value is strictly greater than ``startTime`` and strictly
    less than ``endTime``. The comparison is a plain ``<`` on the stored
    values (presumably ISO-formatted strings, so that lexicographic order is
    chronological -- verify against the pickled data).

    Args:
        startTime: Exclusive lower bound of the window.
        endTime: Exclusive upper bound of the window.

    Returns:
        ``{issue_number: {eventType: count}}`` for the events in the window.
    """
    counts_by_issue = {}
    for event in eventLst:
        if not (startTime < event['createdAt'] < endTime):
            continue
        per_type = counts_by_issue.setdefault(event['issue_number'], {})
        event_type = event['eventType']
        per_type[event_type] = per_type.get(event_type, 0) + 1
    return counts_by_issue
def day2timeStamp(string):
    """Convert a date or UTC timestamp string to integer Unix seconds.

    Two formats are recognised, selected by the length of ``string``:

    * ``YYYY-MM-DD`` (10 characters): midnight of that day in the machine's
      local time zone (the string carries no zone information).
    * ``YYYY-MM-DDTHH:MM:SSZ`` (20 characters): the trailing ``Z`` denotes
      UTC, so the value is converted as UTC.

    Args:
        string: The date or timestamp text.

    Returns:
        Seconds since the Unix epoch, or ``-1`` when the length matches
        neither format.

    Raises:
        ValueError: If the length matches but the text does not parse.
    """
    if len(string) == 10:
        return int(time.mktime(time.strptime(string, "%Y-%m-%d")))
    if len(string) == 20:
        # time.mktime() would treat the parsed fields as local time and shift
        # the result by the local UTC offset; timegm() treats them as UTC.
        return calendar.timegm(time.strptime(string, "%Y-%m-%dT%H:%M:%SZ"))
    return -1
#Entropy formula
def lstToEntropy(lst):
    """Return the Shannon entropy (base 2) of ``lst`` scaled by its total.

    Each element is treated as a count; with ``p_i = count_i / total`` the
    result is ``total * sum(-p_i * log2(p_i))``.

    Args:
        lst: Sequence of non-negative counts.

    Returns:
        The scaled entropy, or ``0`` when the list is empty or sums to zero.
    """
    total = sum(lst)  # computed once instead of on every loop iteration
    if not total:
        # Empty or all-zero input carries no information.
        return 0
    result = 0
    for count in lst:
        if not count:
            # 0 * log(0) is taken as 0; log(0) itself would raise.
            continue
        p = count / total
        result += -p * log(p, 2)
    return result * total
#For the prepared entropy input data, compute the entropy of each time period and output x and y, i.e. time and entropy
#input time2issuesEvent{time:{issue_number:{eventcount}}}
#output {x:y}
def dataToEntropy(time2issuesEvent):
    """Compute one entropy value per time key.

    For every issue in a period, the issue's event counts plus one extra
    count of ``1`` are passed to ``lstToEntropy``; the period's value is the
    sum over its issues. A period with no issues gets ``0``.

    Args:
        time2issuesEvent: ``{time: {issue_number: {eventType: count}}}``.

    Returns:
        ``{time: entropy}`` with the same keys as the input.
    """
    entropy_by_time = {}
    for period, issues in time2issuesEvent.items():
        if not issues:
            entropy_by_time[period] = 0
            continue
        entropy_by_time[period] = sum(
            lstToEntropy(list(counts.values()) + [1])
            for counts in issues.values()
        )
    return entropy_by_time
#Aggregation: combine the steps above
def timelstToEntropy(timelst):
    """Compute the entropy of every window between consecutive boundaries.

    Each pair of neighbouring entries ``(timelst[k], timelst[k + 1])`` forms
    one window; its events are gathered with ``filterEvent`` and the result
    is keyed by the window's start. The last entry only closes the final
    window and never appears as a key.

    Args:
        timelst: Ordered list of window boundaries.

    Returns:
        ``{window_start: entropy}`` as produced by ``dataToEntropy``.
    """
    events_by_window = {
        start: filterEvent(start, end)
        for start, end in zip(timelst, timelst[1:])
    }
    return dataToEntropy(events_by_window)
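#####################################Entry-point functions below#################################
#Compute how a project's excitation entropy changes over time
#input repo:str
#output (x,y):(lst,lst)
#timelst=['2016-09-29', '2016-09-30'] computes the entropy produced on 2016-09-29
#timelst=['2016-09-29', '2016-09-30', '2016-10-01'] computes the entropy produced on each of the two days 2016-09-29 and 2016-09-30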
def calEntropy(repo, timelst):
    """Return the entropy series for the given window boundaries.

    Example: ``timelst=['2016-09-29', '2016-09-30']`` yields the entropy
    produced on 2016-09-29; adding ``'2016-10-01'`` yields one value for
    each of 2016-09-29 and 2016-09-30.

    Args:
        repo: Repository name. Accepted for interface compatibility but not
            used here: the events come from the module-level ``eventLst``
            read by ``filterEvent``.
        timelst: Ordered list of window boundaries.

    Returns:
        ``(x, y)``: the list of window starts and the list of their entropy
        values, in matching order.
    """
    # A second, empty ``def calEntropy(repo, year): pass`` used to follow
    # this function and silently replaced it, so every call returned None.
    # That stub is dropped so this implementation is the one that is bound.
    result = timelstToEntropy(timelst)
    return (list(result.keys()), list(result.values()))
##Build the time axis (list of dates) used for the entropy calculation
def get_date(start_year=2015, end_year=None):
    """Return every calendar day of a year range as sorted ISO strings.

    Days are collected with ``Calendar.itermonthdates``, which yields whole
    weeks, so a few days before ``start_year`` and after ``end_year`` are
    included as well (e.g. the default range starts at ``'2014-12-29'``).

    Args:
        start_year: First year to cover (inclusive).
        end_year: Last year to cover (inclusive). ``None`` means the current
            year, but never earlier than 2024, so the result always contains
            the previously hard-coded 2015-2024 range and keeps growing
            instead of stopping at the end of 2024.

    Returns:
        Sorted list of unique ``'YYYY-MM-DD'`` strings.
    """
    if end_year is None:
        end_year = max(2024, datetime.date.today().year)
    month_calendar = Calendar()
    # A set removes the days that adjacent months both report.
    unique_days = {
        str(day)
        for year in range(start_year, end_year + 1)
        for month in range(1, 13)
        for day in month_calendar.itermonthdates(year, month)
    }
    return sorted(unique_days)
if __name__ == '__main__':
    # Pre-compute the entropy series of every repository, then serve them.
    # dictLst and eventLst are plain module-level names here (read by the
    # route handlers and by filterEvent), so no ``global`` statement is
    # needed at this scope.
    repos = ['xiuos', 'openharmony', 'openeuler']
    dictLst = []
    # Neither value depends on the repository, so compute them once.
    today = str(datetime.date.today())
    allDates = get_date()
    for repo in repos:
        eventLst = get_eventLst(repo)
        if eventLst:
            min_date = min(event['createdAt'] for event in eventLst)
            # Keep only the days between the first event and today.
            dateLst = [day for day in allDates if min_date < day < today]
            # Every 7th retained day, starting with the 6th, is a boundary.
            # NOTE(review): events before the first boundary and after the
            # last one fall outside every window and are never counted --
            # confirm this offset is intended.
            timelst = dateLst[5::7]
            result = timelstToEntropy(timelst)
        else:
            # No events: min() would raise on the empty list. Store an empty
            # series so dictLst stays aligned with ``repos``.
            result = {}
        formatted_result = {key: round(value, 2) for key, value in result.items()}
        print(repo)
        print(formatted_result)
        dictLst.append(formatted_result)
    # 0.0.0.0 makes the server listen on all network interfaces.
    app.run(host='0.0.0.0', port=5000)