ADD file via upload

This commit is contained in:
sy00000 2024-11-07 08:49:06 +08:00
parent 505bcc9221
commit aa69e89b93
1 changed files with 171 additions and 0 deletions

171
calEntropyAPI.py Normal file
View File

@ -0,0 +1,171 @@
import numpy as np
import time
from math import *
from collections import Counter
from calendar import Calendar
import datetime
import pickle
from flask import Flask, jsonify
# Flask application object; the @app.route decorators below register the
# entropy endpoints on it and the __main__ section starts it with app.run().
app = Flask(__name__)
@app.route('/api/xiuos/entropy', methods=['GET'])
def get_dict1():
    """Serve the precomputed entropy series for the xiuos repository.

    Returns entry 0 of the module-level ``dictLst``; the ``__main__`` section
    fills that list in the order xiuos, openharmony, openeuler.
    """
    xiuos_entropy = dictLst[0]
    return xiuos_entropy
@app.route('/api/openharmony/entropy', methods=['GET'])
def get_dict2():
    """Serve the precomputed entropy series for the openharmony repository.

    Returns entry 1 of the module-level ``dictLst``; the ``__main__`` section
    fills that list in the order xiuos, openharmony, openeuler.
    """
    openharmony_entropy = dictLst[1]
    return openharmony_entropy
@app.route('/api/openeuler/entropy', methods=['GET'])
def get_dict3():
    """Serve the precomputed entropy series for the openeuler repository.

    Returns entry 2 of the module-level ``dictLst``; the ``__main__`` section
    fills that list in the order xiuos, openharmony, openeuler.
    """
    openeuler_entropy = dictLst[2]
    return openeuler_entropy
#from elasticsearch import Elasticsearch
#es = Elasticsearch(['http://106.75.10.84:9200'])
##def get_eventLst(repo):
## ### Fetch all events of repo
## frm = 1
## sz = 100
## ## Query sz events starting at offset frm
## eventLst = []
## while 1:
## query={"query":{"bool":{"must":[{"match":{"search_fields.repo":repo}},]}},'from':frm,'size':sz}
## value = es.search(index="github_event_raw",body=query,_source=['data'])
## if value['hits']['hits'] == []:
## break
## else:
## frm += sz
## for i in value['hits']['hits']:
## item = {}
## item['issue_number'] = i['_source']['data']['issue']['number']
## item['event'] = i['_source']['data']['eventType']
## item['create_at'] = i['_source']['data']['createdAt']
## eventLst.append(item)
## break
## return eventLst
def get_eventLst(repo):
    """Load the pickled events of *repo* and return them as a list.

    The data is read from the file ``<repo>.pkl`` (the suffix is appended to
    *repo* as given, so *repo* may include a directory part).

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    pickle_path = repo + '.pkl'
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    # Materialize whatever iterable was pickled into a plain list.
    return list(loaded)
def filterEvent(startTime, endTime):
    """Count events per issue and event type inside an open time window.

    Reads the module-level ``eventLst``. An event is kept when its
    ``'createdAt'`` value is strictly greater than *startTime* and strictly
    less than *endTime* (plain ``<`` comparison of the stored values).

    Returns:
        dict mapping ``issue_number`` -> {``eventType``: occurrence count}
        for the events inside the window.
    """
    counts_by_issue = {}
    for event in eventLst:
        if not (startTime < event['createdAt'] < endTime):
            continue
        # One nested counter dict per issue, created on first sight.
        type_counts = counts_by_issue.setdefault(event['issue_number'], {})
        kind = event['eventType']
        type_counts[kind] = type_counts.get(kind, 0) + 1
    return counts_by_issue
def day2timeStamp(string):
    """Convert a date or date-time string to an integer Unix timestamp.

    Two layouts are recognised, selected purely by string length:
      * 10 characters: ``YYYY-MM-DD``
      * 20 characters: ``YYYY-MM-DDTHH:MM:SSZ`` (the ``Z`` is matched literally)

    The parsed fields are converted with ``time.mktime``, i.e. interpreted as
    local time. Any other length yields ``-1``.
    """
    layouts = {
        10: "%Y-%m-%d",
        20: "%Y-%m-%dT%H:%M:%SZ",
    }
    layout = layouts.get(len(string))
    if layout is None:
        return -1
    parsed = time.strptime(string, layout)
    return int(time.mktime(parsed))
# Entropy formula
def lstToEntropy(lst):
    """Return the base-2 Shannon entropy of the counts in *lst*, scaled by their sum.

    With ``N = sum(lst)`` and ``p_i = lst[i] / N`` the result is
    ``N * sum(-p_i * log2(p_i))``.

    Args:
        lst: iterable-and-summable collection of non-negative counts.

    Returns:
        The scaled entropy. ``0`` is returned for an empty list or when the
        counts sum to zero.
    """
    # Hoisted: the total is loop-invariant (it was recomputed per element).
    total = sum(lst)
    if not total:
        return 0
    result = 0
    for count in lst:
        if not count:
            # By convention 0 * log(0) contributes nothing; log(0) would raise.
            continue
        p = count / total
        result += -p * log(p, 2)
    return result * total
# For the prepared data, compute the entropy of every time period; the output
# pairs x and y, i.e. time and entropy.
# input  time2issuesEvent {time: {issue_number: {eventType: count}}}
# output {x: y}
def dataToEntropy(time2issuesEvent):
    """Map each time key to the summed per-issue entropy of its events.

    For every issue in a period, a count of 1 is appended to that issue's
    event counts before they are passed to ``lstToEntropy``; the per-issue
    values are then added up. A period without any issue maps to ``0``.
    (A per-issue average -- the sum divided by the number of issues -- was
    left commented out in the original and is not applied.)
    """
    result = {}
    for period, issue_counts in time2issuesEvent.items():
        if not issue_counts:
            result[period] = 0
            continue
        result[period] = sum(
            lstToEntropy(list(event_counts.values()) + [1])
            for event_counts in issue_counts.values()
        )
    return result
# Aggregation
def timelstToEntropy(timelst):
    """Compute the entropy of each window between consecutive entries of *timelst*.

    Every adjacent pair ``(timelst[i], timelst[i + 1])`` is passed to
    ``filterEvent``; the window is keyed by its start value. The collected
    windows are then handed to ``dataToEntropy``, whose result is returned.
    """
    windows = {}
    for window_start, window_end in zip(timelst, timelst[1:]):
        windows[window_start] = filterEvent(window_start, window_end)
    return dataToEntropy(windows)
##################################### Entry-point functions below #################################
# Compute how a project's excitation entropy changes over time
# input  repo: str
# output (x, y): (list, list)
# timelst=['2016-09-29', '2016-09-30'] computes the entropy produced on 2016-09-29
# timelst=['2016-09-29', '2016-09-30', '2016-10-01'] computes the entropy produced
# on the two days 2016-09-29 and 2016-09-30
def calEntropy(repo, timelst):
    """Return ``(x, y)``: window start times and their entropy values.

    *repo* is accepted but not used here; the events come from whatever
    ``timelstToEntropy`` reads.
    """
    entropy_by_start = timelstToEntropy(timelst)
    starts = list(entropy_by_start)
    entropies = list(entropy_by_start.values())
    return (starts, entropies)
def calEntropy(repo, year):
    """Unimplemented stub: ignores both arguments and returns ``None``.

    Presumably reserved for a per-year variant -- TODO confirm.

    NOTE(review): this definition reuses the name ``calEntropy`` and so
    rebinds it, replacing the ``calEntropy(repo, timelst)`` implementation
    defined immediately above; that one is no longer reachable by name.
    """
    return None
## Build the time axis used for the entropy calculation
def get_date(start_year=2015, end_year=None):
    """Return every calendar day of the given years as sorted ISO date strings.

    The days come from ``Calendar.itermonthdates``, which yields whole weeks,
    so the result also contains the few days of the adjacent months/years
    needed to complete the first and last week. Duplicates produced by
    overlapping months are removed.

    Args:
        start_year: first year to cover (default 2015, as before).
        end_year: last year to cover, inclusive. Defaults to the current
            year so the axis keeps growing instead of stopping at a
            hard-coded year.

    Returns:
        Sorted list of unique ``'YYYY-MM-DD'`` strings.
    """
    if end_year is None:
        end_year = datetime.date.today().year
    cal = Calendar()
    unique_days = set()
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            unique_days.update(str(day) for day in cal.itermonthdates(year, month))
    return sorted(unique_days)
if __name__ == '__main__':
    # Precompute one entropy series per repository, then start the API server.
    # dictLst is read by the route handlers and eventLst by filterEvent; both
    # are module-level names because this block runs at module scope.
    repos = ['xiuos', 'openharmony', 'openeuler']
    dictLst = []
    for repo in repos:
        eventLst = get_eventLst(repo)
        today = str(datetime.date.today())
        min_date = min(event['createdAt'] for event in eventLst)
        # Keep only the days strictly between the earliest event and today.
        dateLst = [day for day in get_date() if min_date < day < today]
        # Weekly boundaries: every 7th day, starting with the one at index 5.
        timelst = dateLst[5::7]
        result = timelstToEntropy(timelst)
        formatted_result = {
            start: round(entropy, 2) for start, entropy in result.items()
        }
        print(repo)
        print(formatted_result)
        dictLst.append(formatted_result)
    app.run(host='0.0.0.0', port=5000)