# Provenance: calEntropyAPI.py, added by commit
# aa69e89b9372b2d596f90bf37e9ac6613bd47cd6 ("ADD file via upload",
# Thu, 7 Nov 2024 08:49:06 +0800).
"""Per-window issue-event entropy for a fixed set of repositories, over HTTP.

When run as a script, the module loads the pickled event list of every
repository in ``REPOS``, buckets the events into consecutive time windows,
computes an entropy-based score for each window and then serves the
resulting ``{window_start: score}`` mappings as JSON through Flask.

Each event is expected to be a mapping with at least the keys
``'createdAt'``, ``'issue_number'`` and ``'eventType'``.  ``'createdAt'`` is
compared lexicographically against ``YYYY-MM-DD`` strings, so it has to be a
string that sorts chronologically (presumably ISO-8601 -- verify against the
code that produces the pickle files).
"""

import numpy as np
import time
import math
from math import *
from collections import Counter
from calendar import Calendar
import datetime
import pickle
from flask import Flask, jsonify

app = Flask(__name__)

# Repositories to analyse.  The position in this list is the position of the
# repository's result in ``dictLst`` and therefore what each route serves.
REPOS = ['xiuos', 'openharmony', 'openeuler']

# One ``{window_start: rounded_entropy}`` dict per entry of ``REPOS``.
# Filled by ``main()``; defined at module level so that the routes do not
# raise NameError when the module is imported instead of run as a script.
dictLst = []

# Events of the repository currently being processed.  ``filterEvent`` falls
# back to this list when it is not given an explicit ``events`` argument.
eventLst = []


def _entropy_response(index):
    """Return the JSON response for the repository stored at ``dictLst[index]``.

    Answers with HTTP 503 and a small JSON error body when the entropy table
    for that position has not been computed.
    """
    if index >= len(dictLst):
        return jsonify({'error': 'entropy data not loaded'}), 503
    return jsonify(dictLst[index])


@app.route('/api/xiuos/entropy', methods=['GET'])
def get_dict1():
    """Serve the entropy table of the first repository (xiuos)."""
    return _entropy_response(0)


@app.route('/api/openharmony/entropy', methods=['GET'])
def get_dict2():
    """Serve the entropy table of the second repository (openharmony)."""
    return _entropy_response(1)


@app.route('/api/openeuler/entropy', methods=['GET'])
def get_dict3():
    """Serve the entropy table of the third repository (openeuler)."""
    return _entropy_response(2)


# Legacy Elasticsearch loader, kept commented out for reference only.
# NOTE(review): it stores the keys 'event' and 'create_at', while the live
# code below reads 'eventType' and 'createdAt'.
#from elasticsearch import Elasticsearch
#es = Elasticsearch(['http://106.75.10.84:9200'])

##def get_eventLst(repo):
##    ### Fetch every event of the repo
##    frm = 1
##    sz = 100
##    ## Query sz events starting at offset frm
##    eventLst = []
##    while 1:
##        query={"query":{"bool":{"must":[{"match":{"search_fields.repo":repo}},]}},'from':frm,'size':sz}
##        value = es.search(index="github_event_raw",body=query,_source=['data'])
##        if value['hits']['hits'] == []:
##            break
##        else:
##            frm += sz
##            for i in value['hits']['hits']:
##                item = {}
##                item['issue_number'] = i['_source']['data']['issue']['number']
##                item['event'] = i['_source']['data']['eventType']
##                item['create_at'] = i['_source']['data']['createdAt']
##                eventLst.append(item)
##            break
##    return eventLst


def get_eventLst(repo):
    """Load the events of ``repo`` from ``<repo>.pkl`` in the working directory.

    Args:
        repo: Repository name; ``'.pkl'`` is appended to build the file path.

    Returns:
        The unpickled object converted to a ``list``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file.  Only load .pkl files that were produced by a trusted process.
    with open(repo + '.pkl', 'rb') as f:
        return list(pickle.load(f))


def filterEvent(startTime, endTime, events=None):
    """Count events per issue and event type inside ``[startTime, endTime)``.

    Args:
        startTime: Inclusive lower bound, compared against ``event['createdAt']``.
        endTime: Exclusive upper bound, compared against ``event['createdAt']``.
        events: Iterable of event mappings.  Defaults to the module-level
            ``eventLst``.

    Returns:
        ``{issue_number: {eventType: count}}`` for the events in the window.
    """
    if events is None:
        events = eventLst
    issue2eventCount = {}
    for event in events:
        # Half-open interval: an event stamped exactly on a boundary belongs
        # to the window that starts there instead of being dropped by both
        # neighbouring windows.
        if startTime <= event['createdAt'] < endTime:
            perIssue = issue2eventCount.setdefault(event['issue_number'], {})
            eventType = event['eventType']
            perIssue[eventType] = perIssue.get(eventType, 0) + 1
    return issue2eventCount


def day2timeStamp(string):
    """Convert a date or date-time string to an integer Unix timestamp.

    Accepts ``YYYY-MM-DD`` (10 characters) and ``YYYY-MM-DDTHH:MM:SSZ``
    (20 characters).

    Returns:
        The timestamp as ``int``, or ``-1`` when the length matches neither
        format.

    Raises:
        ValueError: If the length matches but the content does not parse.
    """
    # NOTE(review): time.mktime interprets the parsed struct as local time,
    # including for the 'Z'-suffixed form -- confirm that this is intended.
    if len(string) == 10:
        return int(time.mktime(time.strptime(string, "%Y-%m-%d")))
    elif len(string) == 20:
        return int(time.mktime(time.strptime(string, "%Y-%m-%dT%H:%M:%SZ")))
    else:
        return -1


# Entropy formula
def lstToEntropy(lst):
    """Return the base-2 Shannon entropy of ``lst`` scaled by ``sum(lst)``.

    With ``total = sum(lst)`` and ``p_i = lst[i] / total`` the result is
    ``total * sum(-p_i * log2(p_i))``.

    Args:
        lst: Sequence of non-negative counts.

    Returns:
        The scaled entropy; ``0`` for an empty or all-zero sequence.
    """
    total = sum(lst)  # hoisted: the original recomputed it on every iteration
    if not total:
        return 0
    entropy = 0
    for count in lst:
        if count:  # a zero count contributes nothing (0 * log 0 := 0)
            p = count / total
            entropy += -p * math.log2(p)
    return entropy * total


# For the prepared data, compute the entropy of every time window; the output
# maps x to y, i.e. time to entropy.
# input  time2issuesEvent: {time: {issue_number: {event: count}}}
# output {x: y}
def dataToEntropy(time2issuesEvent):
    """Sum the per-issue scaled entropies of every time window.

    Args:
        time2issuesEvent: ``{time: {issue_number: {event: count}}}``.

    Returns:
        ``{time: score}``; a window without issues scores ``0``.
    """
    result = {}
    for window, issue2event2count in time2issuesEvent.items():
        if issue2event2count:
            # A pseudo-count of 1 is appended to each issue's counts, so an
            # issue with a single event type still yields a non-zero score.
            result[window] = sum(
                lstToEntropy(list(counts.values()) + [1])
                for counts in issue2event2count.values()
            )
        else:
            result[window] = 0
    return result


# Aggregation
def timelstToEntropy(timelst, events=None):
    """Compute the entropy score of each window between consecutive boundaries.

    Args:
        timelst: Ordered window boundaries.  ``n`` boundaries define ``n - 1``
            windows, each keyed by its start boundary.
        events: Iterable of event mappings, forwarded to ``filterEvent``.
            Defaults to the module-level ``eventLst``.

    Returns:
        ``{window_start: score}``; empty when fewer than two boundaries are
        given.
    """
    time2issuesEvent = {}
    for start, end in zip(timelst, timelst[1:]):
        time2issuesEvent[start] = filterEvent(start, end, events)
    return dataToEntropy(time2issuesEvent)


##################### Execution functions below #####################

# Compute how the excitation entropy of one project changes over time.
# input  repo: str
# output (x, y): (lst, lst)
# timelst=['2016-09-29', '2016-09-30'] computes the entropy produced on
# 2016-09-29.
# timelst=['2016-09-29', '2016-09-30', '2016-10-01'] computes the entropy
# produced on 2016-09-29 and on 2016-09-30.
def calEntropy(repo, timelst):
    """Return the window starts and their entropy scores as two lists.

    The original file re-defined ``calEntropy(repo, year)`` as an empty stub
    right after this function, which shadowed it and made every call return
    ``None``; the stub has been removed.

    Args:
        repo: Unused; the events are read from the module-level ``eventLst``.
        timelst: Ordered window boundaries (see ``timelstToEntropy``).

    Returns:
        ``(x, y)`` where ``x`` lists the window starts and ``y`` the scores.
    """
    result = timelstToEntropy(timelst)
    return (list(result.keys()), list(result.values()))


# Get the time axis used for the entropy computation.
def get_date(start_year=2015, end_year=None):
    """Return the sorted, de-duplicated ``YYYY-MM-DD`` dates of a year range.

    ``Calendar.itermonthdates`` yields whole weeks, so a few days before
    January of ``start_year`` and after December of ``end_year`` are included
    as well.

    Args:
        start_year: First year to cover.
        end_year: Last year to cover, inclusive.  Defaults to the current
            year so that the axis always reaches today; the original stopped
            at 2024.

    Returns:
        Sorted list of unique date strings.
    """
    if end_year is None:
        end_year = datetime.date.today().year
    c = Calendar()
    uniqueDates = {
        str(date)
        for year in range(start_year, end_year + 1)
        for month in range(1, 13)
        for date in c.itermonthdates(year, month)
    }
    return sorted(uniqueDates)


def _weeklyBoundaries(events, today):
    """Return every seventh date strictly between the first event and ``today``.

    The candidate dates are those ``d`` from ``get_date()`` satisfying
    ``min(createdAt) < d < today`` (string comparison); the elements at
    indices 5, 12, 19, ... of that list are returned.  Returns an empty list
    when ``events`` is empty.
    """
    if not events:
        return []
    min_date = min(event['createdAt'] for event in events)
    dateLst = [d for d in get_date() if min_date < d < today]
    return dateLst[5::7]


def main():
    """Compute the entropy table of every repository, then start the server."""
    global eventLst
    dictLst.clear()
    today = str(datetime.date.today())
    for repo in REPOS:
        eventLst = get_eventLst(repo)
        timelst = _weeklyBoundaries(eventLst, today)
        result = timelstToEntropy(timelst, eventLst)
        formatted_result = {key: round(value, 2) for key, value in result.items()}
        print(repo)
        print(formatted_result)
        dictLst.append(formatted_result)
    app.run(host='0.0.0.0', port=5000)


if __name__ == '__main__':
    main()