finish modifying predictor and comments

This commit is contained in:
zhangxunhui 2021-08-21 22:22:16 +08:00
parent 1c0e96825d
commit 7c9a850872
2 changed files with 51 additions and 17 deletions

View File

@ -1,9 +1,14 @@
# predict using the trained model # predict using the trained model
from inspect import trace
from math import ceil
import sys, pathlib, traceback import sys, pathlib, traceback
from typing import List, Dict from typing import List, Dict, Optional
sys.path.append(str(pathlib.Path(__file__).resolve().parents[2])) sys.path.append(str(pathlib.Path(__file__).resolve().parents[2]))
import pandas as pd
import numpy as np
from app.models.pull_request import PullRequest from app.models.pull_request import PullRequest
from app.models.installation import Installation from app.models.installation import Installation
from app.prediction_service.factor_getter import FactorGetter from app.prediction_service.factor_getter import FactorGetter
@ -11,7 +16,7 @@ from app.utils.config_loader import ConfigLoader
from app.prediction_service.trainer import Trainer from app.prediction_service.trainer import Trainer
''' '''
需要修改为合适的factors list并且要修改预测函数及调用同时要修改预测后次方转换2^Y才是最终分钟数 并且要修改预测函数及调用同时要修改预测后次方转换2^Y才是最终分钟数
还需要修改预测后的返回结果返回信息非boolean如何体现数值大小 还需要修改预测后的返回结果返回信息非boolean如何体现数值大小
''' '''
@ -40,32 +45,57 @@ class Predictor():
except Exception as e: except Exception as e:
print("error with func _factor_cut_suffix: %s" % (repr(e))) print("error with func _factor_cut_suffix: %s" % (repr(e)))
print(traceback.format_exc()) print(traceback.format_exc())
def _convert_factor_dict_2_df(self, factorDict: Dict):
try:
for key, value in factorDict.items():
factorDict[key] = [value]
df = pd.DataFrame.from_dict(factorDict)
return df
except Exception as e:
print("error with func _convert_factor_dict_2_df: %s" % (repr(e)))
print(traceback.format_exc())
def _log_transfer(self, factors: List, df: pd.DataFrame) -> pd.DataFrame:
try:
for factor in factors:
if factor in df.columns:
df[factor] = np.log2(df[factor] + 0.5)
return df
except Exception as e:
print("error with func _log_transfer: %s" % (repr(e)))
print(traceback.format_exc())
def predict(self, pr: PullRequest, installation: Installation) -> Optional[int]:
    '''
    predict how many more minutes the pull request needs to finish
    params:
        pr: with owner login, repo name and number
        installation: which installation is for
    return:
        int: >0 the minutes needed for the pr to finish
        int: -1 wrong prediction (predicted total is not larger than the
             lifetime the pr already has)
        None: error (the error is printed, not raised)
    '''
    try:
        # get the factors for this pr
        factorDict = self._get_factors(pr, installation)

        # Read the elapsed lifetime BEFORE building the DataFrame: the
        # conversion helper may wrap the dict's values in one-element lists,
        # and the comparison/subtraction below needs a plain number.
        lifetimeMinutes = factorDict["lifetime_minutes"]

        factorDF = self._convert_factor_dict_2_df(factorDict)

        # factor names configured for this predictor type, with the
        # "_open"/"_close" suffixes cut so they match the factor dict keys
        factorList = ConfigLoader().load_prediction_service_config()["trainer"]["factor_list"][self.type]
        factorList = [self._factor_cut_suffix(f, ["_open", "_close"]) for f in factorList]

        factorDF = self._log_transfer(factorList, factorDF)
        X_test = [factorDF[f].iloc[0] for f in factorList]

        if self.type == "submission":
            predictions = self.modelSubmission.predict([X_test])
        elif self.type == "process":
            predictions = self.modelProcess.predict([X_test])
        else:
            # fail with a clear message instead of an unbound-variable error
            raise ValueError("error with the predictor type of func predict: %s" % (repr(self.type)))

        # invert the log2(x + 0.5) transform to get minutes back
        # NOTE(review): assumes the trainer fits the target with the same
        # transform and that predict() returns a 2-D result - confirm.
        prediction = 2**predictions[0][0] - 0.5

        if prediction <= lifetimeMinutes:
            print("the prediction is not correct.")
            return -1
        return ceil(prediction - lifetimeMinutes)
    except Exception as e:
        print("error with func predict: %s" % (repr(e)))
        print(traceback.format_exc())
        return None

View File

@ -8,8 +8,8 @@ from app.db.operators.pull_request_operator import PullRequestOperator
from app.utils.global_variables import GlobalVariable from app.utils.global_variables import GlobalVariable
from app.prediction_service.predictor import Predictor from app.prediction_service.predictor import Predictor
LATENCY_ACCEPT = "This pull request can be merged" LATENCY_TEMPLATE = "⏰This pull request needs %s minutes to finish."
LATENCY_REJECT = "This pull request cannot be merged" LATENCY_WRONG = "⏰This pull request can be finished soon."
def return_pr_latency(prTrigger: PRTrigger) -> bool: def return_pr_latency(prTrigger: PRTrigger) -> bool:
try: try:
@ -22,7 +22,9 @@ def return_pr_latency(prTrigger: PRTrigger) -> bool:
# predict the result: # predict the result:
latency = Predictor(trainer=GlobalVariable.trainer, type="submission").predict(pr=prTrigger.pr, installation=prTrigger.installation) latency = Predictor(trainer=GlobalVariable.trainer, type="submission").predict(pr=prTrigger.pr, installation=prTrigger.installation)
latency_comment = LATENCY_ACCEPT if latency else LATENCY_REJECT if latency is None:
raise Exception("error with the prediction, None return")
latency_comment = LATENCY_TEMPLATE % (str(latency)) if latency is not None else LATENCY_WRONG
token = getToken(prTrigger.installation) token = getToken(prTrigger.installation)
comment = PRComment(pr=prTrigger.pr, body=latency_comment) comment = PRComment(pr=prTrigger.pr, body=latency_comment)
@ -50,10 +52,12 @@ async def return_pr_latency_schedular(prSchedulerTrigger:PRSchedulerTrigger) ->
try: try:
# predict the result: # predict the result:
latency = Predictor(trainer=GlobalVariable.trainer, type="process").predict(pr=prSchedulerTrigger.pr, installation=prSchedulerTrigger.installation) latency = Predictor(trainer=GlobalVariable.trainer, type="process").predict(pr=prSchedulerTrigger.pr, installation=prSchedulerTrigger.installation)
latency_comment = LATENCY_ACCEPT if latency else LATENCY_REJECT if latency is None:
raise Exception("error with the prediction, None return")
latency_comment = LATENCY_TEMPLATE % (str(latency)) if latency is not None else LATENCY_WRONG
token = getToken(prSchedulerTrigger.installation) token = getToken(prSchedulerTrigger.installation)
comment = PRComment(pr=prSchedulerTrigger.pr, body=latency) comment = PRComment(pr=prSchedulerTrigger.pr, body=latency_comment)
headers = {'Authorization': 'token ' + token, 'Accept': 'application/vnd.github.v3+json'} headers = {'Authorization': 'token ' + token, 'Accept': 'application/vnd.github.v3+json'}
url = "https://api.github.com/repos/{owner}/{repo}/issues/{pull_request_number}/comments".format(owner=comment.pr.owner.login, repo=comment.pr.repo.name, pull_request_number=comment.pr.number) url = "https://api.github.com/repos/{owner}/{repo}/issues/{pull_request_number}/comments".format(owner=comment.pr.owner.login, repo=comment.pr.repo.name, pull_request_number=comment.pr.number)
data = {"body": comment.body} data = {"body": comment.body}