finish modifying predictor and comments

This commit is contained in:
zhangxunhui 2021-08-21 22:22:16 +08:00
parent 1c0e96825d
commit 7c9a850872
2 changed files with 51 additions and 17 deletions

View File

@ -1,9 +1,14 @@
# predict using the trained model
from inspect import trace
from math import ceil
import sys, pathlib, traceback
from typing import List, Dict
from typing import List, Dict, Optional
sys.path.append(str(pathlib.Path(__file__).resolve().parents[2]))
import pandas as pd
import numpy as np
from app.models.pull_request import PullRequest
from app.models.installation import Installation
from app.prediction_service.factor_getter import FactorGetter
@ -11,7 +16,7 @@ from app.utils.config_loader import ConfigLoader
from app.prediction_service.trainer import Trainer
'''
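TODO: switch to a suitable factors list, and update the prediction function and its callers; the prediction must also be exponentiated afterwards (2^Y is the final number of minutes).
TODO: update the prediction function and its callers; the prediction must also be exponentiated afterwards (2^Y is the final number of minutes).
TODO: the value returned after prediction still needs rework: the result is not a boolean, so how should the magnitude of the number be conveyed?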
'''
@ -40,32 +45,57 @@ class Predictor():
except Exception as e:
print("error with func _factor_cut_suffix: %s" % (repr(e)))
print(traceback.format_exc())
def _convert_factor_dict_2_df(self, factorDict: Dict) -> Optional[pd.DataFrame]:
    '''
    convert the factor dict of one pull request into a single-row DataFrame
    params:
        factorDict: factor name -> factor value; it is NOT modified
    return:
        pd.DataFrame: one row, one column per key of factorDict
        None: error (the exception is printed, not raised)
    '''
    try:
        # Wrap every value in a one-element list so that from_dict builds
        # exactly one row. A new dict is built on purpose: overwriting
        # factorDict in place would turn the caller's scalar values into
        # lists (predict reads factorDict["lifetime_minutes"] after this call).
        rowDict = {key: [value] for key, value in factorDict.items()}
        return pd.DataFrame.from_dict(rowDict)
    except Exception as e:
        print("error with func _convert_factor_dict_2_df: %s" % (repr(e)))
        print(traceback.format_exc())
        return None
def _log_transfer(self, factors: List, df: pd.DataFrame) -> pd.DataFrame:
    '''
    apply the log2(x + 0.5) transform to the given factor columns
    params:
        factors: names of the columns to transform; names that are not
                 columns of df are skipped
        df: the factor values; it is NOT modified
    return:
        pd.DataFrame: a transformed copy of df
        None: error (the exception is printed, not raised)
    '''
    try:
        # Work on a copy so the frame passed in by the caller keeps its
        # raw (untransformed) values.
        result = df.copy()
        for factor in factors:
            if factor in result.columns:
                # The 0.5 offset keeps a zero-valued factor finite
                # (log2(0.5) == -1 instead of -inf).
                result[factor] = np.log2(result[factor] + 0.5)
        return result
    except Exception as e:
        print("error with func _log_transfer: %s" % (repr(e)))
        print(traceback.format_exc())
        return None
def predict(self, pr: PullRequest, installation: Installation) -> Optional[int]:
    '''
    predict how many more minutes the pull request needs to finish
    params:
        pr: with owner login, repo name and number
        installation: which installation is for
    return:
        int: >0 the minutes needed for the pr to finish
        int: -1 wrong prediction (the predicted lifetime is not larger
             than the lifetime the pr already has)
        None: error (the exception is printed, not raised)
    '''
    try:
        # get the factors for this pr
        factorDict = self._get_factors(pr, installation)

        # Read the scalar lifetime before building the DataFrame and hand
        # the converter a shallow copy: the converter may wrap each dict
        # value in a list, which would break the comparison below.
        lifetimeMinutes = factorDict["lifetime_minutes"]
        factorDF = self._convert_factor_dict_2_df(dict(factorDict))

        # The configured factor names carry "_open"/"_close" suffixes;
        # strip them to get the column names, log-transform those columns
        # and read the single row in the configured order.
        factorList = ConfigLoader().load_prediction_service_config()["trainer"]["factor_list"][self.type]
        factorList = [self._factor_cut_suffix(f, ["_open", "_close"]) for f in factorList]
        factorDF = self._log_transfer(factorList, factorDF)
        X_test = [factorDF[f].iloc[0] for f in factorList]

        if self.type == "submission":
            model = self.modelSubmission
        elif self.type == "process":
            model = self.modelProcess
        else:
            # fail with a clear message instead of an unbound local below
            raise Exception("unknown predictor type: %s" % (self.type))
        predictions = model.predict([X_test])

        # Undo the log2(y + 0.5) transform to get minutes.
        # NOTE(review): presumably the model target was trained on
        # log2(minutes + 0.5) - confirm against Trainer.
        prediction = 2**predictions[0][0] - 0.5
        if prediction <= lifetimeMinutes:
            print("the prediction is not correct.")
            return -1
        return ceil(prediction - lifetimeMinutes)
    except Exception as e:
        print("error with func predict: %s" % (repr(e)))
        print(traceback.format_exc())
        return None

View File

@ -8,8 +8,8 @@ from app.db.operators.pull_request_operator import PullRequestOperator
from app.utils.global_variables import GlobalVariable
from app.prediction_service.predictor import Predictor
# Comment bodies selected by the truthiness of the prediction result
# (LATENCY_ACCEPT if latency else LATENCY_REJECT).
LATENCY_ACCEPT = "This pull request can be merged"
LATENCY_REJECT = "This pull request cannot be merged"
# Comment body for a numeric prediction; the callers fill %s with str(latency).
LATENCY_TEMPLATE = "⏰This pull request needs %s minutes to finish."
# Alternative comment body the callers pick when latency is None.
# NOTE(review): the callers raise on None before that choice is made, so the
# branch looks unreachable; presumably the -1 ("wrong prediction") result of
# Predictor.predict is the intended trigger - confirm.
LATENCY_WRONG = "⏰This pull request can be finished soon."
def return_pr_latency(prTrigger: PRTrigger) -> bool:
try:
@ -22,7 +22,9 @@ def return_pr_latency(prTrigger: PRTrigger) -> bool:
# predict the result:
latency = Predictor(trainer=GlobalVariable.trainer, type="submission").predict(pr=prTrigger.pr, installation=prTrigger.installation)
latency_comment = LATENCY_ACCEPT if latency else LATENCY_REJECT
if latency is None:
raise Exception("error with the prediction, None return")
latency_comment = LATENCY_TEMPLATE % (str(latency)) if latency is not None else LATENCY_WRONG
token = getToken(prTrigger.installation)
comment = PRComment(pr=prTrigger.pr, body=latency_comment)
@ -50,10 +52,12 @@ async def return_pr_latency_schedular(prSchedulerTrigger:PRSchedulerTrigger) ->
try:
# predict the result:
latency = Predictor(trainer=GlobalVariable.trainer, type="process").predict(pr=prSchedulerTrigger.pr, installation=prSchedulerTrigger.installation)
latency_comment = LATENCY_ACCEPT if latency else LATENCY_REJECT
if latency is None:
raise Exception("error with the prediction, None return")
latency_comment = LATENCY_TEMPLATE % (str(latency)) if latency is not None else LATENCY_WRONG
token = getToken(prSchedulerTrigger.installation)
comment = PRComment(pr=prSchedulerTrigger.pr, body=latency)
comment = PRComment(pr=prSchedulerTrigger.pr, body=latency_comment)
headers = {'Authorization': 'token ' + token, 'Accept': 'application/vnd.github.v3+json'}
url = "https://api.github.com/repos/{owner}/{repo}/issues/{pull_request_number}/comments".format(owner=comment.pr.owner.login, repo=comment.pr.repo.name, pull_request_number=comment.pr.number)
data = {"body": comment.body}