finish modifying predictor and comments

2021-08-21 22:22:16 +08:00 · 2021-08-21 22:22:16 +08:00 · 7c9a850872
parent 1c0e96825d
commit 7c9a850872
2 changed files with 51 additions and 17 deletions
--- a/app/prediction_service/predictor.py
+++ b/app/prediction_service/predictor.py
@ -1,9 +1,14 @@
 # predict using the trained model

+from inspect import trace
+from math import ceil
 import sys, pathlib, traceback
-from typing import List, Dict
+from typing import List, Dict, Optional
 sys.path.append(str(pathlib.Path(__file__).resolve().parents[2]))

+import pandas as pd
+import numpy as np
+
 from app.models.pull_request import PullRequest
 from app.models.installation import Installation
 from app.prediction_service.factor_getter import FactorGetter
@ -11,7 +16,7 @@ from app.utils.config_loader import ConfigLoader
 from app.prediction_service.trainer import Trainer

 '''
-需要修改为合适的factors list，并且要修改预测函数及调用，同时要修改预测后次方转换（2^Y）才是最终分钟数
+并且要修改预测函数及调用，同时要修改预测后次方转换（2^Y）才是最终分钟数
 还需要修改预测后的返回结果（返回信息，非boolean，如何体现数值大小）
 '''

@ -40,32 +45,57 @@ class Predictor():
        except Exception as e:
            print("error with func _factor_cut_suffix: %s" % (repr(e)))
            print(traceback.format_exc())
+
+    def _convert_factor_dict_2_df(self, factorDict: Dict):
+        '''
+            convert the factor dict of one pull request into a one-row DataFrame
+            params:
+                factorDict: factor name -> factor value; it is left unmodified
+            return:
+                pd.DataFrame: one column per factor, a single row with the values
+                None: error (the exception is printed, nothing is returned)
+        '''
+        try:
+            # wrap every value in a list inside a NEW dict: writing the lists
+            # back into factorDict would silently change the dict that the
+            # caller keeps using after this call
+            rowDict = {key: [value] for key, value in factorDict.items()}
+            return pd.DataFrame.from_dict(rowDict)
+        except Exception as e:
+            print("error with func _convert_factor_dict_2_df: %s" % (repr(e)))
+            print(traceback.format_exc())
+
+    def _log_transfer(self, factors: List, df: pd.DataFrame) -> pd.DataFrame:
+        '''
+            apply log2(x + 0.5) to the listed factor columns of df
+            params:
+                factors: column names to transform; names that are not
+                         columns of df are skipped
+                df: the factor DataFrame, its columns are overwritten in place
+            return:
+                pd.DataFrame: the same df after the transform
+                None: error (the exception is printed, nothing is returned)
+        '''
+        try:
+            for name in factors:
+                if name not in df.columns:
+                    continue
+                shifted = df[name] + 0.5
+                df[name] = np.log2(shifted)
+            return df
+        except Exception as e:
+            print("error with func _log_transfer: %s" % (repr(e)))
+            print(traceback.format_exc())
    
-    def predict(self, pr: PullRequest, installation: Installation) -> bool:
+    def predict(self, pr: PullRequest, installation: Installation) -> Optional[int]:
        '''
-            predict whether the pull request can be merged
+            predict how many more minutes the pull request needs to finish
            params:
                pr: with owner login, repo name and number
                installation: which installation is for
            return:
-                can merge or not: bool
+                int: >0 the minutes needed for the pr to finish
+                int: -1 wrong prediction (the predicted total is not larger
+                     than the minutes the pr has already been alive)
+                None: error
        '''
        try:
            # get the factors for this pr
            factorDict = self._get_factors(pr, installation)
+            # read the elapsed lifetime as a plain number BEFORE the dict is
+            # handed to the DataFrame conversion, so the comparison below never
+            # depends on what the conversion does to factorDict
+            lifetimeMinutes = factorDict["lifetime_minutes"]
+            factorDF = self._convert_factor_dict_2_df(factorDict)
            factorList = ConfigLoader().load_prediction_service_config()["trainer"]["factor_list"][self.type]
-            X_test = [factorDict[self._factor_cut_suffix(f, ["_open", "_close"])] for f in factorList]
+            factorList = [self._factor_cut_suffix(f, ["_open", "_close"]) for f in factorList]
+            # the factors are fed to the model after the log2(x + 0.5) transform
+            factorDF = self._log_transfer(factorList, factorDF)
+            X_test = [factorDF[f].iloc[0] for f in factorList]
            if self.type == "submission":
                predictions = self.modelSubmission.predict([X_test])
            elif self.type == "process":
                predictions = self.modelProcess.predict([X_test])
-
-            if predictions[0] == 1:
-                return True
-            elif predictions[0] == 0:
-                return False
+            else:
+                # without this branch an unknown type only shows up later as
+                # an unbound `predictions` NameError
+                raise Exception("error with the predictor type of func predict: %s" % (repr(self.type)))
+            # turn the model output back into minutes (2^Y - 0.5)
+            prediction = 2**predictions[0][0] - 0.5
+            if prediction <= lifetimeMinutes:
+                print("the prediction is not correct.")
+                return -1
            else:
-                raise Exception("error with the prediction result of func predict.")
+                return ceil(prediction - lifetimeMinutes)
        except Exception as e:
            print("error with func predict: %s" % (repr(e)))
-            print(traceback.format_exc())
+            print(traceback.format_exc())
+            return None
--- a/app/services/comments.py
+++ b/app/services/comments.py
@ -8,8 +8,8 @@ from app.db.operators.pull_request_operator import PullRequestOperator
 from app.utils.global_variables import GlobalVariable
 from app.prediction_service.predictor import Predictor

-LATENCY_ACCEPT = "✔️This pull request can be merged"
-LATENCY_REJECT = "✖️This pull request cannot be merged"
+# comment body for a prediction; %s is filled with the minute count returned
+# by Predictor.predict
+LATENCY_TEMPLATE = "⏰This pull request needs %s minutes to finish."
+# comment body without a number
+# NOTE(review): the callers shown in this change raise when the prediction is
+# None before the conditional that would pick this constant, and the -1
+# ("wrong prediction") result is formatted with LATENCY_TEMPLATE - confirm
+# whether this constant was meant to be used for -1
+LATENCY_WRONG = "⏰This pull request can be finished soon."

 def return_pr_latency(prTrigger: PRTrigger) -> bool:
    try:
@ -22,7 +22,9 @@ def return_pr_latency(prTrigger: PRTrigger) -> bool:

        # predict the result:
        latency = Predictor(trainer=GlobalVariable.trainer, type="submission").predict(pr=prTrigger.pr, installation=prTrigger.installation)
-        latency_comment = LATENCY_ACCEPT if latency else LATENCY_REJECT
+        if latency is None:
+            raise Exception("error with the prediction, None return")
+        latency_comment = LATENCY_TEMPLATE % (str(latency)) if latency is not None else LATENCY_WRONG

        token = getToken(prTrigger.installation)
        comment = PRComment(pr=prTrigger.pr, body=latency_comment)
@ -50,10 +52,12 @@ async def return_pr_latency_schedular(prSchedulerTrigger:PRSchedulerTrigger) ->
    try:
        # predict the result:
        latency = Predictor(trainer=GlobalVariable.trainer, type="process").predict(pr=prSchedulerTrigger.pr, installation=prSchedulerTrigger.installation)
-        latency_comment = LATENCY_ACCEPT if latency else LATENCY_REJECT
+        if latency is None:
+            raise Exception("error with the prediction, None return")
+        latency_comment = LATENCY_TEMPLATE % (str(latency)) if latency is not None else LATENCY_WRONG

        token = getToken(prSchedulerTrigger.installation)
-        comment = PRComment(pr=prSchedulerTrigger.pr, body=latency)
+        comment = PRComment(pr=prSchedulerTrigger.pr, body=latency_comment)
        headers = {'Authorization': 'token ' + token, 'Accept': 'application/vnd.github.v3+json'}
        url = "https://api.github.com/repos/{owner}/{repo}/issues/{pull_request_number}/comments".format(owner=comment.pr.owner.login, repo=comment.pr.repo.name, pull_request_number=comment.pr.number)
        data = {"body": comment.body}