fix: load env

lilong.129 2025-03-23 10:06:50 +08:00
parent 148d70accf
commit 2ad5c4f6db
5 changed files with 41 additions and 8 deletions


@@ -1 +1 @@
v5.0.0-beta-2503221523
v5.0.0-beta-2503231006


@@ -58,6 +58,7 @@ func loadEnv() {
Str("path", envFile).Msg("overload env file failed")
}
log.Info().Str("path", envFile).Msg("overload env success")
return
}
// reached root directory
@@ -154,6 +155,12 @@ func (c *CustomTransport) RoundTrip(req *http.Request) (*http.Response, error) {
return c.Transport.RoundTrip(req)
}
type OutputFormat struct {
Thought string `json:"thought"`
Action string `json:"action"`
Error string `json:"error,omitempty"`
}
// GetModelConfig gets the OpenAI config
func GetModelConfig() (*openai.ChatModelConfig, error) {
loadEnv()
@@ -169,6 +176,11 @@ func GetModelConfig() (*openai.ChatModelConfig, error) {
}
}
// outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(&OutputFormat{}, nil)
// if err != nil {
// log.Fatal().Err(err).Msg("NewSchemaRefForValue failed")
// }
config := &openai.ChatModelConfig{
HTTPClient: &http.Client{
Timeout: defaultTimeout,
@@ -177,6 +189,17 @@ func GetModelConfig() (*openai.ChatModelConfig, error) {
Headers: envConfig.Headers,
},
},
// TODO: set structured response format
// https://github.com/cloudwego/eino-ext/blob/main/components/model/openai/examples/structured/structured.go
// ResponseFormat: &openai2.ChatCompletionResponseFormat{
// Type: openai2.ChatCompletionResponseFormatTypeJSONSchema,
// JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{
// Name: "thought_and_action",
// Description: "data that describes planning thought and action",
// Schema: outputFormatSchema.Value,
// Strict: false,
// },
// },
}
if baseURL := GetEnvConfig(EnvOpenAIBaseURL); baseURL != "" {
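The commented-out fragments above sketch the structured-output wiring that the TODO references. Putting those fragments together, a minimal untested version of that setup could look roughly like the snippet below. It reuses the OutputFormat struct and openapi3gen call shown earlier, and assumes openai2 is the OpenAI package that the ResponseFormat field of eino-ext's ChatModelConfig expects; nothing here is part of this commit.

// Sketch only (not in this commit): enable a JSON-schema response format
// for the planner output, following the commented-out code above.
// openai2 is assumed to be the OpenAI package referenced by that code.
outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(&OutputFormat{}, nil)
if err != nil {
	log.Fatal().Err(err).Msg("NewSchemaRefForValue failed")
}
config.ResponseFormat = &openai2.ChatCompletionResponseFormat{
	Type: openai2.ChatCompletionResponseFormatTypeJSONSchema,
	JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{
		Name:        "thought_and_action",
		Description: "data that describes planning thought and action",
		Schema:      outputFormatSchema.Value,
		// Strict is left false only because the commented-out code sets it that way.
		Strict: false,
	},
}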


@@ -23,7 +23,7 @@ type ActionParser struct {
// Parse parses the prediction text and extracts actions
func (p *ActionParser) Parse(predictionText string) ([]ParsedAction, error) {
// try parsing JSON format, from VLMs like GPT-4o
// try parsing JSON format, from VLMs like openai/gpt-4o
var jsonActions []ParsedAction
jsonActions, jsonErr := p.parseJSON(predictionText)
if jsonErr == nil {
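For illustration only, a JSON-style prediction from an OpenAI-compatible VLM could look like the single line below, using the field names of the OutputFormat struct added in this commit; whether parseJSON accepts exactly this shape depends on its implementation, which is not shown in this diff, and the coordinates and wording are made up.

{"thought": "The login button is near the bottom of the screen.", "action": "click(start_box='[540,1720]')"}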


@@ -42,6 +42,7 @@ func NewPlanner(ctx context.Context) (*Planner, error) {
parser := NewActionParser(1000)
return &Planner{
ctx: ctx,
config: config,
model: model,
parser: parser,
}, nil
@@ -50,6 +51,7 @@ func NewPlanner(ctx context.Context) (*Planner, error) {
type Planner struct {
ctx context.Context
model model.ChatModel
config *openai.ChatModelConfig
parser *ActionParser
history []*schema.Message // conversation history
}
@@ -79,7 +81,8 @@ func (p *Planner) Call(opts *PlanningOptions) (*PlanningResult, error) {
logRequest(p.history)
startTime := time.Now()
resp, err := p.model.Generate(p.ctx, p.history)
log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()).Msg("call model service")
log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()).
Str("model", p.config.Model).Msg("call model service")
if err != nil {
return nil, fmt.Errorf("request model service failed: %w", err)
}
@@ -153,8 +156,15 @@ func logRequest(messages []*schema.Message) {
}
func logResponse(resp *schema.Message) {
log.Info().Str("role", string(resp.Role)).
Str("content", resp.Content).Msg("log response message")
logger := log.Info().Str("role", string(resp.Role)).
Str("content", resp.Content)
if resp.ResponseMeta != nil {
logger = logger.Interface("response_meta", resp.ResponseMeta)
}
if resp.Extra != nil {
logger = logger.Interface("extra", resp.Extra)
}
logger.Msg("log response message")
}
// appendConversationHistory adds a message to the conversation history


@@ -8,10 +8,10 @@ Thought: ...
Action: ...
## Action Space
click(start_box='<|box_start|>(x1,y1)<|box_end|>')
long_press(start_box='<|box_start|>(x1,y1)<|box_end|>', time='')
click(start_box='[x1,y1]')
long_press(start_box='[x1,y1]', time='')
type(content='')
drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>')
drag(start_box='[x1,y1]', end_box='[x2,y2]')
press_home()
press_back()
finished(content='') # Submit the task regardless of whether it succeeds or fails.
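With the `<|box_start|>(x1,y1)<|box_end|>` markers replaced by plain `[x1,y1]` brackets, a response that follows this prompt would now look like the example below; the thought text and coordinates are illustrative only.

Thought: The Settings icon sits in the top-right corner of the screen.
Action: click(start_box='[960,84]')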