feat: save screenshots for PlanNextAction

This commit is contained in:
lilong.129 2025-03-22 01:07:28 +08:00
parent 8a3b6b5c4c
commit 12e0f7f9a2
6 changed files with 55 additions and 19 deletions

View File

@ -1 +1 @@
v5.0.0-beta-2503220006
v5.0.0-beta-2503220107

View File

@ -13,6 +13,7 @@ import (
"math"
"os"
"strings"
"time"
"github.com/cloudwego/eino-ext/components/model/openai"
"github.com/cloudwego/eino/components/model"
@ -76,8 +77,9 @@ func (p *Planner) Call(opts *PlanningOptions) (*PlanningResult, error) {
// call model service, generate response
logRequest(p.history)
log.Info().Msg("calling model service...")
startTime := time.Now()
resp, err := p.model.Generate(p.ctx, p.history)
log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()).Msg("call model service")
if err != nil {
return nil, fmt.Errorf("request model service failed: %w", err)
}

BIN
uixt/ai/testdata/chat_list.jpeg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 295 KiB

View File

@ -1,8 +1,14 @@
package uixt
import (
"encoding/base64"
"fmt"
"path/filepath"
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/internal/builtin"
"github.com/httprunner/httprunner/v5/internal/config"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/pkg/errors"
@ -46,11 +52,27 @@ func (dExt *XTDriver) PlanNextAction(text string, opts ...option.ActionOption) (
return nil, errors.New("LLM service is not initialized")
}
screenShotBase64, err := dExt.GetScreenShotBase64()
compressedBufSource, err := dExt.GetScreenShotBuffer()
if err != nil {
return nil, err
}
// convert buffer to base64 string
screenShotBase64 := "data:image/jpeg;base64," +
base64.StdEncoding.EncodeToString(compressedBufSource.Bytes())
// save screenshot to file
imagePath := filepath.Join(
config.GetConfig().ScreenShotsPath,
fmt.Sprintf("%s.jpeg", builtin.GenNameWithTimestamp("%d_screenshot")),
)
go func() {
err := saveScreenShot(compressedBufSource, imagePath)
if err != nil {
log.Error().Err(err).Msg("save screenshot file failed")
}
}()
size, err := dExt.IDriver.WindowSize()
if err != nil {
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())

View File

@ -2,7 +2,6 @@ package uixt
import (
"bytes"
"encoding/base64"
"fmt"
"image"
"image/gif"
@ -63,19 +62,6 @@ func (dExt *XTDriver) GetScreenShotBuffer() (compressedBufSource *bytes.Buffer,
return compressBufSource, nil
}
// GetScreenShotBase64 fetches the screenshot buffer via GetScreenShotBuffer and
// returns its bytes base64-encoded (standard alphabet) behind a
// "data:image/jpeg;base64," prefix. If the buffer cannot be obtained, the
// underlying error is returned unchanged together with an empty string.
func (dExt *XTDriver) GetScreenShotBase64() (base64Str string, err error) {
	buf, err := dExt.GetScreenShotBuffer()
	if err != nil {
		return "", err
	}

	// encode the raw buffer, then prepend the data-URI header
	encoded := base64.StdEncoding.EncodeToString(buf.Bytes())
	return "data:image/jpeg;base64," + encoded, nil
}
// GetScreenResult takes a screenshot, returns the image recognition result
func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// get compressed screenshot buffer
@ -104,8 +90,6 @@ func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult
err := saveScreenShot(compressBufSource, imagePath)
if err != nil {
log.Error().Err(err).Msg("save screenshot file failed")
} else {
log.Info().Str("path", imagePath).Msg("screenshot saved")
}
}()

View File

@ -129,6 +129,34 @@ func TestDriverExt_TapByLLM(t *testing.T) {
assert.Nil(t, err)
}
// TestDriverExt_StartToGoal passes StartToGoal a Chinese-language description
// of the rules of a tile-matching ("Lianliankan") game, followed by a request
// to follow those rules and perform tap actions only, and asserts that the
// call returns no error.
func TestDriverExt_StartToGoal(t *testing.T) {
	// game rules, handed to the driver verbatim
	rules := `连连看是一款经典的益智消除类小游戏通常以图案或图标为主要元素以下是连连看的基本规则说明
1. 游戏目标: 玩家需要在规定时间内通过连接相同的图案或图标将它们从游戏界面中消除
2. 连接规则:
- 两个相同的图案可以通过不超过三条直线连接
- 连接线可以水平或垂直但不能斜线也不能跨过其他图案
- 连接线的转折次数不能超过两次
3. 游戏界面:
- 游戏界面通常是一个矩形区域内含多个图案或图标排列成行和列
- 图案或图标在未选中状态下背景为白色选中状态下背景为绿色
4. 时间限制: 游戏通常设有时间限制玩家需要在时间耗尽前完成所有图案的消除
5. 得分机制: 每成功连接并消除一对图案玩家会获得相应的分数完成游戏后根据剩余时间和消除效率计算总分
6. 关卡设计: 游戏可能包含多个关卡随着关卡的推进图案的复杂度和数量会增加
注意事项
1当连接错误时顶部的红心会减少一个需及时调整策略避免红心变为0个后游戏失败
2不要连续 2 次点击同一个图案
3不要犯重复的错误
`
	// closing directive appended after the rules
	prompt := rules + "\n\n请严格按照以上游戏规则开始游戏注意请只做点击操作"

	xtDriver := setupDriverExt(t)
	err := xtDriver.StartToGoal(prompt)
	assert.Nil(t, err)
}
func TestDriverExt_PlanNextAction(t *testing.T) {
driver := setupDriverExt(t)
result, err := driver.PlanNextAction("启动抖音")