Refactor to addVectorsForStore()

This commit is contained in:
Yang Luo 2023-09-07 23:05:20 +08:00
parent 9020043669
commit ec9bcb6d78
4 changed files with 30 additions and 38 deletions

View File

@ -25,7 +25,7 @@ import (
"github.com/sashabaranov/go-openai"
)
func readCloserToString(f io.ReadCloser, fileName string) (string, error) {
func ReadFileToString(f io.ReadCloser, fileName string) (string, error) {
fileType := docconv.MimeTypeByExtension(fileName)
res, err := docconv.Convert(f, fileType, true)
if err != nil {
@ -38,7 +38,7 @@ func readCloserToString(f io.ReadCloser, fileName string) (string, error) {
return res.Body, nil
}
func splitText(text string) []string {
func SplitText(text string) []string {
const maxLength = 210 * 3
var res []string
var temp string
@ -59,15 +59,6 @@ func splitText(text string) []string {
return res
}
func GetSplitTxt(f io.ReadCloser, fileName string) []string {
text, err := readCloserToString(f, fileName)
if err != nil {
return nil
}
return splitText(text)
}
func getEmbedding(authToken string, text string, timeout int) ([]float32, error) {
client := getProxyClientFromToken(authToken)

View File

@ -132,11 +132,11 @@ func (c *ApiController) RefreshStoreVectors() {
return
}
success, err := object.RefreshStoreVectors(&store)
ok, err := object.RefreshStoreVectors(&store)
if err != nil {
c.ResponseError(err.Error())
return
}
c.ResponseOk(success)
c.ResponseOk(ok)
}

View File

@ -157,12 +157,6 @@ func RefreshStoreVectors(store *Store) (bool, error) {
}
authToken := provider.ClientSecret
success, err := setTextObjectVector(authToken, store.StorageProvider, "", store.Name)
if err != nil {
return false, err
}
if !success {
return false, nil
}
return true, nil
ok, err := addVectorsForStore(authToken, store.StorageProvider, "", store.Name)
return ok, err
}

View File

@ -48,7 +48,7 @@ func filterTextFiles(files []*storage.Object) []*storage.Object {
return res
}
func getTextFiles(provider string, prefix string) ([]*storage.Object, error) {
func getFilteredFileObjects(provider string, prefix string) ([]*storage.Object, error) {
files, err := storage.ListObjects(provider, prefix)
if err != nil {
return nil, err
@ -57,7 +57,7 @@ func getTextFiles(provider string, prefix string) ([]*storage.Object, error) {
return filterTextFiles(files), nil
}
func getObjectReadCloser(object *storage.Object) (io.ReadCloser, error) {
func getObjectFile(object *storage.Object) (io.ReadCloser, error) {
resp, err := http.Get(object.Url)
if err != nil {
return nil, err
@ -94,44 +94,51 @@ func addEmbeddedVector(authToken string, text string, storeName string, fileName
return AddVector(vector)
}
func setTextObjectVector(authToken string, provider string, key string, storeName string) (bool, error) {
lb := rate.NewLimiter(rate.Every(time.Minute), 3)
func addVectorsForStore(authToken string, provider string, key string, storeName string) (bool, error) {
timeLimiter := rate.NewLimiter(rate.Every(time.Minute), 3)
textObjects, err := getTextFiles(provider, key)
objs, err := getFilteredFileObjects(provider, key)
if err != nil {
return false, err
}
if len(textObjects) == 0 {
if len(objs) == 0 {
return false, nil
}
for _, textObject := range textObjects {
readCloser, err := getObjectReadCloser(textObject)
for _, obj := range objs {
f, err := getObjectFile(obj)
if err != nil {
return false, err
}
defer readCloser.Close()
defer f.Close()
splitTxts := ai.GetSplitTxt(readCloser, textObject.Key)
for _, splitTxt := range splitTxts {
if lb.Allow() {
success, err := addEmbeddedVector(authToken, splitTxt, storeName, textObject.Key)
filename := obj.Key
text, err := ai.ReadFileToString(f, filename)
if err != nil {
return false, err
}
if !success {
textSections := ai.SplitText(text)
for _, textSection := range textSections {
if timeLimiter.Allow() {
ok, err := addEmbeddedVector(authToken, textSection, storeName, obj.Key)
if err != nil {
return false, err
}
if !ok {
return false, nil
}
} else {
err := lb.Wait(context.Background())
err := timeLimiter.Wait(context.Background())
if err != nil {
return false, err
}
success, err := addEmbeddedVector(authToken, splitTxt, storeName, textObject.Key)
ok, err := addEmbeddedVector(authToken, textSection, storeName, obj.Key)
if err != nil {
return false, err
}
if !success {
if !ok {
return false, nil
}
}