JCC-CSScheduler/advisor/internal/scheduler/scheduler.go

531 lines
14 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package scheduler
import (
"fmt"
"sort"
"github.com/inhies/go-bytesize"
"github.com/samber/lo"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops"
"gitlink.org.cn/cloudream/common/utils/math"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/collector"
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
)
const (
// Resource levels assigned to each node (a lower level means the requirement is met more comfortably):
// ResourceLevel1 means every required resource type offers at least 1.5 times the requested amount.
ResourceLevel1 = 1
// ResourceLevel2 means Level1 is not met, but every required resource type offers at least 1 times the requested amount.
ResourceLevel2 = 2
// ResourceLevel3 means at least one required resource type offers less than the requested amount.
ResourceLevel3 = 3
// CpuResourceWeight scales the available/required ratio of CPU, GPU, NPU and MLU resources.
CpuResourceWeight float64 = 1
// StgResourceWeight scales the available/required ratio of storage and memory resources.
StgResourceWeight float64 = 1.2
// CachingWeight scales the cached fraction of a package when computing a file's caching score.
CachingWeight float64 = 1
// LoadedWeight is the loading score given to a file that is already loaded on a computing center.
LoadedWeight float64 = 2
)
// ErrNoAvailableScheme is returned by Schedule when no computing center is
// reported, or when the best candidate still falls into ResourceLevel3.
var ErrNoAvailableScheme = fmt.Errorf("no appropriate scheduling node found, please wait")
// Scheduler decides where a normal job should run.
type Scheduler interface {
// Schedule returns the schedule scheme chosen for the given job, or an error
// when no scheme could be produced.
Schedule(info *jobmod.NormalJob) (*jobmod.JobScheduleScheme, error)
}
// candidate is a computing center under evaluation as a scheduling target,
// together with the resource and file scores computed for it.
type candidate struct {
CC schmod.ComputingCenter
IsPreScheduled bool // whether this node is the one that was selected during pre-scheduling
Resource resourcesDetail
Files filesDetail
}
// resourcesDetail holds the per-resource-kind evaluation of one computing
// center and the aggregates derived from it.
type resourcesDetail struct {
CPU resourceDetail
GPU resourceDetail
NPU resourceDetail
MLU resourceDetail
Storage resourceDetail
Memory resourceDetail
TotalScore float64 // sum of the scores of the resource kinds the job requires
AvgScore float64 // meant to be the mean score per required resource kind; used by CandidateArr.Less as a tie-breaker
MaxLevel int // largest level among the required resource kinds (a larger level means a worse fit)
}
// resourceDetail is the evaluation of a single resource kind.
type resourceDetail struct {
Level int // one of ResourceLevel1, ResourceLevel2 or ResourceLevel3
Score float64 // weighted available/required ratio; 0 when the resource is not reported
}
// filesDetail holds the file scores of a computing center for the dataset,
// code and image of a job.
type filesDetail struct {
Dataset fileDetail
Code fileDetail
Image fileDetail
TotalScore float64 // sum of the caching and loading scores of Code, Dataset and Image
}
// fileDetail is the score of a single file (package or image) on one
// computing center.
type fileDetail struct {
CachingScore float64
LoadingScore float64
IsLoaded bool // whether the storage data has already been scheduled to this node; for an image, whether the image has already been loaded into this computing center
}
// CandidateArr is a list of candidates that can be ordered with sort.Sort;
// the most preferred candidate sorts first.
type CandidateArr []*candidate
// Len returns the number of candidates.
func (a CandidateArr) Len() int { return len(a) }
// Swap exchanges the candidates at indexes i and j.
func (a CandidateArr) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// Less reports whether the candidate at index i should be preferred over the
// candidate at index j.
//
// Ordering rules, in priority order:
//  1. A pre-scheduled candidate whose resources satisfy the job (level below
//     ResourceLevel3) wins outright.
//  2. A lower resource level wins.
//  3. On equal levels, ResourceLevel1 candidates are ranked by file score and
//     ResourceLevel2/ResourceLevel3 candidates by average resource score;
//     higher is better in both cases.
func (a CandidateArr) Less(i, j int) bool {
	left, right := a[i], a[j]

	// The node picked during pre-scheduling takes precedence as long as its
	// resources still satisfy the requirement.
	if left.IsPreScheduled && left.Resource.MaxLevel < ResourceLevel3 {
		return true
	}
	if right.IsPreScheduled && right.Resource.MaxLevel < ResourceLevel3 {
		return false
	}

	// Compare resource levels first: the lower the level, the better the
	// requirement is met.
	if left.Resource.MaxLevel != right.Resource.MaxLevel {
		return left.Resource.MaxLevel < right.Resource.MaxLevel
	}

	// Same level: fall back to the individual scores.
	switch left.Resource.MaxLevel {
	case ResourceLevel1:
		// A higher file score means more of the job's files are already on
		// this node.
		return left.Files.TotalScore > right.Files.TotalScore
	case ResourceLevel2, ResourceLevel3:
		// A higher average resource score means more spare resources.
		return left.Resource.AvgScore > right.Resource.AvgScore
	default:
		return false
	}
}
// DefaultScheduler is the default, stateless scheduler: it ranks computing
// centers by resource availability and file locality.
type DefaultScheduler struct{}

// NewDefaultSchedule returns a ready-to-use DefaultScheduler.
func NewDefaultSchedule() *DefaultScheduler {
	return new(DefaultScheduler)
}
// Schedule selects the computing center on which the job should run and
// builds the corresponding schedule scheme.
//
// It asks the manager for all computing centers, scores each one on file
// locality and on available resources, sorts them with CandidateArr and picks
// the first one. ErrNoAvailableScheme is returned when the manager reports no
// computing center, or when even the best candidate is at ResourceLevel3.
func (s *DefaultScheduler) Schedule(job *jobmod.NormalJob) (*jobmod.JobScheduleScheme, error) {
	mgrCli, err := schglb.ManagerMQPool.Acquire()
	if err != nil {
		// The client comes from the manager pool, so the error names it as
		// such (matching calcImageFileScore).
		return nil, fmt.Errorf("new manager client: %w", err)
	}
	defer schglb.ManagerMQPool.Release(mgrCli)

	// Query which computing centers are available.
	allCC, err := mgrCli.GetAllComputingCenter(mgrmq.NewGetAllComputingCenter())
	if err != nil {
		return nil, fmt.Errorf("getting all computing center info: %w", err)
	}
	if len(allCC.ComputingCenters) == 0 {
		return nil, ErrNoAvailableScheme
	}

	allCCs := make(map[schsdk.CCID]*candidate, len(allCC.ComputingCenters))
	for _, cc := range allCC.ComputingCenters {
		allCCs[cc.CCID] = &candidate{
			CC:             cc,
			IsPreScheduled: cc.CCID == job.TargetCCID,
		}
	}

	// Score every candidate.
	err = s.calcFileScore(job.Files, allCCs)
	if err != nil {
		return nil, err
	}
	err = s.calcResourceScore(job, allCCs)
	if err != nil {
		return nil, err
	}

	// Rank the candidates; the most preferred one ends up first.
	allCCsArr := lo.Values(allCCs)
	sort.Sort(CandidateArr(allCCsArr))

	targetNode := allCCsArr[0]
	// If even the best candidate lacks some resource, nothing can be scheduled.
	if targetNode.Resource.MaxLevel == ResourceLevel3 {
		return nil, ErrNoAvailableScheme
	}

	scheme := s.makeSchemeForNode(targetNode)
	return &scheme, nil
}
// makeSchemeForNode builds the schedule scheme for the chosen computing
// center. Each of dataset, code and image gets ActionNo when it is already
// loaded on the target; otherwise dataset and code get ActionLoad and the
// image gets ActionImportImage.
func (s *DefaultScheduler) makeSchemeForNode(targetCC *candidate) jobmod.JobScheduleScheme {
	var scheme jobmod.JobScheduleScheme
	scheme.TargetCCID = targetCC.CC.CCID

	// Start from "nothing to do" and override for whatever is still missing.
	scheme.Dataset.Action = jobmod.ActionNo
	if !targetCC.Files.Dataset.IsLoaded {
		scheme.Dataset.Action = jobmod.ActionLoad
	}

	scheme.Code.Action = jobmod.ActionNo
	if !targetCC.Files.Code.IsLoaded {
		scheme.Code.Action = jobmod.ActionLoad
	}

	scheme.Image.Action = jobmod.ActionNo
	if !targetCC.Files.Image.IsLoaded {
		scheme.Image.Action = jobmod.ActionImportImage
	}

	return scheme
}
// calcResourceScore evaluates the job's resource requirements against every
// candidate and stores the result in the candidate's Resource field. It stops
// at the first evaluation error and returns it.
func (s *DefaultScheduler) calcResourceScore(job *jobmod.NormalJob, allCCs map[schsdk.CCID]*candidate) error {
	for _, cand := range allCCs {
		detail, err := s.calcOneResourceScore(job.Info.Resources, &cand.CC)
		if err != nil {
			return err
		}

		cand.Resource = *detail
	}

	return nil
}
// calcOneResourceScore classifies the resource level of one computing center
// and computes its resource scores against the job's requirements.
//
// Only resource kinds with a positive requirement are evaluated. For each of
// them the level comes from calcResourceLevel and the score is the
// available/required ratio multiplied by a weight (CpuResourceWeight for CPU,
// GPU, NPU and MLU; StgResourceWeight for storage and memory). A required
// kind that the computing center does not report is ResourceLevel3 with
// score 0.
//
// The returned detail carries the total score, the average score per
// evaluated kind and the largest level. When the job requires nothing, the
// zero-valued detail is returned. Errors come from acquiring the collector
// client, querying the resource data, or parsing a storage/memory size.
func (s *DefaultScheduler) calcOneResourceScore(requires schsdk.JobResourcesInfo, cc *schmod.ComputingCenter) (*resourcesDetail, error) {
	colCli, err := schglb.CollectorMQPool.Acquire()
	if err != nil {
		return nil, fmt.Errorf("new collector client: %w", err)
	}
	defer schglb.CollectorMQPool.Release(colCli)

	getResDataResp, err := colCli.GetAllResourceData(collector.NewGetAllResourceData(cc.UOPSlwNodeID))
	if err != nil {
		return nil, err
	}

	var resDetail resourcesDetail

	totalScore := 0.0
	maxLevel := 0
	resKinds := 0

	// collect folds one evaluated resource kind into the running aggregates.
	collect := func(level int, score float64) {
		maxLevel = math.Max(maxLevel, level)
		totalScore += score
		resKinds++
	}

	if requires.CPU > 0 {
		if res := findResuorce[*uopsdk.CPUResourceData](getResDataResp.Datas); res != nil {
			avail := float64(res.Available.Value)
			resDetail.CPU.Level = s.calcResourceLevel(avail, requires.CPU)
			resDetail.CPU.Score = (avail / requires.CPU) * CpuResourceWeight
		} else {
			// Not reported at all: treat as insufficient with a score of 0.
			resDetail.CPU.Level = ResourceLevel3
			resDetail.CPU.Score = 0
		}
		collect(resDetail.CPU.Level, resDetail.CPU.Score)
	}

	if requires.GPU > 0 {
		if res := findResuorce[*uopsdk.GPUResourceData](getResDataResp.Datas); res != nil {
			avail := float64(res.Available.Value)
			resDetail.GPU.Level = s.calcResourceLevel(avail, requires.GPU)
			resDetail.GPU.Score = (avail / requires.GPU) * CpuResourceWeight
		} else {
			resDetail.GPU.Level = ResourceLevel3
			resDetail.GPU.Score = 0
		}
		collect(resDetail.GPU.Level, resDetail.GPU.Score)
	}

	if requires.NPU > 0 {
		if res := findResuorce[*uopsdk.NPUResourceData](getResDataResp.Datas); res != nil {
			avail := float64(res.Available.Value)
			resDetail.NPU.Level = s.calcResourceLevel(avail, requires.NPU)
			resDetail.NPU.Score = (avail / requires.NPU) * CpuResourceWeight
		} else {
			resDetail.NPU.Level = ResourceLevel3
			resDetail.NPU.Score = 0
		}
		collect(resDetail.NPU.Level, resDetail.NPU.Score)
	}

	if requires.MLU > 0 {
		if res := findResuorce[*uopsdk.MLUResourceData](getResDataResp.Datas); res != nil {
			avail := float64(res.Available.Value)
			resDetail.MLU.Level = s.calcResourceLevel(avail, requires.MLU)
			resDetail.MLU.Score = (avail / requires.MLU) * CpuResourceWeight
		} else {
			resDetail.MLU.Level = ResourceLevel3
			resDetail.MLU.Score = 0
		}
		collect(resDetail.MLU.Level, resDetail.MLU.Score)
	}

	if requires.Storage > 0 {
		if res := findResuorce[*uopsdk.StorageResourceData](getResDataResp.Datas); res != nil {
			// The available amount is reported as value + unit; convert it to
			// bytes so it is comparable with the requirement.
			bytes, err := bytesize.Parse(fmt.Sprintf("%f%s", res.Available.Value, res.Available.Unit))
			if err != nil {
				return nil, err
			}
			resDetail.Storage.Level = s.calcResourceLevel(float64(bytes), float64(requires.Storage))
			resDetail.Storage.Score = (float64(bytes) / float64(requires.Storage)) * StgResourceWeight
		} else {
			resDetail.Storage.Level = ResourceLevel3
			resDetail.Storage.Score = 0
		}
		collect(resDetail.Storage.Level, resDetail.Storage.Score)
	}

	if requires.Memory > 0 {
		if res := findResuorce[*uopsdk.MemoryResourceData](getResDataResp.Datas); res != nil {
			bytes, err := bytesize.Parse(fmt.Sprintf("%f%s", res.Available.Value, res.Available.Unit))
			if err != nil {
				return nil, err
			}
			resDetail.Memory.Level = s.calcResourceLevel(float64(bytes), float64(requires.Memory))
			resDetail.Memory.Score = (float64(bytes) / float64(requires.Memory)) * StgResourceWeight
		} else {
			resDetail.Memory.Level = ResourceLevel3
			resDetail.Memory.Score = 0
		}
		collect(resDetail.Memory.Level, resDetail.Memory.Score)
	}

	// Nothing was required: leave every aggregate at its zero value.
	if resKinds == 0 {
		return &resDetail, nil
	}

	resDetail.TotalScore = totalScore
	// The average must be derived from the accumulated total; dividing the
	// (still zero) AvgScore field by resKinds would always yield 0.
	resDetail.AvgScore = totalScore / float64(resKinds)
	resDetail.MaxLevel = maxLevel
	return &resDetail, nil
}
// calcResourceLevel maps an available amount and a needed amount to a
// resource level: ResourceLevel1 when at least 1.5 times the need is
// available, ResourceLevel2 when at least the need is available, and
// ResourceLevel3 otherwise.
func (s *DefaultScheduler) calcResourceLevel(avai float64, need float64) int {
	switch {
	case avai >= 1.5*need:
		return ResourceLevel1
	case avai >= need:
		return ResourceLevel2
	default:
		return ResourceLevel3
	}
}
// calcFileScore computes the file scores of every candidate: the code
// package, the dataset package and the image are scored in that order, the
// results are written into each candidate's Files field, and finally
// Files.TotalScore is set to the sum of all caching and loading scores.
// The first scoring error aborts the computation and is returned wrapped.
func (s *DefaultScheduler) calcFileScore(files jobmod.JobFiles, allCCs map[schsdk.CCID]*candidate) error {
	// Only storage nodes that belong to the computing centers under
	// consideration contribute to the data scores, so index candidates by
	// their storage node ID.
	nodeIndex := make(map[cdssdk.NodeID]*candidate)
	for _, cand := range allCCs {
		nodeIndex[cand.CC.CDSNodeID] = cand
	}

	// Code package scores.
	codeScores, err := s.calcPackageFileScore(files.Code.PackageID, nodeIndex)
	if err != nil {
		return fmt.Errorf("calc code file score: %w", err)
	}
	for ccID, detail := range codeScores {
		allCCs[ccID].Files.Code = *detail
	}

	// Dataset package scores.
	datasetScores, err := s.calcPackageFileScore(files.Dataset.PackageID, nodeIndex)
	if err != nil {
		return fmt.Errorf("calc dataset file score: %w", err)
	}
	for ccID, detail := range datasetScores {
		allCCs[ccID].Files.Dataset = *detail
	}

	// Image scores.
	imageScores, err := s.calcImageFileScore(files.Image.ImageID, allCCs, nodeIndex)
	if err != nil {
		return fmt.Errorf("calc image file score: %w", err)
	}
	for ccID, detail := range imageScores {
		allCCs[ccID].Files.Image = *detail
	}

	// Sum up the individual scores per candidate.
	for _, cand := range allCCs {
		sum := cand.Files.Code.CachingScore
		sum += cand.Files.Code.LoadingScore
		sum += cand.Files.Dataset.CachingScore
		sum += cand.Files.Dataset.LoadingScore
		sum += cand.Files.Image.CachingScore
		sum += cand.Files.Image.LoadingScore
		cand.Files.TotalScore = sum
	}

	return nil
}
// calcPackageFileScore computes the score of a package on each computing
// center.
//
// For every storage node that caches part of the package, CachingScore is
// the cached fraction (FileSize / PackageSize) multiplied by CachingWeight.
// For every storage node on which the package is loaded, LoadingScore is set
// to LoadedWeight and IsLoaded to true. Storage nodes that are not present in
// cdsNodeToCC are ignored. The result is keyed by computing center ID and
// only contains centers that cache or have loaded the package.
func (s *DefaultScheduler) calcPackageFileScore(packageID cdssdk.PackageID, cdsNodeToCC map[cdssdk.NodeID]*candidate) (map[schsdk.CCID]*fileDetail, error) {
	colCli, err := schglb.CollectorMQPool.Acquire()
	if err != nil {
		return nil, fmt.Errorf("new collector client: %w", err)
	}
	defer schglb.CollectorMQPool.Release(colCli)

	ccFileScores := make(map[schsdk.CCID]*fileDetail)

	// NOTE(review): the first argument is passed as 0; presumably a user ID —
	// confirm against the collector API.
	cachedResp, err := colCli.PackageGetCachedStgNodes(collector.NewPackageGetCachedStgNodes(0, packageID))
	if err != nil {
		return nil, err
	}

	for _, cdsNodeCacheInfo := range cachedResp.NodeInfos {
		cc, ok := cdsNodeToCC[cdsNodeCacheInfo.NodeID]
		if !ok {
			continue
		}

		detail := &fileDetail{}
		// A zero package size would turn the ratio into NaN/Inf, which then
		// poisons Files.TotalScore and the candidate ordering; score such a
		// package as 0 instead.
		if cachedResp.PackageSize > 0 {
			//TODO the formula may need to differ depending on the caching method
			detail.CachingScore = float64(cdsNodeCacheInfo.FileSize) / float64(cachedResp.PackageSize) * CachingWeight
		}
		ccFileScores[cc.CC.CCID] = detail
	}

	loadedResp, err := colCli.PackageGetLoadedStgNodes(collector.NewPackageGetLoadedStgNodes(0, packageID))
	if err != nil {
		return nil, err
	}

	for _, cdsNodeID := range loadedResp.StgNodeIDs {
		cc, ok := cdsNodeToCC[cdsNodeID]
		if !ok {
			continue
		}

		// Reuse the entry created for the caching score, if there is one.
		fsc, ok := ccFileScores[cc.CC.CCID]
		if !ok {
			fsc = &fileDetail{}
			ccFileScores[cc.CC.CCID] = fsc
		}

		fsc.LoadingScore = 1 * LoadedWeight
		fsc.IsLoaded = true
	}

	return ccFileScores, nil
}
// calcImageFileScore computes the score of an image on each computing
// center.
//
// The image info is fetched from the manager. When the image is backed by a
// storage package (CDSPackageID is set), CachingScore is computed per storage
// node as the cached fraction of that package multiplied by CachingWeight;
// storage nodes not present in cdsNodeToCC are ignored. For every PCM image
// entry whose computing center is in allCCs, LoadingScore is set to
// LoadedWeight and IsLoaded to true. The result is keyed by computing center
// ID.
func (s *DefaultScheduler) calcImageFileScore(imageID schsdk.ImageID, allCCs map[schsdk.CCID]*candidate, cdsNodeToCC map[cdssdk.NodeID]*candidate) (map[schsdk.CCID]*fileDetail, error) {
	colCli, err := schglb.CollectorMQPool.Acquire()
	if err != nil {
		return nil, fmt.Errorf("new collector client: %w", err)
	}
	defer schglb.CollectorMQPool.Release(colCli)

	magCli, err := schglb.ManagerMQPool.Acquire()
	if err != nil {
		return nil, fmt.Errorf("new manager client: %w", err)
	}
	defer schglb.ManagerMQPool.Release(magCli)

	imageInfoResp, err := magCli.GetImageInfo(mgrmq.NewGetImageInfo(imageID))
	if err != nil {
		return nil, fmt.Errorf("getting image info: %w", err)
	}

	ccFileScores := make(map[schsdk.CCID]*fileDetail)

	if imageInfoResp.Image.CDSPackageID != nil {
		// NOTE(review): the first argument is passed as 0; presumably a user
		// ID — confirm against the collector API.
		cachedResp, err := colCli.PackageGetCachedStgNodes(collector.NewPackageGetCachedStgNodes(0, *imageInfoResp.Image.CDSPackageID))
		if err != nil {
			return nil, err
		}

		for _, cdsNodeCacheInfo := range cachedResp.NodeInfos {
			cc, ok := cdsNodeToCC[cdsNodeCacheInfo.NodeID]
			if !ok {
				continue
			}

			detail := &fileDetail{}
			// A zero package size would turn the ratio into NaN/Inf, which
			// then poisons Files.TotalScore and the candidate ordering; score
			// such a package as 0 instead.
			if cachedResp.PackageSize > 0 {
				//TODO the formula may need to differ depending on the caching method
				detail.CachingScore = float64(cdsNodeCacheInfo.FileSize) / float64(cachedResp.PackageSize) * CachingWeight
			}
			ccFileScores[cc.CC.CCID] = detail
		}
	}

	// For images, the loading score reflects whether the image has been
	// imported into the computing center.
	for _, pcmImg := range imageInfoResp.PCMImages {
		if _, ok := allCCs[pcmImg.CCID]; !ok {
			continue
		}

		// Reuse the entry created for the caching score, if there is one.
		fsc, ok := ccFileScores[pcmImg.CCID]
		if !ok {
			fsc = &fileDetail{}
			ccFileScores[pcmImg.CCID] = fsc
		}

		fsc.LoadingScore = 1 * LoadedWeight
		fsc.IsLoaded = true
	}

	return ccFileScores, nil
}
// findResuorce returns the first element of all that can be type-asserted to
// T. When no element matches, the zero value of T is returned (nil for the
// pointer types this file instantiates it with).
func findResuorce[T uopsdk.ResourceData](all []uopsdk.ResourceData) T {
	var zero T

	for idx := range all {
		if matched, ok := all[idx].(T); ok {
			return matched
		}
	}

	return zero
}