// Forked from JointCloud/JCC-CSScheduler (531 lines, 14 KiB, Go).

package scheduler
|
||
|
||
import (
|
||
"fmt"
|
||
"sort"
|
||
|
||
"github.com/inhies/go-bytesize"
|
||
"github.com/samber/lo"
|
||
|
||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||
uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops"
|
||
"gitlink.org.cn/cloudream/common/utils/math"
|
||
|
||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
||
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/collector"
|
||
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
||
)
|
||
|
||
// Resource levels assigned to a computing center. A lower level means the
// job's requirements are met more comfortably.
const (
	// ResourceLevel1 means every requested resource type is available at
	// 1.5 times the requested amount or more.
	ResourceLevel1 = iota + 1
	// ResourceLevel2 means Level1 is not reached, but every requested resource
	// type is available at 1 times the requested amount or more.
	ResourceLevel2
	// ResourceLevel3 means some requested resource type is available at less
	// than the requested amount.
	ResourceLevel3
)

// Weights multiplied into the per-resource scores.
const (
	CpuResourceWeight float64 = 1
	StgResourceWeight float64 = 1.2
)

// Weights multiplied into the per-file caching and loading scores.
const (
	CachingWeight float64 = 1
	LoadedWeight  float64 = 2
)
|
||
|
||
var (
	// ErrNoAvailableScheme is the sentinel error reported when scheduling
	// cannot find a suitable computing center for a job.
	ErrNoAvailableScheme = fmt.Errorf("no appropriate scheduling node found, please wait")
)
|
||
|
||
type Scheduler interface {
|
||
Schedule(info *jobmod.NormalJob) (*jobmod.JobScheduleScheme, error)
|
||
}
|
||
|
||
type candidate struct {
|
||
CC schmod.ComputingCenter
|
||
IsPreScheduled bool // 是否是在预调度时被选中的节点
|
||
Resource resourcesDetail
|
||
Files filesDetail
|
||
}
|
||
|
||
type resourcesDetail struct {
|
||
CPU resourceDetail
|
||
GPU resourceDetail
|
||
NPU resourceDetail
|
||
MLU resourceDetail
|
||
Storage resourceDetail
|
||
Memory resourceDetail
|
||
|
||
TotalScore float64
|
||
AvgScore float64
|
||
MaxLevel int
|
||
}
|
||
// resourceDetail is the result for a single resource kind.
type resourceDetail struct {
	Level int     // one of the ResourceLevel* constants
	Score float64 // weighted ratio of available to requested amount
}
|
||
|
||
type filesDetail struct {
|
||
Dataset fileDetail
|
||
Code fileDetail
|
||
Image fileDetail
|
||
|
||
TotalScore float64
|
||
}
|
||
// fileDetail is the score of one job file (dataset, code or image) on one
// computing center.
type fileDetail struct {
	CachingScore, LoadingScore float64
	// IsLoaded tells whether the storage has already been scheduled to this
	// node; for an image it tells whether the image has already been loaded
	// into the computing center.
	IsLoaded bool
}
|
||
|
||
type CandidateArr []*candidate
|
||
|
||
func (a CandidateArr) Len() int { return len(a) }
|
||
func (a CandidateArr) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||
func (a CandidateArr) Less(i, j int) bool {
|
||
n1 := a[i]
|
||
n2 := a[j]
|
||
|
||
// 如果节点是预调度中选中的节点,那么只要资源满足需求,就优先选择这个节点
|
||
if n1.IsPreScheduled && n1.Resource.MaxLevel < ResourceLevel3 {
|
||
return true
|
||
}
|
||
|
||
if n2.IsPreScheduled && n2.Resource.MaxLevel < ResourceLevel3 {
|
||
return false
|
||
}
|
||
|
||
// 优先判断资源等级,资源等级越低,代表越满足需求
|
||
if n1.Resource.MaxLevel < n2.Resource.MaxLevel {
|
||
return true
|
||
}
|
||
|
||
if n1.Resource.MaxLevel > n2.Resource.MaxLevel {
|
||
return false
|
||
}
|
||
|
||
// 等级相同时,根据单项分值比较
|
||
switch n1.Resource.MaxLevel {
|
||
case ResourceLevel1:
|
||
// 数据文件总分越高,代表此节点上拥有的数据文件越完整,则越优先考虑
|
||
return n1.Files.TotalScore > n2.Files.TotalScore
|
||
|
||
case ResourceLevel2:
|
||
// 资源分的平均值越高,代表资源越空余,则越优先考虑
|
||
return n1.Resource.AvgScore > n2.Resource.AvgScore
|
||
|
||
case ResourceLevel3:
|
||
// 资源分的平均值越高,代表资源越空余,则越优先考虑
|
||
return n1.Resource.AvgScore > n2.Resource.AvgScore
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
// DefaultScheduler is the stateless default scheduler; its methods rank
// computing centers by resource and file scores.
type DefaultScheduler struct{}
|
||
|
||
func NewDefaultSchedule() *DefaultScheduler {
|
||
return &DefaultScheduler{}
|
||
}
|
||
|
||
func (s *DefaultScheduler) Schedule(job *jobmod.NormalJob) (*jobmod.JobScheduleScheme, error) {
|
||
mgrCli, err := schglb.ManagerMQPool.Acquire()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("new collector client: %w", err)
|
||
}
|
||
defer schglb.ManagerMQPool.Release(mgrCli)
|
||
|
||
allCCs := make(map[schsdk.CCID]*candidate)
|
||
|
||
// 查询有哪些算力中心可用
|
||
allCC, err := mgrCli.GetAllComputingCenter(mgrmq.NewGetAllComputingCenter())
|
||
if err != nil {
|
||
return nil, fmt.Errorf("getting all computing center info: %w", err)
|
||
}
|
||
|
||
if len(allCC.ComputingCenters) == 0 {
|
||
return nil, ErrNoAvailableScheme
|
||
}
|
||
|
||
for _, cc := range allCC.ComputingCenters {
|
||
allCCs[cc.CCID] = &candidate{
|
||
CC: cc,
|
||
IsPreScheduled: cc.CCID == job.TargetCCID,
|
||
}
|
||
}
|
||
|
||
// 计算
|
||
err = s.calcFileScore(job.Files, allCCs)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
err = s.calcResourceScore(job, allCCs)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
allCCsArr := lo.Values(allCCs)
|
||
sort.Sort(CandidateArr(allCCsArr))
|
||
|
||
targetNode := allCCsArr[0]
|
||
if targetNode.Resource.MaxLevel == ResourceLevel3 {
|
||
return nil, ErrNoAvailableScheme
|
||
}
|
||
|
||
scheme := s.makeSchemeForNode(targetNode)
|
||
return &scheme, nil
|
||
}
|
||
|
||
func (s *DefaultScheduler) makeSchemeForNode(targetCC *candidate) jobmod.JobScheduleScheme {
|
||
scheme := jobmod.JobScheduleScheme{
|
||
TargetCCID: targetCC.CC.CCID,
|
||
}
|
||
|
||
if !targetCC.Files.Dataset.IsLoaded {
|
||
scheme.Dataset.Action = jobmod.ActionLoad
|
||
} else {
|
||
scheme.Dataset.Action = jobmod.ActionNo
|
||
}
|
||
|
||
if !targetCC.Files.Code.IsLoaded {
|
||
scheme.Code.Action = jobmod.ActionLoad
|
||
} else {
|
||
scheme.Code.Action = jobmod.ActionNo
|
||
}
|
||
|
||
if !targetCC.Files.Image.IsLoaded {
|
||
scheme.Image.Action = jobmod.ActionImportImage
|
||
} else {
|
||
scheme.Image.Action = jobmod.ActionNo
|
||
}
|
||
|
||
return scheme
|
||
}
|
||
|
||
func (s *DefaultScheduler) calcResourceScore(job *jobmod.NormalJob, allCCs map[schsdk.CCID]*candidate) error {
|
||
for _, cc := range allCCs {
|
||
res, err := s.calcOneResourceScore(job.Info.Resources, &cc.CC)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
cc.Resource = *res
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// 划分节点资源等级,并计算资源得分
|
||
func (s *DefaultScheduler) calcOneResourceScore(requires schsdk.JobResourcesInfo, cc *schmod.ComputingCenter) (*resourcesDetail, error) {
|
||
colCli, err := schglb.CollectorMQPool.Acquire()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("new collector client: %w", err)
|
||
}
|
||
defer schglb.CollectorMQPool.Release(colCli)
|
||
|
||
getResDataResp, err := colCli.GetAllResourceData(collector.NewGetAllResourceData(cc.UOPSlwNodeID))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
var resDetail resourcesDetail
|
||
|
||
//计算资源得分
|
||
totalScore := 0.0
|
||
maxLevel := 0
|
||
resKinds := 0
|
||
|
||
if requires.CPU > 0 {
|
||
res := findResuorce[*uopsdk.CPUResourceData](getResDataResp.Datas)
|
||
if res == nil {
|
||
resDetail.CPU.Level = ResourceLevel3
|
||
resDetail.CPU.Score = 0
|
||
} else {
|
||
resDetail.CPU.Level = s.calcResourceLevel(float64(res.Available.Value), requires.CPU)
|
||
resDetail.CPU.Score = (float64(res.Available.Value) / requires.CPU) * CpuResourceWeight
|
||
}
|
||
|
||
maxLevel = math.Max(maxLevel, resDetail.CPU.Level)
|
||
totalScore += resDetail.CPU.Score
|
||
resKinds++
|
||
}
|
||
|
||
if requires.GPU > 0 {
|
||
res := findResuorce[*uopsdk.GPUResourceData](getResDataResp.Datas)
|
||
if res == nil {
|
||
resDetail.GPU.Level = ResourceLevel3
|
||
resDetail.GPU.Score = 0
|
||
} else {
|
||
resDetail.GPU.Level = s.calcResourceLevel(float64(res.Available.Value), requires.GPU)
|
||
resDetail.GPU.Score = (float64(res.Available.Value) / requires.GPU) * CpuResourceWeight
|
||
}
|
||
|
||
maxLevel = math.Max(maxLevel, resDetail.GPU.Level)
|
||
totalScore += resDetail.GPU.Score
|
||
resKinds++
|
||
}
|
||
|
||
if requires.NPU > 0 {
|
||
res := findResuorce[*uopsdk.NPUResourceData](getResDataResp.Datas)
|
||
if res == nil {
|
||
resDetail.NPU.Level = ResourceLevel3
|
||
resDetail.NPU.Score = 0
|
||
} else {
|
||
resDetail.NPU.Level = s.calcResourceLevel(float64(res.Available.Value), requires.NPU)
|
||
resDetail.NPU.Score = (float64(res.Available.Value) / requires.NPU) * CpuResourceWeight
|
||
}
|
||
|
||
maxLevel = math.Max(maxLevel, resDetail.NPU.Level)
|
||
totalScore += resDetail.NPU.Score
|
||
resKinds++
|
||
}
|
||
|
||
if requires.MLU > 0 {
|
||
res := findResuorce[*uopsdk.MLUResourceData](getResDataResp.Datas)
|
||
if res == nil {
|
||
resDetail.MLU.Level = ResourceLevel3
|
||
resDetail.MLU.Score = 0
|
||
} else {
|
||
resDetail.MLU.Level = s.calcResourceLevel(float64(res.Available.Value), requires.MLU)
|
||
resDetail.MLU.Score = (float64(res.Available.Value) / requires.MLU) * CpuResourceWeight
|
||
}
|
||
|
||
maxLevel = math.Max(maxLevel, resDetail.MLU.Level)
|
||
totalScore += resDetail.MLU.Score
|
||
resKinds++
|
||
}
|
||
|
||
if requires.Storage > 0 {
|
||
res := findResuorce[*uopsdk.StorageResourceData](getResDataResp.Datas)
|
||
if res == nil {
|
||
resDetail.Storage.Level = ResourceLevel3
|
||
resDetail.Storage.Score = 0
|
||
} else {
|
||
bytes, err := bytesize.Parse(fmt.Sprintf("%f%s", res.Available.Value, res.Available.Unit))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
resDetail.Storage.Level = s.calcResourceLevel(float64(bytes), float64(requires.Storage))
|
||
resDetail.Storage.Score = (float64(bytes) / float64(requires.Storage)) * StgResourceWeight
|
||
}
|
||
|
||
maxLevel = math.Max(maxLevel, resDetail.Storage.Level)
|
||
totalScore += resDetail.Storage.Score
|
||
resKinds++
|
||
}
|
||
|
||
if requires.Memory > 0 {
|
||
res := findResuorce[*uopsdk.MemoryResourceData](getResDataResp.Datas)
|
||
if res == nil {
|
||
resDetail.Memory.Level = ResourceLevel3
|
||
resDetail.Memory.Score = 0
|
||
} else {
|
||
bytes, err := bytesize.Parse(fmt.Sprintf("%f%s", res.Available.Value, res.Available.Unit))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
resDetail.Memory.Level = s.calcResourceLevel(float64(bytes), float64(requires.Memory))
|
||
resDetail.Memory.Score = (float64(bytes) / float64(requires.Memory)) * StgResourceWeight
|
||
}
|
||
|
||
maxLevel = math.Max(maxLevel, resDetail.Memory.Level)
|
||
totalScore += resDetail.Memory.Score
|
||
resKinds++
|
||
}
|
||
|
||
if resKinds == 0 {
|
||
return &resDetail, nil
|
||
}
|
||
|
||
resDetail.TotalScore = totalScore
|
||
resDetail.AvgScore = resDetail.AvgScore / float64(resKinds)
|
||
resDetail.MaxLevel = maxLevel
|
||
|
||
return &resDetail, nil
|
||
}
|
||
|
||
func (s *DefaultScheduler) calcResourceLevel(avai float64, need float64) int {
|
||
if avai >= 1.5*need {
|
||
return ResourceLevel1
|
||
}
|
||
|
||
if avai >= need {
|
||
return ResourceLevel2
|
||
}
|
||
|
||
return ResourceLevel3
|
||
}
|
||
|
||
// 计算节点得分情况
|
||
func (s *DefaultScheduler) calcFileScore(files jobmod.JobFiles, allCCs map[schsdk.CCID]*candidate) error {
|
||
// 只计算运控返回的计算中心上的存储服务的数据权重
|
||
cdsNodeToCC := make(map[cdssdk.NodeID]*candidate)
|
||
for _, cc := range allCCs {
|
||
cdsNodeToCC[cc.CC.CDSNodeID] = cc
|
||
}
|
||
|
||
//计算code相关得分
|
||
codeFileScores, err := s.calcPackageFileScore(files.Code.PackageID, cdsNodeToCC)
|
||
if err != nil {
|
||
return fmt.Errorf("calc code file score: %w", err)
|
||
}
|
||
for id, score := range codeFileScores {
|
||
allCCs[id].Files.Code = *score
|
||
}
|
||
|
||
//计算dataset相关得分
|
||
datasetFileScores, err := s.calcPackageFileScore(files.Dataset.PackageID, cdsNodeToCC)
|
||
if err != nil {
|
||
return fmt.Errorf("calc dataset file score: %w", err)
|
||
}
|
||
for id, score := range datasetFileScores {
|
||
allCCs[id].Files.Dataset = *score
|
||
}
|
||
|
||
//计算image相关得分
|
||
imageFileScores, err := s.calcImageFileScore(files.Image.ImageID, allCCs, cdsNodeToCC)
|
||
if err != nil {
|
||
return fmt.Errorf("calc image file score: %w", err)
|
||
}
|
||
for id, score := range imageFileScores {
|
||
allCCs[id].Files.Image = *score
|
||
}
|
||
|
||
for _, cc := range allCCs {
|
||
cc.Files.TotalScore = cc.Files.Code.CachingScore +
|
||
cc.Files.Code.LoadingScore +
|
||
cc.Files.Dataset.CachingScore +
|
||
cc.Files.Dataset.LoadingScore +
|
||
cc.Files.Image.CachingScore +
|
||
cc.Files.Image.LoadingScore
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// 计算package在各节点的得分情况
|
||
func (s *DefaultScheduler) calcPackageFileScore(packageID cdssdk.PackageID, cdsNodeToCC map[cdssdk.NodeID]*candidate) (map[schsdk.CCID]*fileDetail, error) {
|
||
colCli, err := schglb.CollectorMQPool.Acquire()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("new collector client: %w", err)
|
||
}
|
||
defer schglb.CollectorMQPool.Release(colCli)
|
||
|
||
ccFileScores := make(map[schsdk.CCID]*fileDetail)
|
||
|
||
cachedResp, err := colCli.PackageGetCachedStgNodes(collector.NewPackageGetCachedStgNodes(0, packageID))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
for _, cdsNodeCacheInfo := range cachedResp.NodeInfos {
|
||
cc, ok := cdsNodeToCC[cdsNodeCacheInfo.NodeID]
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
ccFileScores[cc.CC.CCID] = &fileDetail{
|
||
//TODO 根据缓存方式不同,可能会有不同的计算方式
|
||
CachingScore: float64(cdsNodeCacheInfo.FileSize) / float64(cachedResp.PackageSize) * CachingWeight,
|
||
}
|
||
}
|
||
|
||
loadedResp, err := colCli.PackageGetLoadedStgNodes(collector.NewPackageGetLoadedStgNodes(0, packageID))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
for _, cdsNodeID := range loadedResp.StgNodeIDs {
|
||
cc, ok := cdsNodeToCC[cdsNodeID]
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
fsc, ok := ccFileScores[cc.CC.CCID]
|
||
if !ok {
|
||
fsc = &fileDetail{}
|
||
ccFileScores[cc.CC.CCID] = fsc
|
||
}
|
||
|
||
fsc.LoadingScore = 1 * LoadedWeight
|
||
fsc.IsLoaded = true
|
||
}
|
||
|
||
return ccFileScores, nil
|
||
}
|
||
|
||
// 计算package在各节点的得分情况
|
||
func (s *DefaultScheduler) calcImageFileScore(imageID schsdk.ImageID, allCCs map[schsdk.CCID]*candidate, cdsNodeToCC map[cdssdk.NodeID]*candidate) (map[schsdk.CCID]*fileDetail, error) {
|
||
colCli, err := schglb.CollectorMQPool.Acquire()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("new collector client: %w", err)
|
||
}
|
||
defer schglb.CollectorMQPool.Release(colCli)
|
||
|
||
magCli, err := schglb.ManagerMQPool.Acquire()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("new manager client: %w", err)
|
||
}
|
||
defer schglb.ManagerMQPool.Release(magCli)
|
||
|
||
imageInfoResp, err := magCli.GetImageInfo(mgrmq.NewGetImageInfo(imageID))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("getting image info: %w", err)
|
||
}
|
||
|
||
ccFileScores := make(map[schsdk.CCID]*fileDetail)
|
||
|
||
if imageInfoResp.Image.CDSPackageID != nil {
|
||
cachedResp, err := colCli.PackageGetCachedStgNodes(collector.NewPackageGetCachedStgNodes(0, *imageInfoResp.Image.CDSPackageID))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
for _, cdsNodeCacheInfo := range cachedResp.NodeInfos {
|
||
cc, ok := cdsNodeToCC[cdsNodeCacheInfo.NodeID]
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
ccFileScores[cc.CC.CCID] = &fileDetail{
|
||
//TODO 根据缓存方式不同,可能会有不同的计算方式
|
||
CachingScore: float64(cdsNodeCacheInfo.FileSize) / float64(cachedResp.PackageSize) * CachingWeight,
|
||
}
|
||
}
|
||
}
|
||
|
||
// 镜像的LoadingScore是判断是否导入到算力中心
|
||
for _, pcmImg := range imageInfoResp.PCMImages {
|
||
_, ok := allCCs[pcmImg.CCID]
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
fsc, ok := ccFileScores[pcmImg.CCID]
|
||
if !ok {
|
||
fsc = &fileDetail{}
|
||
ccFileScores[pcmImg.CCID] = fsc
|
||
}
|
||
|
||
fsc.LoadingScore = 1 * LoadedWeight
|
||
fsc.IsLoaded = true
|
||
}
|
||
|
||
return ccFileScores, nil
|
||
}
|
||
|
||
func findResuorce[T uopsdk.ResourceData](all []uopsdk.ResourceData) T {
|
||
for _, data := range all {
|
||
if ret, ok := data.(T); ok {
|
||
return ret
|
||
}
|
||
}
|
||
|
||
var def T
|
||
return def
|
||
}
|