// pcm-coordinator/rpc/pb/pcmCore.proto

// 332 lines
// 8.8 KiB
// Protocol Buffer
// Raw Blame History

// This file contains ambiguous Unicode characters

// This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// This schema is written in the proto3 language version.
syntax = "proto3";
// Protobuf package under which every definition in this file is declared.
package pcmCore;
// Go package path used for the generated Go code.
option go_package = "/pcmCore";
// SyncInfoReq is the request message of the pcmCore.SyncInfo RPC.
// It carries one participant id plus a list of records for each task kind.
message SyncInfoReq {
  // Id of the participant the lists belong to.
  int64 participantId = 1;

  // HPC task records.
  repeated HpcInfo HpcInfoList = 2;

  // Cloud task records.
  repeated CloudInfo CloudInfoList = 3;

  // AI task records.
  repeated AiInfo AiInfoList = 4;

  // VM task records.
  repeated VmInfo VmInfoList = 5;
}
// AiInfo is one AI task record. It is the element type of
// SyncInfoReq.AiInfoList and InfoListResp.AiInfoList.
message AiInfo {
  int64 participantId = 1;
  int64 taskId = 2;

  // NOTE(review): the only snake_case field in this message; the name is kept
  // because renaming would change generated accessors and the JSON key.
  string project_id = 3;

  string name = 4;
  string status = 5;

  // Carried as a string; the time format is not specified in this file.
  string startTime = 6;

  // The unit is not specified in this file - TODO confirm.
  int64 runningTime = 7;

  string result = 8;
  string jobId = 9;

  // Carried as a string; the time format is not specified in this file.
  string createTime = 10;

  string imageUrl = 11;
  string command = 12;
  string flavorId = 13;
  string subscriptionId = 14;
  string itemVersionId = 15;
}
// CloudInfo is one cloud task record. It is the element type of
// SyncInfoReq.CloudInfoList and InfoListResp.CloudInfoList.
message CloudInfo {
  // NOTE(review): named `participant` here while the sibling *Info messages
  // use `participantId`; presumably the same identifier - confirm.
  int64 participant = 1;

  int64 id = 2;
  int64 taskId = 3;
  string apiVersion = 4;
  string kind = 5;
  string namespace = 6;
  string name = 7;

  // Declared out of numeric order: `status` owns field number 11.
  string status = 11;

  // Carried as a string; the time format is not specified in this file.
  string startTime = 8;

  // The unit is not specified in this file - TODO confirm.
  int64 runningTime = 9;

  string result = 10;
  string yamlString = 12;
}
// VmInfo is one VM task record. It is the element type of
// SyncInfoReq.VmInfoList.
//
// Unlike most messages in this file, the fields below use snake_case names;
// they are kept verbatim so generated code and JSON keys do not change.
message VmInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string name = 3;
  string flavor_ref = 4;
  string image_ref = 5;
  string network_uuid = 6;
  string block_uuid = 7;
  string source_type = 8;
  bool delete_on_termination = 9;
  string state = 10;
}
// HpcInfo is one HPC task record. It is the element type of
// SyncInfoReq.HpcInfoList and InfoListResp.HpcInfoList.
message HpcInfo {
  // Identification.
  int64 participantId = 1;
  int64 taskId = 2;
  string jobId = 3;
  string name = 4;

  // State and timing. The time format and the runningTime unit are not
  // specified in this file - TODO confirm.
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;

  // Job description.
  string workDir = 9;
  string wallTime = 10;
  string cmdScript = 11;
  string derivedEs = 12;
  string cluster = 13;
  string blockId = 14;

  // Allocation and accounting (unsigned 32-bit counters / codes).
  uint32 allocNodes = 15;
  uint32 allocCpu = 16;
  string version = 17;
  string account = 18;
  uint32 exitCode = 19;
  uint32 assocId = 20;

  // Submission parameters.
  string appType = 21;
  string appName = 22;
  string queue = 23;
  string submitType = 24;
  // Carried as a string even though the name suggests a count.
  string nNode = 25;
  string stdOutFile = 26;
  string stdErrFile = 27;
  string stdInput = 28;
  string environment = 29;
}
// SyncInfoResp is the response message of the pcmCore.SyncInfo RPC.
message SyncInfoResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;
}
// InfoListReq is the request message of the pcmCore.InfoList RPC.
message InfoListReq {
  // Id of the participant whose records are requested.
  int64 participantId = 1;
}
// InfoListResp is the response message of the pcmCore.InfoList RPC.
//
// NOTE(review): SyncInfoReq also carries a VmInfo list, but this response has
// no counterpart for it - confirm whether that omission is intentional.
message InfoListResp {
  // HPC task records.
  repeated HpcInfo HpcInfoList = 1;

  // Cloud task records.
  repeated CloudInfo CloudInfoList = 2;

  // AI task records.
  repeated AiInfo AiInfoList = 3;
}
// pcmCore exposes the PCM core RPCs.
service pcmCore {
  // SyncInfo synchronises data information: it takes a SyncInfoReq and
  // returns a SyncInfoResp.
  rpc SyncInfo(SyncInfoReq) returns (SyncInfoResp);

  // InfoList takes an InfoListReq and returns an InfoListResp.
  rpc InfoList(InfoListReq) returns (InfoListResp);
}
// ParticipantTenant holds tenant information. It is the request message of
// the listParticipant, listPhyAvailable and listPhyInformation RPCs.
message ParticipantTenant {
  // Tenant name.
  string tenantName = 1;
}
// ParticipantLabel is a cluster label (a key/value pair with an id).
message ParticipantLabel {
  // Id.
  int64 id = 1;

  // Label name.
  string key = 2;

  // Label value.
  string value = 3;
}
// MessageStatus is the status enum carried in HealthCheckResp.messageStatus.
//
// NOTE(review): FAIL is the zero value, so in proto3 a field of this type that
// was never set reads back as FAIL. The value names and numbers are part of
// the published contract and are therefore left unchanged.
enum MessageStatus {
  // Zero value / proto3 default.
  FAIL = 0;

  SUCCESS = 1;

  UNKNOWN = 2;
}
// HealthCheckResp is the response message of the
// participantService.reportHeartbeat RPC.
message HealthCheckResp {
  // Status enum value; defaults to FAIL (0) when unset.
  MessageStatus messageStatus = 1;

  // Result code; the set of values is not defined in this file.
  int64 code = 2;

  // Message accompanying the code.
  string msg = 3;
}
// ParticipantPhyResp is the response message of the
// participantService.registerParticipant RPC.
message ParticipantPhyResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;

  // Unique identifier of the participant.
  int64 participantId = 3;
}
// ListParticipantPhyResp is the cluster static-information response; it is
// returned by the participantService.listPhyInformation RPC.
message ListParticipantPhyResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;

  // Static-information entries. The element type is the same
  // ParticipantPhyReq message that registerParticipant accepts as its request.
  repeated ParticipantPhyReq ParticipantPhys = 3;
}
// ParticipantPhyReq holds the static information of a participant. It is the
// request message of participantService.registerParticipant and the element
// type of ListParticipantPhyResp.ParticipantPhys.
message ParticipantPhyReq {
  // Field number 3 is not assigned to any field of this message. It is
  // reserved so it cannot be handed to a new field by accident; it may have
  // belonged to a removed field - TODO confirm against history.
  reserved 3;

  // Name.
  string name = 1;

  // Address.
  string address = 2;

  // Cluster network type.
  string networkType = 4;

  // Cluster network bandwidth.
  string networkBandwidth = 5;

  // Cluster storage type.
  string storageType = 6;

  // Cluster storage space.
  string storageSpace = 7;

  // Cluster available storage space.
  string storageAvailSpace = 8;

  // Cluster storage bandwidth.
  string storageBandwidth = 9;

  // Participant type: CLOUD - general computing cluster; AI - intelligent
  // computing cluster; HPC - supercomputing cluster.
  string type = 10;

  // Tenant id.
  int64 tenantId = 11;

  // Tenant name.
  string tenantName = 12;

  // Node information.
  repeated NodePhyInfo nodeInfo = 13;

  // Participant id.
  int64 participantId = 14;

  // Label information.
  repeated ParticipantLabel labelInfo = 15;

  // Queue information.
  repeated QueuePhyInfo queueInfo = 16;

  // Id.
  int64 id = 17;

  // Monitoring URL.
  string MetricsUrl = 18;

  string RpcAddress = 19;

  // Token.
  string Token = 20;
}
// NodePhyInfo holds node information; it is the element type of
// ParticipantPhyReq.nodeInfo.
message NodePhyInfo {
  int64 id = 1;

  // Node name.
  string nodeName = 2;

  // Operating system name.
  string osName = 3;

  // Operating system version.
  string osVersion = 4;

  // Architecture type.
  string archType = 5;

  // Architecture name.
  string archName = 6;

  // Architecture frequency.
  string archFreq = 7;
}
// QueuePhyInfo holds queue information; it is the element type of
// ParticipantPhyReq.queueInfo. Every quantity below is carried as a string.
message QueuePhyInfo {
  // Id.
  int64 id = 1;

  // Available nodes; multiple nodes are separated by commas.
  string aclHosts = 2;

  // Originally commented as "queue name".
  // NOTE(review): queueId, text and queueName all carried that same comment;
  // presumably this one is the queue identifier - confirm.
  string queueId = 3;

  // Originally commented as "queue name" (same comment as queueId and
  // queueName) - TODO confirm what this field actually holds.
  string text = 4;

  // Queue name.
  string queueName = 5;

  // Total number of nodes in the queue.
  string queNodes = 6;

  // Minimum number of nodes for the queue.
  string queMinNodect = 7;

  // Maximum number of GPU cards for the queue.
  string queMaxNgpus = 8;

  // Maximum number of CPU cores for a job using this queue.
  string queMaxPpn = 9;

  // Charge rate.
  string queChargeRate = 10;

  // Maximum number of cores available to a user.
  string queMaxNcpus = 11;

  // Total number of DCU cards in the queue.
  string queMaxNdcus = 12;

  // Minimum number of CPU cores for the queue.
  string queMinNcpus = 13;

  // Number of free nodes in the queue.
  string queFreeNodes = 14;

  // Maximum number of nodes for a job in the queue.
  string queMaxNodect = 15;

  // Maximum number of GPU cards for a single job in the queue.
  string queMaxGpuPN = 16;

  // Maximum run time for the queue.
  string queMaxWalltime = 17;

  // Maximum number of DCU cards for a single job in the queue.
  string queMaxDcuPN = 18;

  // Number of CPUs in the queue.
  string queNcpus = 19;

  // Number of free CPUs in the queue.
  string queFreeNcpus = 20;
}
// ParticipantHeartbeatReq is the heartbeat request; it is the request message
// of the participantService.reportHeartbeat RPC.
message ParticipantHeartbeatReq {
  // Participant id.
  int64 participantId = 1;

  string address = 2;
}
// ParticipantAvailReq holds the availability information of a participant.
// It is the request message of participantService.reportAvailable and the
// element type of ListParticipantAvailResp.ParticipantAvails.
message ParticipantAvailReq {
  // Id.
  int64 id = 1;

  // Cluster available storage space (unit not specified in this file).
  int64 availStorageSpace = 2;

  // Number of users.
  int64 userNum = 3;

  // Number of pending jobs.
  int64 pendingJobNum = 4;

  // Number of running jobs.
  int64 runningJobNum = 5;

  // Id of the cluster static information.
  int64 participantId = 6;

  // Per-node availability information.
  repeated NodeAvailInfo nodeAvailInfo = 7;
}
// NodeAvailInfo holds the availability information of one node; it is the
// element type of ParticipantAvailReq.nodeAvailInfo. Units of the disk and
// memory figures are not specified in this file.
message NodeAvailInfo {
  // Id.
  int64 id = 1;

  // Node name.
  string nodeName = 2;

  // Number of CPU cores.
  int64 cpuTotal = 3;

  // CPU availability ratio.
  double cpuUsable = 4;

  // Disk space.
  int64 diskTotal = 5;

  // Available disk space.
  int64 diskAvail = 6;

  // Total memory.
  int64 memTotal = 7;

  // Available memory.
  int64 memAvail = 8;

  // Total number of GPUs.
  int64 gpuTotal = 9;

  // Number of available GPUs.
  int64 gpuAvail = 10;

  // Id of the cluster dynamic information.
  int64 participantId = 11;
}
// ListParticipantAvailResp carries cluster availability information; it is
// returned by the participantService.listPhyAvailable RPC.
message ListParticipantAvailResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;

  // Availability entries. The element type is the same ParticipantAvailReq
  // message that reportAvailable accepts as its request.
  repeated ParticipantAvailReq ParticipantAvails = 3;
}
// ParticipantResp is the response message of the
// participantService.reportAvailable RPC.
message ParticipantResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;
}
// ParticipantServiceResp is the response message of the
// participantService.listParticipant RPC.
message ParticipantServiceResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;

  // Client entries.
  repeated ClientInfo data = 3;
}
// ClientInfo is the element type of ParticipantServiceResp.data.
//
// The trailing `@gotags` comments are kept verbatim on the same line as their
// field; they look like struct-tag annotations consumed by a Go tag-injection
// tool (presumably protoc-go-inject-tag - confirm), so do not edit or move them.
message ClientInfo {
  string address = 1; // @gotags: redis:"address"
  int64 participantId = 2; // @gotags: redis:"participantId"
  string clientState = 3; // @gotags: redis:"clientState"
  int64 lastHeartbeat = 4; // @gotags: redis:"lastHeartbeat"
}
// TenantInfo describes a tenant. It is the request message of the
// registerTenant and listTenant RPCs and the element type of
// ListTenantResp.tenantInfos.
message TenantInfo {
  int64 id = 1;
  string tenantName = 2;
  string tenantDesc = 3;
}
// TenantResp is the response message of the
// participantService.registerTenant RPC.
message TenantResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;

  // NOTE(review): presumably the id of the registered tenant - confirm.
  int64 id = 3;
}
// ListTenantResp is the response message of the
// participantService.listTenant RPC.
message ListTenantResp {
  // Result code; the set of values is not defined in this file.
  int64 code = 1;

  // Message accompanying the code.
  string msg = 2;

  // Tenant entries.
  repeated TenantInfo tenantInfos = 3;
}
// ApplyListReq is the (currently field-less) request message shared by the
// applyList and deleteList RPCs.
message ApplyListReq {}
// ApplyListResp is the response message shared by the applyList and
// deleteList RPCs.
message ApplyListResp {
  // Returned entries.
  repeated ApplyInfo infoList = 1;
}
// ApplyInfo is the element type of ApplyListResp.infoList.
message ApplyInfo {
  string participantName = 1;

  // NOTE(review): presumably a YAML document serialized as a string - confirm.
  string yamlString = 2;
}
// participantService groups the participant-facing RPCs: registration,
// heartbeat, availability reporting, listings, tenants and task lists.
//
// Every RPC is declared without an options body; the original mix of `{}` and
// `{};` terminators declared no options either, so the service is unchanged.
service participantService {
  // registerParticipant is the participant registration interface.
  rpc registerParticipant(ParticipantPhyReq) returns (ParticipantPhyResp);

  // reportHeartbeat is the heartbeat request.
  rpc reportHeartbeat(ParticipantHeartbeatReq) returns (HealthCheckResp);

  // reportAvailable reports monitoring data.
  rpc reportAvailable(ParticipantAvailReq) returns (ParticipantResp);

  // listParticipant returns the service list.
  rpc listParticipant(ParticipantTenant) returns (ParticipantServiceResp);

  // listPhyAvailable returns the list of cluster dynamic information.
  rpc listPhyAvailable(ParticipantTenant) returns (ListParticipantAvailResp);

  // listPhyInformation returns the list of cluster static information.
  rpc listPhyInformation(ParticipantTenant) returns (ListParticipantPhyResp);

  // registerTenant registers tenant information.
  rpc registerTenant(TenantInfo) returns (TenantResp);

  // listTenant returns the tenant list information.
  rpc listTenant(TenantInfo) returns (ListTenantResp);

  // applyList returns the list of tasks to execute.
  rpc applyList(ApplyListReq) returns (ApplyListResp);

  // deleteList returns the list of tasks to delete. It shares its request
  // and response messages with applyList.
  rpc deleteList(ApplyListReq) returns (ApplyListResp);
}