332 lines
8.8 KiB
Protocol Buffer
332 lines
8.8 KiB
Protocol Buffer
// Schema of the pcmCore package, written in proto3 syntax.
syntax = "proto3";

package pcmCore;

// Go import path used for the generated code.
option go_package = "/pcmCore";
|
||
|
||
// SyncInfoReq is the request message of the pcmCore.SyncInfo RPC.
// It carries one participant id plus one list per info kind.
message SyncInfoReq {
  // Id of the participant the lists belong to (presumably the sender;
  // confirm against the caller).
  int64 participantId = 1;

  // HPC entries.
  repeated HpcInfo HpcInfoList = 2;

  // Cloud entries.
  repeated CloudInfo CloudInfoList = 3;

  // AI entries.
  repeated AiInfo AiInfoList = 4;

  // VM entries.
  repeated VmInfo VmInfoList = 5;
}
|
||
|
||
// AiInfo is the element type of SyncInfoReq.AiInfoList and
// InfoListResp.AiInfoList.
//
// NOTE(review): project_id is the only snake_case field name in this message;
// it is kept as is because renaming changes JSON keys and generated accessors.
// Time-related fields are plain strings/integers; their format and units are
// not specified in this file.
message AiInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string project_id = 3;
  string name = 4;
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;
  string jobId = 9;
  string createTime = 10;
  string imageUrl = 11;
  string command = 12;
  string flavorId = 13;
  string subscriptionId = 14;
  string itemVersionId = 15;
}
|
||
|
||
// CloudInfo is the element type of SyncInfoReq.CloudInfoList and
// InfoListResp.CloudInfoList.
//
// NOTE(review): the first field is named "participant", whereas the sibling
// messages AiInfo, VmInfo and HpcInfo use "participantId"; it is presumably
// the same id, confirm before relying on it.
message CloudInfo {
  int64 participant = 1;
  int64 id = 2;
  int64 taskId = 3;
  string apiVersion = 4;
  string kind = 5;
  string namespace = 6;
  string name = 7;

  // Declared here, between fields 7 and 8, although it carries number 11.
  // The number is what identifies the field on the wire, so it must stay 11.
  string status = 11;

  string startTime = 8;
  int64 runningTime = 9;
  string result = 10;
  string yamlString = 12;
}
|
||
|
||
// VmInfo is the element type of SyncInfoReq.VmInfoList.
//
// Apart from participantId and taskId, the field names in this message are
// snake_case, unlike most other messages in this file.
message VmInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string name = 3;
  string flavor_ref = 4;
  string image_ref = 5;
  string network_uuid = 6;
  string block_uuid = 7;
  string source_type = 8;
  bool delete_on_termination = 9;
  string state = 10;
}
|
||
|
||
// HpcInfo is the element type of SyncInfoReq.HpcInfoList and
// InfoListResp.HpcInfoList.
//
// The fields are not documented in this file; formats and units of the
// time-related fields (startTime, runningTime, wallTime) are unspecified here.
message HpcInfo {
  // Fields 1-15.
  int64 participantId = 1;
  int64 taskId = 2;
  string jobId = 3;
  string name = 4;
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;
  string workDir = 9;
  string wallTime = 10;
  string cmdScript = 11;
  string derivedEs = 12;
  string cluster = 13;
  string blockId = 14;
  uint32 allocNodes = 15;

  // Fields 16-29.
  uint32 allocCpu = 16;
  string version = 17;
  string account = 18;
  uint32 exitCode = 19;
  uint32 assocId = 20;
  string appType = 21;
  string appName = 22;
  string queue = 23;
  string submitType = 24;
  string nNode = 25;
  string stdOutFile = 26;
  string stdErrFile = 27;
  string stdInput = 28;
  string environment = 29;
}
|
||
|
||
// SyncInfoResp is the response message of the pcmCore.SyncInfo RPC.
message SyncInfoResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;
}
|
||
|
||
// InfoListReq is the request message of the pcmCore.InfoList RPC.
message InfoListReq {
  // Id of the participant whose info is requested (presumably; confirm
  // against the server implementation).
  int64 participantId = 1;
}
|
||
|
||
// InfoListResp is the response message of the pcmCore.InfoList RPC.
//
// Unlike SyncInfoReq it has no VmInfo list, and the list field numbers differ
// (1-3 here, 2-5 in SyncInfoReq).
message InfoListResp {
  // HPC entries.
  repeated HpcInfo HpcInfoList = 1;

  // Cloud entries.
  repeated CloudInfo CloudInfoList = 2;

  // AI entries.
  repeated AiInfo AiInfoList = 3;
}
|
||
|
||
// pcmCore groups the PCM core RPCs. The service shares its name with the
// enclosing package.
service pcmCore {
  // SyncInfo takes a SyncInfoReq (participant id plus per-kind info lists)
  // and answers with a code/msg pair.
  rpc SyncInfo(SyncInfoReq) returns (SyncInfoResp);

  // InfoList takes a participant id and returns HPC, cloud and AI info lists.
  rpc InfoList(InfoListReq) returns (InfoListResp);
}
|
||
|
||
// ParticipantTenant carries tenant information. It is the request message of
// participantService.listParticipant, listPhyAvailable and listPhyInformation.
message ParticipantTenant {
  // Tenant name.
  string tenantName = 1;
}
|
||
|
||
// ParticipantLabel is a cluster label; it is the element type of
// ParticipantPhyReq.labelInfo.
message ParticipantLabel {
  // Label id.
  int64 id = 1;

  // Label name.
  string key = 2;

  // Label value.
  string value = 3;
}
|
||
|
||
// MessageStatus is the type of HealthCheckResp.messageStatus.
//
// NOTE(review): FAIL occupies the zero value, which proto3 uses as the default
// of an enum field that was never set, so a response that leaves the status
// unset is read as FAIL. The numbering is part of the wire contract and is
// therefore left unchanged.
enum MessageStatus {
  FAIL = 0;
  SUCCESS = 1;
  UNKNOWN = 2;
}
|
||
|
||
// HealthCheckResp is the response message of
// participantService.reportHeartbeat.
message HealthCheckResp {
  // Status of the call; defaults to FAIL (value 0) when not set.
  MessageStatus messageStatus = 1;

  // Numeric result code; its values are not defined in this file.
  int64 code = 2;

  // Text accompanying the code.
  string msg = 3;
}
|
||
|
||
// ParticipantPhyResp is the response message of
// participantService.registerParticipant.
message ParticipantPhyResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;

  // Unique identifier of the participant.
  int64 participantId = 3;
}
|
||
|
||
// ListParticipantPhyResp returns cluster static information; it is the
// response message of participantService.listPhyInformation.
message ListParticipantPhyResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;

  // Static information entries. The element type is the same message that
  // registerParticipant accepts as its request.
  repeated ParticipantPhyReq ParticipantPhys = 3;
}
|
||
|
||
// ParticipantPhyReq holds the static information of a participant. It is the
// request message of participantService.registerParticipant and the element
// type of ListParticipantPhyResp.ParticipantPhys.
message ParticipantPhyReq {
  // Field number 3 is not assigned to any field of this message (presumably a
  // removed field; confirm against the file history). It is reserved so that
  // it cannot be handed to a new field by accident.
  reserved 3;

  // Name.
  string name = 1;

  // Address.
  string address = 2;

  // Cluster network type.
  string networkType = 4;

  // Cluster network bandwidth.
  string networkBandwidth = 5;

  // Cluster storage type.
  string storageType = 6;

  // Cluster storage space.
  string storageSpace = 7;

  // Cluster available storage space.
  string storageAvailSpace = 8;

  // Cluster storage bandwidth.
  string storageBandwidth = 9;

  // Participant type: CLOUD - general computing cluster; AI - intelligent
  // computing cluster; HPC - supercomputing cluster.
  string type = 10;

  // Tenant id.
  int64 tenantId = 11;

  // Tenant name.
  string tenantName = 12;

  // Node information.
  repeated NodePhyInfo nodeInfo = 13;

  // Participant id.
  int64 participantId = 14;

  // Label information.
  repeated ParticipantLabel labelInfo = 15;

  // Queue information.
  repeated QueuePhyInfo queueInfo = 16;

  // Id.
  int64 id = 17;

  // Monitoring URL.
  // NOTE(review): fields 18-20 are PascalCase, unlike the rest of the message;
  // the names are kept because renaming changes JSON keys.
  string MetricsUrl = 18;

  // RPC address (not documented in the original).
  string RpcAddress = 19;

  // Token.
  string Token = 20;
}
|
||
|
||
// NodePhyInfo holds node information; it is the element type of
// ParticipantPhyReq.nodeInfo.
message NodePhyInfo {
  // Id (not documented in the original).
  int64 id = 1;

  // Node name.
  string nodeName = 2;

  // Operating system name.
  string osName = 3;

  // Operating system version.
  string osVersion = 4;

  // Architecture type.
  string archType = 5;

  // Architecture name.
  string archName = 6;

  // Architecture frequency (carried as a string; unit not specified here).
  string archFreq = 7;
}
|
||
|
||
// QueuePhyInfo holds queue information; it is the element type of
// ParticipantPhyReq.queueInfo. All quantities are carried as strings.
message QueuePhyInfo {
  // Id.
  int64 id = 1;

  // Available nodes; multiple nodes are separated by commas.
  string aclHosts = 2;

  // NOTE(review): the original comment on queueId, text and queueName was
  // "queue name" for all three; how they differ is not visible here - confirm.
  // Queue name (per the original comment; presumably the queue identifier).
  string queueId = 3;

  // Queue name (per the original comment).
  string text = 4;

  // Queue name.
  string queueName = 5;

  // Total number of nodes in the queue.
  string queNodes = 6;

  // Minimum number of nodes of the queue.
  string queMinNodect = 7;

  // Maximum number of GPU cards of the queue.
  string queMaxNgpus = 8;

  // Maximum number of CPU cores for a job using this queue.
  string queMaxPpn = 9;

  // Charge rate.
  string queChargeRate = 10;

  // Maximum number of cores available to a user.
  string queMaxNcpus = 11;

  // Total number of DCU cards in the queue.
  string queMaxNdcus = 12;

  // Minimum number of CPU cores of the queue.
  string queMinNcpus = 13;

  // Number of idle nodes in the queue.
  string queFreeNodes = 14;

  // Maximum number of nodes for a job in the queue.
  string queMaxNodect = 15;

  // Maximum number of GPU cards for a single job in the queue.
  string queMaxGpuPN = 16;

  // Maximum run time of the queue.
  string queMaxWalltime = 17;

  // Maximum number of DCU cards for a single job in the queue.
  string queMaxDcuPN = 18;

  // Number of CPUs of the queue.
  string queNcpus = 19;

  // Number of idle CPUs of the queue.
  string queFreeNcpus = 20;
}
|
||
|
||
// ParticipantHeartbeatReq is the heartbeat request; it is the request message
// of participantService.reportHeartbeat.
message ParticipantHeartbeatReq {
  // Participant id.
  int64 participantId = 1;

  // Address (not documented in the original).
  string address = 2;
}
|
||
|
||
// ParticipantAvailReq holds the availability information of a participant.
// It is the request message of participantService.reportAvailable and the
// element type of ListParticipantAvailResp.ParticipantAvails.
message ParticipantAvailReq {
  // Id.
  int64 id = 1;

  // Cluster available storage space (unit not specified here).
  int64 availStorageSpace = 2;

  // Number of users.
  int64 userNum = 3;

  // Number of pending jobs.
  int64 pendingJobNum = 4;

  // Number of running jobs.
  int64 runningJobNum = 5;

  // Id of the cluster static information.
  int64 participantId = 6;

  // Node availability information.
  repeated NodeAvailInfo nodeAvailInfo = 7;
}
|
||
|
||
// NodeAvailInfo holds the availability information of one node; it is the
// element type of ParticipantAvailReq.nodeAvailInfo. Units of the disk and
// memory quantities are not specified in this file.
message NodeAvailInfo {
  // Id.
  int64 id = 1;

  // Node name.
  string nodeName = 2;

  // Number of CPU cores.
  int64 cpuTotal = 3;

  // CPU availability rate.
  double cpuUsable = 4;

  // Disk space.
  int64 diskTotal = 5;

  // Available disk space.
  int64 diskAvail = 6;

  // Total memory.
  int64 memTotal = 7;

  // Available memory.
  int64 memAvail = 8;

  // Total number of GPUs.
  int64 gpuTotal = 9;

  // Number of available GPUs.
  int64 gpuAvail = 10;

  // Id of the cluster dynamic information.
  int64 participantId = 11;
}
|
||
|
||
// ListParticipantAvailResp returns cluster availability information; it is
// the response message of participantService.listPhyAvailable.
message ListParticipantAvailResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;

  // Availability entries. The element type is the same message that
  // reportAvailable accepts as its request.
  repeated ParticipantAvailReq ParticipantAvails = 3;
}
|
||
|
||
// ParticipantResp is the response message of
// participantService.reportAvailable.
message ParticipantResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;
}
|
||
// ParticipantServiceResp is the response message of
// participantService.listParticipant.
message ParticipantServiceResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;

  // Client entries.
  repeated ClientInfo data = 3;
}
|
||
|
||
// ClientInfo is the element type of ParticipantServiceResp.data.
//
// The trailing "@gotags" comments look like directives for a Go struct-tag
// injection tool (presumably protoc-go-inject-tag; confirm against the build).
// They must stay on the field lines exactly as written.
message ClientInfo {
  string address = 1; // @gotags: redis:"address"
  int64 participantId = 2; // @gotags: redis:"participantId"
  string clientState = 3; // @gotags: redis:"clientState"
  int64 lastHeartbeat = 4; // @gotags: redis:"lastHeartbeat"
}
|
||
|
||
// TenantInfo describes a tenant. It is the request message of
// participantService.registerTenant and listTenant, and the element type of
// ListTenantResp.tenantInfos.
message TenantInfo {
  // Tenant id.
  int64 id = 1;

  // Tenant name.
  string tenantName = 2;

  // Tenant description (presumably; "Desc" is not expanded in this file).
  string tenantDesc = 3;
}
|
||
|
||
// TenantResp is the response message of participantService.registerTenant.
message TenantResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;

  // Id (presumably of the registered tenant; confirm against the server).
  int64 id = 3;
}
|
||
|
||
// ListTenantResp is the response message of participantService.listTenant.
message ListTenantResp {
  // Numeric result code; its values are not defined in this file.
  int64 code = 1;

  // Text accompanying the code.
  string msg = 2;

  // Tenant entries.
  repeated TenantInfo tenantInfos = 3;
}
|
||
|
||
// ApplyListReq is the request message of participantService.applyList and
// deleteList. It currently declares no fields.
message ApplyListReq {}
|
||
|
||
// ApplyListResp is the response message of participantService.applyList and
// deleteList.
message ApplyListResp {
  // Returned entries.
  repeated ApplyInfo infoList = 1;
}
|
||
|
||
// ApplyInfo is the element type of ApplyListResp.infoList.
message ApplyInfo {
  // Name of the participant.
  string participantName = 1;

  // YAML document carried as a string (presumably; content not specified in
  // this file).
  string yamlString = 2;
}
|
||
|
||
// participantService is the participant-facing service: registration,
// heartbeat, availability reporting, and listing of participants and tenants.
//
// Every RPC is declared in the same `rpc ... returns (...);` form; the
// original mixed `{}` and `{};` bodies, which describe the same methods.
service participantService {
  // registerParticipant is the participant registration interface.
  rpc registerParticipant(ParticipantPhyReq) returns (ParticipantPhyResp);

  // reportHeartbeat is the heartbeat request.
  rpc reportHeartbeat(ParticipantHeartbeatReq) returns (HealthCheckResp);

  // reportAvailable reports monitoring data.
  rpc reportAvailable(ParticipantAvailReq) returns (ParticipantResp);

  // listParticipant returns the service list.
  rpc listParticipant(ParticipantTenant) returns (ParticipantServiceResp);

  // listPhyAvailable returns the list of cluster dynamic information.
  rpc listPhyAvailable(ParticipantTenant) returns (ListParticipantAvailResp);

  // listPhyInformation returns the list of cluster static information.
  rpc listPhyInformation(ParticipantTenant) returns (ListParticipantPhyResp);

  // registerTenant registers tenant information.
  rpc registerTenant(TenantInfo) returns (TenantResp);

  // listTenant returns the tenant list.
  rpc listTenant(TenantInfo) returns (ListTenantResp);

  // applyList returns the list of tasks to execute.
  rpc applyList(ApplyListReq) returns (ApplyListResp);

  // deleteList returns the list of tasks to delete. It uses the same request
  // and response messages as applyList.
  rpc deleteList(ApplyListReq) returns (ApplyListResp);
}