Skip to content

Commit

Permalink
Edge Node Clusters: nodes, volumes, replicas, upgrades
Browse files Browse the repository at this point in the history
Node Info: role, age, ip
Pod Info: status, restarts, ip
Volume and Replica Info: rebuild progress, health
Upgrade Status: cluster wide and node components

Signed-off-by: Andrew Durbin <[email protected]>
  • Loading branch information
andrewd-zededa committed Aug 19, 2024
1 parent 106b1d6 commit e487a11
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 52 deletions.
267 changes: 267 additions & 0 deletions proto/info/edge_node_cluster.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
// Copyright(c) 2024 Zededa, Inc.
// All rights reserved.

syntax = "proto3";

package org.lfedge.eve.info;

import "info/error.proto";

option go_package = "github.com/lf-edge/eve-api/go/info";
option java_package = "org.lfedge.eve.info";

import "google/protobuf/timestamp.proto";

// KubeNodeConditionType enumerates the conditions reported for a cluster node.
// The values follow the Kubernetes node condition types, see:
// https://pkg.go.dev/k8s.io/api/core/v1#NodeConditionType
//
// NOTE(review): the zero value is READY rather than an UNSPECIFIED/UNKNOWN
// placeholder, so a condition whose type was never set decodes as READY.
// The numbering is kept unchanged for wire compatibility.
enum KubeNodeConditionType {
  // Ready condition.
  KUBE_NODE_CONDITION_TYPE_READY = 0;

  // Memory pressure condition.
  KUBE_NODE_CONDITION_TYPE_MEMORY_PRESSURE = 1;

  // Disk pressure condition.
  KUBE_NODE_CONDITION_TYPE_DISK_PRESSURE = 2;

  // PID pressure condition.
  KUBE_NODE_CONDITION_TYPE_PID_PRESSURE = 3;

  // Network unavailable condition.
  KUBE_NODE_CONDITION_TYPE_NETWORK_UNAVAIL = 4;
}

// KubeNodeCondition pairs a node condition type with its boolean state.
message KubeNodeCondition {
  // Which condition this entry describes.
  KubeNodeConditionType type = 1;

  // State of the condition.
  // NOTE(review): presumably true means the condition currently holds on the
  // node - confirm against the producer.
  bool set = 2;
}

// KubeNodeInfo describes a single node of the edge node cluster.
message KubeNodeInfo {
  // Node name; will match the device name.
  string name = 1;

  // Status of the node, expressed as a list of conditions.
  repeated KubeNodeCondition conditions = 2;

  // Whether the node has the server role.
  bool role_server = 3;

  // Time at which the node was created in the cluster.
  google.protobuf.Timestamp creation_timestamp = 4;

  // Version of the API server running on the node.
  // NOTE(review): the field name looks like a typo for "api_server_version".
  // It is left as-is because a rename, while wire-compatible, changes the
  // generated accessors and the JSON key.
  string api_server_sersion = 5;

  // Internal IP address of the node.
  string internal_ip = 6;

  // Whether the node is schedulable.
  // NOTE(review): presumably false when the node is cordoned - confirm.
  bool schedulable = 7;
}

// StorageHealthStatus is a higher level tracking status which shows the
// redundancy/failure-zone level together with rebuild progress.
enum StorageHealthStatus {
  // Health is not known.
  STORAGE_HEALTH_STATUS_UNKNOWN = 0;

  // Storage is healthy.
  STORAGE_HEALTH_STATUS_HEALTHY = 1;

  // Degraded: 2 replicas available, replicating.
  STORAGE_HEALTH_STATUS_DEGRADED_2_REPLICA_AVAILABLE_REPLICATING = 2;

  // Degraded: 2 replicas available, not replicating.
  STORAGE_HEALTH_STATUS_DEGRADED_2_REPLICA_AVAILABLE_NOT_REPLICATING = 3;

  // Degraded: 1 replica available, replicating.
  STORAGE_HEALTH_STATUS_DEGRADED_1_REPLICA_AVAILABLE_REPLICATING = 4;

  // Degraded: 1 replica available, not replicating.
  STORAGE_HEALTH_STATUS_DEGRADED_1_REPLICA_AVAILABLE_NOT_REPLICATING = 5;

  // Storage has failed.
  STORAGE_HEALTH_STATUS_FAILED = 6;
}

// StorageVolumeState carries the kubernetes 'state' field of a replicated
// csi-driver volume. Reference:
// https://github.com/longhorn/longhorn-manager/blob/v1.6.2/k8s/pkg/apis/longhorn/v1beta1/volume.go#L14
enum StorageVolumeState {
  // State is not known.
  STORAGE_VOLUME_STATE_UNKNOWN = 0;

  // Volume is being created.
  STORAGE_VOLUME_STATE_CREATING = 1;

  // Volume is attached.
  STORAGE_VOLUME_STATE_ATTACHED = 2;

  // Volume is detached.
  STORAGE_VOLUME_STATE_DETACHED = 3;

  // Volume is being attached.
  STORAGE_VOLUME_STATE_ATTACHING = 4;

  // Volume is being detached.
  STORAGE_VOLUME_STATE_DETACHING = 5;

  // Volume is being deleted.
  STORAGE_VOLUME_STATE_DELETING = 6;
}

// StorageVolumeRobustness carries the 'robustness' of a replicated
// csi-driver volume. Reference:
// https://github.com/longhorn/longhorn-manager/blob/v1.6.2/k8s/pkg/apis/longhorn/v1beta1/volume.go#L25
enum StorageVolumeRobustness {
  // Robustness is not known.
  STORAGE_VOLUME_ROBUSTNESS_UNKNOWN = 0;

  // Volume is healthy.
  STORAGE_VOLUME_ROBUSTNESS_HEALTHY = 1;

  // Volume is degraded.
  STORAGE_VOLUME_ROBUSTNESS_DEGRADED = 2;

  // Volume is faulted.
  STORAGE_VOLUME_ROBUSTNESS_FAULTED = 3;
}

// StorageVolumePVCStatus carries the kubernetes 'phase' of a PVC, which the
// cli lists as status. Reference:
// https://kubernetes.io/docs/concepts/storage/persistent-volumes#phase
enum StorageVolumePVCStatus {
  // Phase is not known.
  STORAGE_VOLUME_PVC_STATUS_UNKNOWN = 0;

  // Bound phase.
  STORAGE_VOLUME_PVC_STATUS_BOUND = 1;

  // Accepted but not yet scheduled.
  STORAGE_VOLUME_PVC_STATUS_PENDING = 2;

  // Available phase.
  STORAGE_VOLUME_PVC_STATUS_AVAILABLE = 3;

  // Released phase.
  STORAGE_VOLUME_PVC_STATUS_RELEASED = 4;

  // Failed phase.
  STORAGE_VOLUME_PVC_STATUS_FAILED = 5;
}

// StorageVolumeReplicaStatus is a higher level status: it combines replica
// and engine status into a simplified view of a replica's rebuild state.
enum StorageVolumeReplicaStatus {
  // Status is not known.
  STORAGE_VOLUME_REPLICA_STATUS_UNKNOWN = 0;

  // Replica is rebuilding.
  STORAGE_VOLUME_REPLICA_STATUS_REBUILDING = 1;

  // Replica is online.
  STORAGE_VOLUME_REPLICA_STATUS_ONLINE = 2;

  // Replacement/Rebuilt replica not yet scheduled.
  STORAGE_VOLUME_REPLICA_STATUS_FAILED = 3;
}

// KubePodNameSpaceInfo summarizes the pods of one namespace as counters.
message KubePodNameSpaceInfo {
  // Namespace name.
  string name = 1;

  // Total number of pods in the namespace.
  uint32 pod_count = 2;

  // Pods in the namespace that are running.
  uint32 pod_running_count = 3;

  // Pods in the namespace that are pending.
  uint32 pod_pending_count = 4;

  // Pods in the namespace that have failed.
  uint32 pod_failed_count = 5;

  // Pods in the namespace that have succeeded.
  uint32 pod_succeeded_count = 6;
}

// KubePodStatus enumerates the pod statuses reported for an EVE application
// pod (see KubeEVEAppPodInfo.status).
enum KubePodStatus {
  // Status is not known.
  KUBE_POD_STATUS_UNKNOWN = 0;

  // Pod is pending.
  KUBE_POD_STATUS_PENDING = 1;

  // Pod is running.
  KUBE_POD_STATUS_RUNNING = 2;

  // Pod has succeeded.
  KUBE_POD_STATUS_SUCCEEDED = 3;

  // Pod containers are being created.
  KUBE_POD_STATUS_CONTAINER_CREATING = 4;

  // Pod is in crash-loop backoff.
  KUBE_POD_STATUS_CRASHLOOP_BACKOFF = 5;

  // Pod is in error.
  KUBE_POD_STATUS_ERROR = 6;

  // Pod was evicted.
  KUBE_POD_STATUS_EVICTED = 7;

  // Pod has failed.
  KUBE_POD_STATUS_FAILED = 8;
}

// KubeEVEAppPodInfo describes the pod of one EVE application in the cluster.
message KubeEVEAppPodInfo {
  // EVE application name.
  string name = 1;

  // Status of the application.
  KubePodStatus status = 2;

  // Number of times the application has restarted.
  uint32 restart_count = 3;

  // Restart time of the application.
  // NOTE(review): the type is an absolute Timestamp, not a relative
  // "seconds ago" value - confirm producers fill it accordingly.
  google.protobuf.Timestamp restart_time = 4;

  // Time at which the application was created.
  google.protobuf.Timestamp creation_timestamp = 5;

  // IP address of the application, on the cni0 interface.
  string ip_address = 6;

  // Name of the node the application is running on.
  string node_name = 7;
}

// KubeVolumeReplicaInfo describes one replica of a replicated volume.
message KubeVolumeReplicaInfo {
  // Volume replica name.
  string name = 1;

  // Node the replica resides on; will match a node name.
  string owner_node = 2;

  // Rebuild progress of the replica, as a percentage.
  uint32 rebuild_progress_percentage = 3;

  // Status of the replica.
  StorageVolumeReplicaStatus status = 4;
}

// KubeVolumeInfo describes one volume in the cluster along with its replicas.
message KubeVolumeInfo {
  // Volume name.
  string name = 1;

  // State of the volume.
  StorageVolumeState state = 2;

  // Robustness of the volume.
  StorageVolumeRobustness robustness = 3;

  // Time at which the volume was created in the cluster.
  google.protobuf.Timestamp creation_timestamp = 4;

  // Provisioned size of the volume, in bytes.
  uint64 provisioned_bytes = 5;

  // Allocated size of the volume, in bytes.
  uint64 allocated_bytes = 6;

  // PV/PVC status of the volume.
  StorageVolumePVCStatus pvc_status = 7;

  // Replicas of the volume.
  // NOTE(review): the field name is singular although the field is repeated;
  // kept as-is so generated accessors and the JSON key do not change.
  repeated KubeVolumeReplicaInfo replica = 8;
}

// KubeStorageInfo reports the cluster storage (Longhorn) health together
// with its volumes.
message KubeStorageInfo {
  // Overall status of Longhorn.
  StorageHealthStatus health = 1;

  // Time of the most recent transition of the health status.
  google.protobuf.Timestamp transition_time = 2;

  // Status of every volume in Longhorn.
  repeated KubeVolumeInfo volumes = 3;
}

// KubeCompUpgradeStatus tracks the upgrade status of a single KubeComp.
// Components are upgraded serially in a cluster.
enum KubeCompUpgradeStatus {
  // Status is not known.
  KUBE_COMP_UPGRADE_STATUS_UNKNOWN = 0;

  // Download status.
  KUBE_COMP_UPGRADE_STATUS_DOWNLOAD = 1;

  // Download failed.
  KUBE_COMP_UPGRADE_STATUS_DOWNLOAD_FAILED = 2;

  // Upgrade is in progress.
  KUBE_COMP_UPGRADE_STATUS_IN_PROGRESS = 3;

  // Upgrade failed.
  KUBE_COMP_UPGRADE_STATUS_FAILED = 4;

  // Upgrade completed.
  KUBE_COMP_UPGRADE_STATUS_COMPLETED = 5;
}

// KubeComp identifies a component installed in eve after usb install.
// Components are listed to give more detail on cluster upgrade progress.
enum KubeComp {
  // Component is not known.
  KUBE_COMP_UNKNOWN = 0;

  // containerd; every node will publish.
  KUBE_COMP_CONTAINERD = 1;

  // k3s; every node will publish.
  KUBE_COMP_K3S = 2;

  // Multus. Only the first node to upgrade eve-os will publish this
  // component and the remaining ones below.
  KUBE_COMP_MULTUS = 3;

  // KubeVirt.
  KUBE_COMP_KUBEVIRT = 4;

  // CDI.
  KUBE_COMP_CDI = 5;

  // Longhorn.
  KUBE_COMP_LONGHORN = 6;
}

// KubeClusterUpgradeStatus reports which node and component are currently
// being upgraded, and how that upgrade is going.
message KubeClusterUpgradeStatus {
  // Node currently under upgrade; empty when no node is in an upgrade.
  string current_node = 1;

  // Component currently under upgrade; KUBE_COMP_UNKNOWN when no upgrades
  // are in progress.
  KubeComp component = 2;

  // Status of the component currently under upgrade;
  // KUBE_COMP_UPGRADE_STATUS_UNKNOWN when no upgrades are in progress.
  KubeCompUpgradeStatus status = 3;

  // Error info in case of failure.
  org.lfedge.eve.info.ErrorInfo error = 4;
}

// KubeClusterInfo is the aggregate info message for an edge node cluster:
// nodes, per-namespace pod summaries, EVE applications, storage and upgrade.
message KubeClusterInfo {
  // Info on each node of the cluster.
  repeated KubeNodeInfo nodes = 1;

  // Pod summary for each namespace.
  repeated KubePodNameSpaceInfo pod_name_spaces = 2;

  // Info on each EVE application.
  repeated KubeEVEAppPodInfo eve_apps = 3;

  // Info on cluster storage.
  KubeStorageInfo storage = 4;

  // Current status of the cluster upgrade.
  KubeClusterUpgradeStatus upgrade = 5;
}
62 changes: 62 additions & 0 deletions proto/info/error.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright(c) 2024 Zededa, Inc.
// All rights reserved.

syntax = "proto3";

package org.lfedge.eve.info;
option go_package = "github.com/lf-edge/eve-api/go/info";
option java_package = "org.lfedge.eve.info";

import "google/protobuf/timestamp.proto";

// Entity enumerates the types of entity an error can reference.
enum Entity {
  // Invalid device entity.
  ENTITY_UNSPECIFIED = 0;

  // Base OS entity.
  ENTITY_BASE_OS = 1;

  // System adapter entity.
  ENTITY_SYSTEM_ADAPTER = 2;

  // Vault entity.
  ENTITY_VAULT = 3;

  // Attestation entity.
  ENTITY_ATTESTATION = 4;

  // App instance entity.
  ENTITY_APP_INSTANCE = 5;

  // Port entity.
  ENTITY_PORT = 6;

  // Network entity.
  ENTITY_NETWORK = 7;

  // Network instance entity.
  ENTITY_NETWORK_INSTANCE = 8;

  // Content tree entity.
  ENTITY_CONTENT_TREE = 9;

  // Blob entity.
  ENTITY_CONTENT_BLOB = 10;

  // Volume entity.
  ENTITY_VOLUME = 11;
}

// Severity enumerates the severity levels of an error.
enum Severity {
  // Severity unspecified.
  SEVERITY_UNSPECIFIED = 0;

  // Severity notice.
  SEVERITY_NOTICE = 1;

  // Severity warning.
  SEVERITY_WARNING = 2;

  // Severity error.
  SEVERITY_ERROR = 3;
}

// DeviceEntity identifies a device entity by type, uuid and name.
message DeviceEntity {
  // Entity type.
  Entity entity = 1;

  // Entity uuid.
  string entity_id = 2;

  // Entity name.
  string entity_name = 3;
}

// ErrorInfo carries an error raised in response to the application of
// configuration.
message ErrorInfo {
  // Description of the error.
  string description = 1;

  // Timestamp at which the error occurred.
  google.protobuf.Timestamp timestamp = 2;

  // Severity of the error.
  Severity severity = 3;

  // Objects referenced by the description or retry_condition.
  repeated DeviceEntity entities = 4;

  // Condition to retry.
  string retry_condition = 5;
}
Loading

0 comments on commit e487a11

Please sign in to comment.