Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NFD refactoring #55

Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
22b938a
adding nfd config change util
Dec 25, 2024
ad921a7
NFD vars separated in gpu and nno
AdamKaabyia Jan 23, 2025
ea554a7
NFD consts separated in gpu and nno
AdamKaabyia Jan 23, 2025
49e6673
1.nfd package created
AdamKaabyia Jan 23, 2025
b4f7680
1. common consts added to nfd consts even though they're not nfd
AdamKaabyia Jan 23, 2025
73d13c8
put the consts of nfd package in its correct directory not in the tes…
AdamKaabyia Jan 24, 2025
18a07f1
put the vars of nfd package in its correct directory not in the tests…
AdamKaabyia Jan 24, 2025
f7ee7d1
gpu consts put in the gpu package and names adjusted to remove redund…
AdamKaabyia Jan 24, 2025
5b4adea
added some time consts in the consts file of the package of nvidiagpu…
AdamKaabyia Jan 24, 2025
6f851da
added a 2 consts to the nfd package and used them
AdamKaabyia Jan 25, 2025
b7aa1ea
1. seperated global variables from nfd vars
AdamKaabyia Jan 25, 2025
6122a06
adding a PCI whitelists set util
Dec 30, 2024
cd52b1d
Bump github.com/golang/glog from 1.2.0 to 1.2.4 in the go_modules group
dependabot[bot] Jan 28, 2025
a26a2dd
Added Untitled Diagram.drawio
AdamKaabyia Jan 29, 2025
e238d74
Revert "1. seperated global variables from nfd vars"
AdamKaabyia Jan 29, 2025
82a9b05
Merge remote-tracking branch 'origin/nfd-refractoring' into nfd-refra…
AdamKaabyia Jan 29, 2025
fa13690
removed the diagram i added by accident
AdamKaabyia Jan 29, 2025
d2f0fd5
Merge branch 'rh-ecosystem-edge:main' into nfd-refractoring
AdamKaabyia Jan 29, 2025
8f8535f
Making Must gather function a generic one
TomerNewman Jan 21, 2025
a3ada2b
Merge pull request #49 from rh-ecosystem-edge/add_white_list
ggordaniRed Jan 29, 2025
c7e0ed9
Merge branch 'rh-ecosystem-edge:main' into nfd-refractoring
AdamKaabyia Jan 29, 2025
3eac99c
fixed the double declaration of nfd config, the vars that are used in…
AdamKaabyia Feb 4, 2025
3b7e187
fixed error:
AdamKaabyia Feb 6, 2025
d03b8d6
fixed error in NNO testing
AdamKaabyia Feb 9, 2025
fd2409c
1. the CheckNfdInstallation seperated and put in the nfd package
AdamKaabyia Feb 10, 2025
ab36fbb
created a gpuBurnConfig.go that has the default configurations of a b…
AdamKaabyia Feb 13, 2025
c47b3f8
renamed the file from vars to config
AdamKaabyia Feb 13, 2025
98d26b2
seperated the nfdccheck from the nfd package for SOC(seperation of co…
AdamKaabyia Feb 14, 2025
f186566
Update pkg/nfd/config.go
AdamKaabyia Feb 14, 2025
e2fdaf7
put all the funcs that use nfd in nfd pkg, next step is to seperate i…
AdamKaabyia Feb 16, 2025
eadfb14
Merge remote-tracking branch 'origin/nfd-refractoring' into nfd-refra…
AdamKaabyia Feb 16, 2025
ba84157
added the EnsureNFDIsInstalled in deplo-nno-testing
AdamKaabyia Feb 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions pkg/nfd/consts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package nfd

// Package-level constants for the nfd package: fixed names, label keys and
// values, the glog verbosity used by the builder, and a few values shared with
// other packages.
const (
// Publisher and display-name strings for a custom NFD catalog source.
// NOTE(review): presumably used when a custom CatalogSource is created — confirm against callers.
NfdCustomNFDCatalogSourcePublisherName = "Red Hat"
NfdCustomCatalogSourceDisplayName = "Redhat Operators Custom"
// Node label key and the value paired with it.
// NOTE(review): presumably used to detect RHCOS nodes labeled by NFD — confirm against callers.
NfdRhcosLabel = "feature.node.kubernetes.io/system-os_release.ID"
NfdRhcosLabelValue = "rhcos"
// Names of the operator namespace, default catalog source, catalog source
// namespace, operator deployment, package and custom resource, as suggested
// by the identifiers; the call sites are not visible here.
NfdOperatorNamespace = "openshift-nfd"
NfdCatalogSourceDefault = "redhat-operators"
NfdCatalogSourceNamespace = "openshift-marketplace"
NfdOperatorDeploymentName = "nfd-controller-manager"
NfdPackage = "nfd"
NfdCRName = "nfd-instance"

// resourceCRD is the resource kind name interpolated into the log and error
// messages produced by Builder.validate.
resourceCRD = "NodeFeatureDiscovery"
// LogLevel is the verbosity passed to glog.V for the package's log statements.
LogLevel = 100

// Not NFD-specific: common constants shared between the gpu and nno code.
// UndefinedValue is also the initial value of several string variables in this package.
UndefinedValue = "undefined"
// NOTE(review): presumably file names under which the operator and OpenShift
// versions are recorded — confirm against callers.
OperatorVersionFile = "operator.version"
OpenShiftVersionFile = "ocp.version"
)
39 changes: 19 additions & 20 deletions pkg/nfd/nodefeaturediscovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ type Builder struct {

// NewBuilderFromObjectString creates a Builder object from CSV alm-examples.
func NewBuilderFromObjectString(apiClient *clients.Settings, almExample string) *Builder {
glog.V(100).Infof(
glog.V(LogLevel).Infof(
"Initializing new Builder structure from almExample string")

nodeFeatureDiscovery, err := getNodeFeatureDiscoveryFromAlmExample(almExample)

glog.V(100).Infof(
glog.V(LogLevel).Infof(
"Initializing Builder definition to NodeFeatureDiscovery object")

builder := Builder{
Expand All @@ -44,15 +44,15 @@ func NewBuilderFromObjectString(apiClient *clients.Settings, almExample string)
}

if err != nil {
glog.V(100).Infof(
glog.V(LogLevel).Infof(
"Error initializing NodeFeatureDiscovery from alm-examples: %s", err.Error())

builder.errorMsg = fmt.Sprintf("Error initializing NodeFeatureDiscovery from alm-examples: %s",
err.Error())
}

if builder.Definition == nil {
glog.V(100).Infof("The NodeFeatureDiscovery object definition is nil")
glog.V(LogLevel).Infof("The NodeFeatureDiscovery object definition is nil")

builder.errorMsg = "NodeFeatureDiscovery definition is nil"
}
Expand All @@ -66,7 +66,7 @@ func (builder *Builder) Get() (*nfdv1.NodeFeatureDiscovery, error) {
return nil, err
}

glog.V(100).Infof("Collecting NodeFeatureDiscovery object %s in namespace %s",
glog.V(LogLevel).Infof("Collecting NodeFeatureDiscovery object %s in namespace %s",
builder.Definition.Name, builder.Definition.Namespace)

nodeFeatureDiscovery := &nfdv1.NodeFeatureDiscovery{}
Expand All @@ -76,7 +76,7 @@ func (builder *Builder) Get() (*nfdv1.NodeFeatureDiscovery, error) {
}, nodeFeatureDiscovery)

if err != nil {
glog.V(100).Infof("NodeFeatureDiscovery object %s doesn't exist in namespace %s",
glog.V(LogLevel).Infof("NodeFeatureDiscovery object %s doesn't exist in namespace %s",
builder.Definition.Name, builder.Definition.Namespace)

return nil, err
Expand All @@ -87,7 +87,7 @@ func (builder *Builder) Get() (*nfdv1.NodeFeatureDiscovery, error) {

// Pull loads an existing NodeFeatureDiscovery into Builder struct.
func Pull(apiClient *clients.Settings, name, namespace string) (*Builder, error) {
glog.V(100).Infof("Pulling existing nodeFeatureDiscovery name: %s in namespace: %s", name, namespace)
glog.V(LogLevel).Infof("Pulling existing nodeFeatureDiscovery name: %s in namespace: %s", name, namespace)

builder := Builder{
apiClient: apiClient,
Expand All @@ -100,13 +100,13 @@ func Pull(apiClient *clients.Settings, name, namespace string) (*Builder, error)
}

if name == "" {
glog.V(100).Infof("NodeFeatureDiscovery name is empty")
glog.V(LogLevel).Infof("NodeFeatureDiscovery name is empty")

builder.errorMsg = "NodeFeatureDiscovery 'name' cannot be empty"
}

if namespace == "" {
glog.V(100).Infof("NodeFeatureDiscovery namespace is empty")
glog.V(LogLevel).Infof("NodeFeatureDiscovery namespace is empty")

builder.errorMsg = "NodeFeatureDiscovery 'namespace' cannot be empty"
}
Expand All @@ -126,15 +126,15 @@ func (builder *Builder) Exists() bool {
return false
}

glog.V(100).Infof(
glog.V(LogLevel).Infof(
"Checking if NodeFeatureDiscovery %s exists in namespace %s", builder.Definition.Name,
builder.Definition.Namespace)

var err error
builder.Object, err = builder.Get()

if err != nil {
glog.V(100).Infof("Failed to collect NodeFeatureDiscovery object due to %s", err.Error())
glog.V(LogLevel).Infof("Failed to collect NodeFeatureDiscovery object due to %s", err.Error())
}

return err == nil || !k8serrors.IsNotFound(err)
Expand All @@ -146,7 +146,7 @@ func (builder *Builder) Delete() (*Builder, error) {
return builder, err
}

glog.V(100).Infof("Deleting NodeFeatureDiscovery %s in namespace %s", builder.Definition.Name,
glog.V(LogLevel).Infof("Deleting NodeFeatureDiscovery %s in namespace %s", builder.Definition.Name,
builder.Definition.Namespace)

if !builder.Exists() {
Expand All @@ -170,7 +170,7 @@ func (builder *Builder) Create() (*Builder, error) {
return builder, err
}

glog.V(100).Infof("Creating the NodeFeatureDiscovery %s in namespace %s", builder.Definition.Name,
glog.V(LogLevel).Infof("Creating the NodeFeatureDiscovery %s in namespace %s", builder.Definition.Name,
builder.Definition.Namespace)

var err error
Expand All @@ -191,20 +191,20 @@ func (builder *Builder) Update(force bool) (*Builder, error) {
return builder, err
}

glog.V(100).Infof("Updating the NodeFeatureDiscovery object named: %s in namespace: %s",
glog.V(LogLevel).Infof("Updating the NodeFeatureDiscovery object named: %s in namespace: %s",
builder.Definition.Name, builder.Definition.Namespace)

err := builder.apiClient.Update(context.TODO(), builder.Definition)

if err != nil {
if force {
glog.V(100).Infof(
glog.V(LogLevel).Infof(
msg.FailToUpdateNotification("NodeFeatureDiscovery", builder.Definition.Name, builder.Definition.Namespace))

builder, err := builder.Delete()

if err != nil {
glog.V(100).Infof(
glog.V(LogLevel).Infof(
msg.FailToUpdateError("NodeFeatureDiscovery", builder.Definition.Name, builder.Definition.Namespace))

return nil, err
Expand Down Expand Up @@ -241,22 +241,21 @@ func getNodeFeatureDiscoveryFromAlmExample(almExample string) (*nfdv1.NodeFeatur
// validate will check that the builder and builder definition are properly initialized before
// accessing any member fields.
func (builder *Builder) validate() (bool, error) {
resourceCRD := "NodeFeatureDiscovery"

if builder == nil {
glog.V(100).Infof("The %s builder is uninitialized", resourceCRD)
glog.V(LogLevel).Infof("The %s builder is uninitialized", resourceCRD)

return false, fmt.Errorf("error: received nil %s builder", resourceCRD)
}

if builder.Definition == nil {
glog.V(100).Infof("The %s is undefined", resourceCRD)
glog.V(LogLevel).Infof("The %s is undefined", resourceCRD)

return false, fmt.Errorf(msg.UndefinedCrdObjectErrString(resourceCRD))
}

if builder.apiClient == nil {
glog.V(100).Infof("The %s builder apiclient is nil", resourceCRD)
glog.V(LogLevel).Infof("The %s builder apiclient is nil", resourceCRD)

return false, fmt.Errorf("%s builder cannot have nil apiClient", resourceCRD)
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/nfd/vars.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package nfd

// Mutable package-level settings for NFD. The string settings start out as
// UndefinedValue and the boolean settings start out false.
// NOTE(review): presumably overridden from test configuration at runtime —
// the assignments are not visible in this file; confirm against callers.
var (
// Index image for a custom NFD catalog source; UndefinedValue until set.
NfdCustomCatalogsourceIndexImage = UndefinedValue
// Defaults to false. NOTE(review): the name suggests it enables creating a
// custom catalog source — confirm.
CreateNFDCustomCatalogsource = false
// Custom catalog source setting; UndefinedValue until set.
NfdCustomCatalogSource = UndefinedValue
// Catalog source setting; UndefinedValue until set.
NfdCatalogSource = UndefinedValue
// Defaults to false. NOTE(review): the name suggests it enables cleanup once
// installation has finished — confirm.
NfdCleanupAfterInstall = false
)
69 changes: 69 additions & 0 deletions pkg/nvidiagpu/consts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package nvidiagpu

import "time"

// Package-level constants for the nvidiagpu package: fixed resource names and
// label strings, followed by time.Duration values.
// NOTE(review): the durations appear to be delays, poll intervals and timeouts
// for the GPU operator test flow; the code that consumes them is not visible
// here, so the per-constant purposes below are inferred from the names.
const (
// Namespace string; SubscriptionNamespace below carries the same value.
NvidiaGPUNamespace = "nvidia-gpu-operator"

// Node label key. NOTE(review): presumably set by NFD when a PCI device with
// vendor ID 10de is present — confirm.
NvidiaGPULabel = "feature.node.kubernetes.io/pci-10de.present"
// Names for the operator group, deployment, subscription, catalog source,
// package and cluster policy, as suggested by the identifiers.
OperatorGroupName = "gpu-og"
OperatorDeployment = "gpu-operator"
SubscriptionName = "gpu-subscription"
SubscriptionNamespace = "nvidia-gpu-operator"
CatalogSourceDefault = "certified-operators"
CatalogSourceNamespace = "openshift-marketplace"
Package = "gpu-operator-certified"
ClusterPolicyName = "gpu-cluster-policy"
// Namespace, pod name, label selector string and configmap name for the
// gpu-burn workload, as suggested by the identifiers.
BurnNamespace = "test-gpu-burn"
BurnPodName = "gpu-burn-pod"
BurnPodLabel = "app=gpu-burn-app"
BurnConfigmapName = "gpu-burn-entrypoint"
// Bundle image reference with the main-latest tag.
OperatorDefaultMasterBundleImage = "registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator-bundle:main-latest"

// Publisher string for a custom catalog source.
CustomCatalogSourcePublisherName = "Red Hat"

// Display-name string for a custom catalog source.
CustomCatalogSourceDisplayName = "Certified Operators Custom"

// Generic sleep duration (30 seconds).
SleepDuration = 30 * time.Second

// Generic wait duration (4 minutes).
WaitDuration = 4 * time.Minute

// Poll interval and overall timeout for a deletion wait, plus a machine-ready
// wait duration — purposes inferred from the names.
DeletionPollInterval = 30 * time.Second
DeletionTimeoutDuration = 5 * time.Minute
MachineReadyWaitDuration = 15 * time.Minute

// Delay associated with node labeling (2 minutes).
NodeLabelingDelay = 2 * time.Minute

// Delays, intervals and timeouts for catalog source, package manifest and
// bundle deployment steps — purposes inferred from the names.
CatalogSourceCreationDelay = 30 * time.Second
CatalogSourceReadyTimeout = 4 * time.Minute
PackageManifestCheckInterval = 30 * time.Second
PackageManifestTimeout = 5 * time.Minute
GpuBundleDeploymentTimeout = 5 * time.Minute

// Delay, check interval and timeout for operator deployment creation —
// purposes inferred from the names.
OperatorDeploymentCreationDelay = 2 * time.Minute
DeploymentCreationCheckInterval = 30 * time.Second
DeploymentCreationTimeout = 4 * time.Minute

// Timeout for the operator deployment to become ready (4 minutes).
OperatorDeploymentReadyTimeout = 4 * time.Minute

// Check interval and timeout for a CSV "succeeded" wait — purpose inferred
// from the names.
CsvSucceededCheckInterval = 60 * time.Second
CsvSucceededTimeout = 5 * time.Minute

// Check interval and timeout for a cluster policy "ready" wait — purpose
// inferred from the names.
ClusterPolicyReadyCheckInterval = 60 * time.Second
ClusterPolicyReadyTimeout = 12 * time.Minute

// Timeout for gpu-burn pod creation (5 minutes).
BurnPodCreationTimeout = 5 * time.Minute

// Timeouts for the gpu-burn pod to reach running and success states —
// purposes inferred from the names.
BurnPodRunningTimeout = 3 * time.Minute
BurnPodSuccessTimeout = 8 * time.Minute

// Period associated with gpu-burn log collection (500 seconds).
BurnLogCollectionPeriod = 500 * time.Second

// Sleep interval associated with CSV deployment (2 minutes).
CsvDeploymentSleepInterval = 2 * time.Minute

// Same value as BurnPodCreationTimeout.
// NOTE(review): presumably applies to the pod created after an upgrade — confirm.
BurnPodPostUpgradeCreationTimeout = 5 * time.Minute

// These three carry the same values as BurnPodRunningTimeout,
// BurnPodSuccessTimeout and BurnLogCollectionPeriod above.
// NOTE(review): presumably for a redeployed gpu-burn pod — confirm.
RedeployedBurnPodRunningTimeout = 3 * time.Minute
RedeployedBurnPodSuccessTimeout = 8 * time.Minute
RedeployedBurnLogCollectionPeriod = 500 * time.Second
)
Loading