diff --git a/PROJECT b/PROJECT index fd3e90d1..51db69fb 100644 --- a/PROJECT +++ b/PROJECT @@ -35,4 +35,13 @@ resources: kind: NIMPipeline path: github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1 version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: nvidia.com + group: apps + kind: NemoCustomizer + path: github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1 + version: v1alpha1 version: "3" diff --git a/api/apps/v1alpha1/common_types.go b/api/apps/v1alpha1/common_types.go index f6e08fbc..51bd9c61 100644 --- a/api/apps/v1alpha1/common_types.go +++ b/api/apps/v1alpha1/common_types.go @@ -34,9 +34,13 @@ type Service struct { Type corev1.ServiceType `json:"type,omitempty"` // override the default service name Name string `json:"name,omitempty"` + // Deprecated: Use Ports instead. + // +kubebuilder:deprecatedversion // +kubebuilder:default=8000 - Port int32 `json:"port"` - Annotations map[string]string `json:"annotations,omitempty"` + Port int32 `json:"port,omitempty"` + // Defines multiple ports for the service + Ports []corev1.ServicePort `json:"ports,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` } // Metrics defines attributes to setup metrics collection diff --git a/api/apps/v1alpha1/nemo_common_types.go b/api/apps/v1alpha1/nemo_common_types.go index 824caaec..838c6ae9 100644 --- a/api/apps/v1alpha1/nemo_common_types.go +++ b/api/apps/v1alpha1/nemo_common_types.go @@ -29,13 +29,13 @@ type DataStore struct { Endpoint string `json:"endpoint"` } +// DatabaseConfig is the external database configuration type DatabaseConfig struct { // Host is the hostname of the database. // Required, must not be empty. // - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Host string `json:"host,omitempty"` + Host string `json:"host"` // Port is the port where the database is reachable at. // If specified, this must be a valid port number, 0 < databasePort < 65536. // Defaults to 5432. @@ -47,32 +47,99 @@ type DatabaseConfig struct { // DatabaseName is the database name for a NEMO Service. // Required, must not be empty. // - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - DatabaseName string `json:"databaseName,omitempty"` + DatabaseName string `json:"databaseName"` // DatabaseCredentials stores the configuration to retrieve the database credentials. // Required, must not be nil. // - // +kubebuilder:validation:Required - Credentials *DatabaseCredentials `json:"credentials,omitempty"` + Credentials *DatabaseCredentials `json:"credentials"` } +// DatabaseCredentials are the external database credentials type DatabaseCredentials struct { // User is the non-root username for a NEMO Service in the database. // Required, must not be empty. // - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - User string `json:"user,omitempty"` + User string `json:"user"` // SecretName is the name of the secret which has the database credentials for a NEMO service user. // Required, must not be empty. // - // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - SecretName string `json:"secretName,omitempty"` + SecretName string `json:"secretName"` // PasswordKey is the name of the key in the `CredentialsSecret` secret for the database credentials. // Defaults to "password". // // +kubebuilder:default:="password" PasswordKey string `json:"passwordKey,omitempty"` } + +// WandBSecret represents the secret and key details for the Weights and Biases service. +type WandBSecret struct { + // Name is the name of the Kubernetes Secret containing the WandB API key. + // Required, must not be empty. + // + // +kubebuilder:validation:MinLength=1 + Name string `json:"name"` + + // APIKeyKey is the key in the Secret that holds the WandB API key. + // Defaults to "apiKey". + // +kubebuilder:default="apiKey" + // +kubebuilder:validation:MinLength=1 + APIKeyKey string `json:"apiKeyKey"` + + // EncryptionKey is an optional key in the secret used for encrypting WandB credentials. + // This can be used for additional security layers if required. + // Defaults to "encryptionKey". + // +kubebuilder:validation:Optional + // +kubebuilder:default="encryptionKey" + EncryptionKey string `json:"encryptionKey,omitempty"` +} + +// OTelSpec defines the settings for OpenTelemetry +type OTelSpec struct { + // Enabled indicates if opentelemetry collector and tracing are enabled + Enabled *bool `json:"enabled,omitempty"` + + // ExporterOtlpEndpoint is the OTLP collector endpoint. + // +kubebuilder:validation:MinLength=1 + ExporterOtlpEndpoint string `json:"exporterOtlpEndpoint"` + + // DisableLogging indicates whether Python logging auto-instrumentation should be disabled. + // +kubebuilder:validation:Optional + DisableLogging *bool `json:"disableLogging,omitempty"` + + // ExporterConfig defines configuration for different OTel exporters + // +kubebuilder:validation:Optional + ExporterConfig ExporterConfig `json:"exporterConfig,omitempty"` + + // ExcludedUrls defines URLs to be excluded from tracing. + // +kubebuilder:validation:Optional + // +kubebuilder:default={"health"} + ExcludedUrls []string `json:"excludedUrls,omitempty"` + + // LogLevel defines the log level (e.g., INFO, DEBUG). + // +kubebuilder:validation:Optional + // +kubebuilder:default="INFO" + LogLevel string `json:"logLevel,omitempty"` +} + +// ExporterConfig stores configuration for different OTel exporters +type ExporterConfig struct { + // TracesExporter sets the traces exporter: (otlp, console, none). + // +kubebuilder:validation:Optional + // +kubebuilder:default="otlp" + TracesExporter string `json:"tracesExporter,omitempty"` + + // MetricsExporter sets the metrics exporter: (otlp, console, none). + // +kubebuilder:validation:Optional + // +kubebuilder:validation:Enum=otlp;console;none + // +kubebuilder:default="otlp" + MetricsExporter string `json:"metricsExporter,omitempty"` + + // LogsExporter sets the logs exporter: (otlp, console, none). + // +kubebuilder:validation:Optional + // +kubebuilder:validation:Enum=otlp;console;none + // +kubebuilder:default="otlp" + LogsExporter string `json:"logsExporter,omitempty"` +} diff --git a/api/apps/v1alpha1/nemo_customizer_types.go b/api/apps/v1alpha1/nemo_customizer_types.go new file mode 100644 index 00000000..9bc8fb44 --- /dev/null +++ b/api/apps/v1alpha1/nemo_customizer_types.go @@ -0,0 +1,925 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "fmt" + "maps" + "os" + "strconv" + "strings" + + rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types" + utils "github.com/NVIDIA/k8s-nim-operator/internal/utils" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + autoscalingv2 "k8s.io/api/autoscaling/v2" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +const ( + // NemoCustomizerConditionReady indicates that the NEMO CustomizerService is ready. + NemoCustomizerConditionReady = "Ready" + // NemoCustomizerConditionFailed indicates that the NEMO CustomizerService has failed. + NemoCustomizerConditionFailed = "Failed" + + // NemoCustomizerStatusPending indicates that NEMO CustomizerService is in pending state + NemoCustomizerStatusPending = "Pending" + // NemoCustomizerStatusNotReady indicates that NEMO CustomizerService is not ready + NemoCustomizerStatusNotReady = "NotReady" + // NemoCustomizerStatusReady indicates that NEMO CustomizerService is ready + NemoCustomizerStatusReady = "Ready" + // NemoCustomizerStatusFailed indicates that NEMO CustomizerService has failed + NemoCustomizerStatusFailed = "Failed" + + // SchedulerTypeVolcano indicates if the scheduler is volcano + SchedulerTypeVolcano = "volcano" + // SchedulerTypeRunAI indicates if the scheduler is run.ai + SchedulerTypeRunAI = "runai" +) + +// NemoCustomizerSpec defines the desired state of NemoCustomizer +type NemoCustomizerSpec struct { + Image Image `json:"image,omitempty"` + Command []string `json:"command,omitempty"` + Args []string `json:"args,omitempty"` + Env []corev1.EnvVar `json:"env,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + PodAffinity *corev1.PodAffinity `json:"podAffinity,omitempty"` + Resources *corev1.ResourceRequirements `json:"resources,omitempty"` + Expose Expose `json:"expose,omitempty"` + LivenessProbe Probe `json:"livenessProbe,omitempty"` + ReadinessProbe Probe `json:"readinessProbe,omitempty"` + StartupProbe Probe `json:"startupProbe,omitempty"` + Scale Autoscaling `json:"scale,omitempty"` + Metrics Metrics `json:"metrics,omitempty"` + + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:default:=1 + Replicas int `json:"replicas,omitempty"` + UserID *int64 `json:"userID,omitempty"` + GroupID *int64 `json:"groupID,omitempty"` + RuntimeClass string `json:"runtimeClass,omitempty"` + + // CustomizerConfig stores the customizer configuration for training and models + // +kubebuilder:validation:MinLength=1 + CustomizerConfig string `json:"customizerConfig"` + + // Scheduler Configuration + Scheduler Scheduler `json:"scheduler,omitempty"` + + // OpenTelemetry Settings + OpenTelemetry OTelSpec `json:"otel"` + + // DatabaseConfig stores the database configuration + DatabaseConfig DatabaseConfig `json:"databaseConfig"` + + // WandBSecret stores the secret and encryption key for the Weights and Biases service. + WandBSecret WandBSecret `json:"wandbSecret"` +} + +// Scheduler defines the configuration for the scheduler +type Scheduler struct { + // Type is the scheduler type (volcano, runai) + // +kubebuilder:validation:Enum=volcano;runai + // +kubebuilder:default:=volcano + Type string `json:"type,omitempty"` +} + +// NemoCustomizerStatus defines the observed state of NemoCustomizer +type NemoCustomizerStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty"` + AvailableReplicas int32 `json:"availableReplicas,omitempty"` + State string `json:"state,omitempty"` +} + +// +genclient +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.state`,priority=0 +// +kubebuilder:printcolumn:name="Age",type="date",format="date-time",JSONPath=".metadata.creationTimestamp",priority=0 + +// NemoCustomizer is the Schema for the NemoCustomizer API +type NemoCustomizer struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec NemoCustomizerSpec `json:"spec,omitempty"` + Status NemoCustomizerStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// NemoCustomizerList contains a list of NemoCustomizer +type NemoCustomizerList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []NemoCustomizer `json:"items"` +} + +// GetStandardSelectorLabels returns the standard selector labels for the NemoCustomizer deployment +func (n *NemoCustomizer) GetStandardSelectorLabels() map[string]string { + return map[string]string{ + "app.kubernetes.io/name": n.Name, + } +} + +// GetStandardLabels returns the standard set of labels for NemoCustomizer resources +func (n *NemoCustomizer) GetStandardLabels() map[string]string { + return map[string]string{ + "app.kubernetes.io/name": n.Name, + "app.kubernetes.io/instance": n.Name, + "app.kubernetes.io/operator-version": os.Getenv("OPERATOR_VERSION"), + "app.kubernetes.io/part-of": "nemo-customizer-service", + "app.kubernetes.io/managed-by": "k8s-nim-operator", + } +} + +// GetStandardEnv returns the standard set of env variables for the NemoCustomizer container +func (n *NemoCustomizer) GetStandardEnv() []corev1.EnvVar { + envVars := []corev1.EnvVar{ + { + Name: "NAMESPACE", + Value: n.Namespace, + }, + { + Name: "HOST_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.hostIP", + }, + }, + }, + { + Name: "CONFIG_PATH", + Value: "/app/config/config.yaml", + }, + { + Name: "CUSTOMIZATIONS_CALLBACK_URL", + Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", n.GetName(), n.GetNamespace(), n.GetInternalServicePort()), + }, + { + Name: "LOG_LEVEL", + Value: "INFO", + }, + { + Name: "WANDB_ENCRYPTION_KEY", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: n.Spec.WandBSecret.APIKeyKey, + LocalObjectReference: corev1.LocalObjectReference{ + Name: n.Spec.WandBSecret.Name, + }, + }, + }, + }, + { + Name: "WANDB_SECRET_NAME", + Value: n.Spec.WandBSecret.Name, + }, + { + Name: "WANDB_SECRET_KEY", + Value: n.Spec.WandBSecret.APIKeyKey, + }, + } + + // Append the environment variables for Postgres + envVars = append(envVars, n.GetPostgresEnv()...) + + // Append the environment variables for OTel + if n.IsOtelEnabled() { + envVars = append(envVars, n.GetOtelEnv()...) + } + + return envVars +} + +// IsOtelEnabled returns true if Open Telemetry Collector is enabled +func (n *NemoCustomizer) IsOtelEnabled() bool { + return n.Spec.OpenTelemetry.Enabled != nil && *n.Spec.OpenTelemetry.Enabled +} + +// GetOtelEnv generates OpenTelemetry-related environment variables. +func (n *NemoCustomizer) GetOtelEnv() []corev1.EnvVar { + var otelEnvVars []corev1.EnvVar + + otelEnvVars = append(otelEnvVars, + corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: n.Spec.OpenTelemetry.ExporterOtlpEndpoint}, + corev1.EnvVar{Name: "OTEL_TRACES_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.TracesExporter}, + corev1.EnvVar{Name: "OTEL_METRICS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.MetricsExporter}, + corev1.EnvVar{Name: "OTEL_LOGS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.LogsExporter}, + corev1.EnvVar{Name: "OTEL_LOG_LEVEL", Value: n.Spec.OpenTelemetry.LogLevel}, + ) + + if len(n.Spec.OpenTelemetry.ExcludedUrls) > 0 { + otelEnvVars = append(otelEnvVars, corev1.EnvVar{ + Name: "OTEL_PYTHON_EXCLUDED_URLS", + Value: strings.Join(n.Spec.OpenTelemetry.ExcludedUrls, ","), + }) + } + + if n.Spec.OpenTelemetry.DisableLogging != nil { + otelEnvVars = append(otelEnvVars, corev1.EnvVar{ + Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED", + Value: strconv.FormatBool(!*n.Spec.OpenTelemetry.DisableLogging), + }) + } else { + otelEnvVars = append(otelEnvVars, corev1.EnvVar{ + Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED", + Value: strconv.FormatBool(true), + }) + } + + return otelEnvVars +} + +// GetPostgresEnv returns the PostgreSQL environment variables for a Kubernetes pod. +func (n *NemoCustomizer) GetPostgresEnv() []corev1.EnvVar { + envVars := []corev1.EnvVar{ + { + Name: "POSTGRES_DB_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: n.Spec.DatabaseConfig.Credentials.PasswordKey, + LocalObjectReference: corev1.LocalObjectReference{ + Name: n.Spec.DatabaseConfig.Credentials.SecretName, + }, + }, + }, + }, + { + Name: "POSTGRES_DB_DSN", + Value: n.GeneratePostgresConnString(), + }, + } + + return envVars +} + +// GeneratePostgresConnString generates a PostgreSQL connection string using the database config. +func (n *NemoCustomizer) GeneratePostgresConnString() string { + // Construct the connection string + connString := fmt.Sprintf( + "postgresql://%s:%s@%s:%d/%s", + n.Spec.DatabaseConfig.Credentials.User, + "$(POSTGRES_DB_PASSWORD)", + n.Spec.DatabaseConfig.Host, + n.Spec.DatabaseConfig.Port, + n.Spec.DatabaseConfig.DatabaseName, + ) + + return connString +} + +// GetVolumes generates the volumes required for the customizer. +func (n *NemoCustomizer) GetVolumes() []corev1.Volume { + return []corev1.Volume{ + { + Name: "config", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: n.GetConfigName(), + }, + Items: []corev1.KeyToPath{ + { + Key: "config.yaml", + Path: "config.yaml", + }, + }, + }, + }, + }, + } +} + +// GetVolumeMounts generates the volume mounts required for the customizer. +func (n *NemoCustomizer) GetVolumeMounts() []corev1.VolumeMount { + return []corev1.VolumeMount{ + { + Name: "config", + MountPath: "/app/config", + ReadOnly: true, + }, + } +} + +// GetConfigName returns the ConfigMap name for the customizer configuration +func (n *NemoCustomizer) GetConfigName() string { + return fmt.Sprintf("%s-config", n.GetName()) +} + +// GetStandardAnnotations returns default annotations to apply to the NemoCustomizer instance +func (n *NemoCustomizer) GetStandardAnnotations() map[string]string { + standardAnnotations := map[string]string{ + "openshift.io/scc": "nonroot", + } + return standardAnnotations +} + +// GetNemoCustomizerAnnotations returns annotations to apply to the NemoCustomizer instance +func (n *NemoCustomizer) GetNemoCustomizerAnnotations() map[string]string { + standardAnnotations := n.GetStandardAnnotations() + + if n.Spec.Annotations != nil { + return utils.MergeMaps(standardAnnotations, n.Spec.Annotations) + } + + return standardAnnotations +} + +// GetServiceLabels returns merged labels to apply to the NemoCustomizer instance +func (n *NemoCustomizer) GetServiceLabels() map[string]string { + standardLabels := n.GetStandardLabels() + + if n.Spec.Labels != nil { + return utils.MergeMaps(standardLabels, n.Spec.Labels) + } + return standardLabels +} + +// GetSelectorLabels returns standard selector labels to apply to the NemoCustomizer instance +func (n *NemoCustomizer) GetSelectorLabels() map[string]string { + // TODO: add custom ones + return n.GetStandardSelectorLabels() +} + +// GetNodeSelector returns node selector labels for the NemoCustomizer instance +func (n *NemoCustomizer) GetNodeSelector() map[string]string { + return n.Spec.NodeSelector +} + +// GetTolerations returns tolerations for the NemoCustomizer instance +func (n *NemoCustomizer) GetTolerations() []corev1.Toleration { + return n.Spec.Tolerations +} + +// GetPodAffinity returns pod affinity for the NemoCustomizer instance +func (n *NemoCustomizer) GetPodAffinity() *corev1.PodAffinity { + return n.Spec.PodAffinity +} + +// GetContainerName returns name of the container for NemoCustomizer deployment +func (n *NemoCustomizer) GetContainerName() string { + return fmt.Sprintf("%s-ctr", n.Name) +} + +// GetCommand return command to override for the NemoCustomizer container +func (n *NemoCustomizer) GetCommand() []string { + return n.Spec.Command +} + +// GetArgs return arguments for the NemoCustomizer container +func (n *NemoCustomizer) GetArgs() []string { + return n.Spec.Args +} + +// GetEnv returns merged slice of standard and user specified env variables +func (n *NemoCustomizer) GetEnv() []corev1.EnvVar { + return utils.MergeEnvVars(n.GetStandardEnv(), n.Spec.Env) +} + +// GetImage returns container image for the NemoCustomizer +func (n *NemoCustomizer) GetImage() string { + return fmt.Sprintf("%s:%s", n.Spec.Image.Repository, n.Spec.Image.Tag) +} + +// GetImagePullSecrets returns the image pull secrets for the NIM container +func (n *NemoCustomizer) GetImagePullSecrets() []string { + return n.Spec.Image.PullSecrets +} + +// GetImagePullPolicy returns the image pull policy for the NIM container +func (n *NemoCustomizer) GetImagePullPolicy() string { + return n.Spec.Image.PullPolicy +} + +// GetResources returns resources to allocate to the NemoCustomizer container +func (n *NemoCustomizer) GetResources() *corev1.ResourceRequirements { + return n.Spec.Resources +} + +// GetLivenessProbe returns liveness probe for the NemoCustomizer container +func (n *NemoCustomizer) GetLivenessProbe() *corev1.Probe { + if n.Spec.LivenessProbe.Probe == nil { + return n.GetDefaultLivenessProbe() + } + return n.Spec.LivenessProbe.Probe +} + +// GetDefaultLivenessProbe returns the default liveness probe for the NemoCustomizer container +func (n *NemoCustomizer) GetDefaultLivenessProbe() *corev1.Probe { + probe := corev1.Probe{ + FailureThreshold: 3, + InitialDelaySeconds: 10, + PeriodSeconds: 10, + SuccessThreshold: 1, + TimeoutSeconds: 10, + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/v1/health/live", + Port: intstr.IntOrString{ + Type: intstr.Type(1), + StrVal: "api", + }, + Scheme: "HTTP", + }, + }, + } + return &probe +} + +// GetReadinessProbe returns readiness probe for the NemoCustomizer container +func (n *NemoCustomizer) GetReadinessProbe() *corev1.Probe { + if n.Spec.ReadinessProbe.Probe == nil { + return n.GetDefaultReadinessProbe() + } + return n.Spec.ReadinessProbe.Probe +} + +// GetDefaultReadinessProbe returns the default readiness probe for the NemoCustomizer container +func (n *NemoCustomizer) GetDefaultReadinessProbe() *corev1.Probe { + probe := corev1.Probe{ + FailureThreshold: 3, + InitialDelaySeconds: 10, + PeriodSeconds: 10, + SuccessThreshold: 1, + TimeoutSeconds: 1, + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/v1/health/ready", + Port: intstr.IntOrString{ + Type: intstr.Type(1), + StrVal: "api", + }, + }, + }, + } + + return &probe +} + +// GetStartupProbe returns startup probe for the NemoCustomizer container +func (n *NemoCustomizer) GetStartupProbe() *corev1.Probe { + return n.Spec.StartupProbe.Probe +} + +// GetServiceAccountName returns service account name for the NemoCustomizer deployment +func (n *NemoCustomizer) GetServiceAccountName() string { + return n.Name +} + +// GetRuntimeClass return the runtime class name for the NemoCustomizer deployment +func (n *NemoCustomizer) GetRuntimeClass() string { + return n.Spec.RuntimeClass +} + +// GetHPA returns the HPA spec for the NemoCustomizer deployment +func (n *NemoCustomizer) GetHPA() HorizontalPodAutoscalerSpec { + return n.Spec.Scale.HPA +} + +// GetServiceMonitor returns the Service Monitor details for the NemoCustomizer deployment +func (n *NemoCustomizer) GetServiceMonitor() ServiceMonitor { + return n.Spec.Metrics.ServiceMonitor +} + +// GetReplicas returns replicas for the NemoCustomizer deployment +func (n *NemoCustomizer) GetReplicas() int { + if n.IsAutoScalingEnabled() { + return 0 + } + return n.Spec.Replicas +} + +// GetDeploymentKind returns the kind of deployment for NemoCustomizer +func (n *NemoCustomizer) GetDeploymentKind() string { + return "Deployment" +} + +// IsAutoScalingEnabled returns true if autoscaling is enabled for NemoCustomizer deployment +func (n *NemoCustomizer) IsAutoScalingEnabled() bool { + return n.Spec.Scale.Enabled != nil && *n.Spec.Scale.Enabled +} + +// IsIngressEnabled returns true if ingress is enabled for NemoCustomizer deployment +func (n *NemoCustomizer) IsIngressEnabled() bool { + return n.Spec.Expose.Ingress.Enabled != nil && *n.Spec.Expose.Ingress.Enabled +} + +// GetIngressSpec returns the Ingress spec NemoCustomizer deployment +func (n *NemoCustomizer) GetIngressSpec() networkingv1.IngressSpec { + return n.Spec.Expose.Ingress.Spec +} + +// IsServiceMonitorEnabled returns true if servicemonitor is enabled for NemoCustomizer deployment +func (n *NemoCustomizer) IsServiceMonitorEnabled() bool { + return n.Spec.Metrics.Enabled != nil && *n.Spec.Metrics.Enabled +} + +// GetInternalServicePort returns the internal service port number for the NemoCustomizer deployment +func (n *NemoCustomizer) GetInternalServicePort() int32 { + for _, port := range n.Spec.Expose.Service.Ports { + if port.Name == "internal" { + return port.Port + } + } + + // Default to 9009 if no internal port is found + return 9009 +} + +// GetServicePorts returns the service ports for the NemoCustomizer deployment +func (n *NemoCustomizer) GetServicePorts() []corev1.ServicePort { + return n.Spec.Expose.Service.Ports +} + +// GetServicePort returns the service port for the NemoCustomizer deployment +func (n *NemoCustomizer) GetServicePort() int32 { + return n.Spec.Expose.Service.Port +} + +// GetServiceType returns the service type for the NemoCustomizer deployment +func (n *NemoCustomizer) GetServiceType() string { + return string(n.Spec.Expose.Service.Type) +} + +// GetUserID returns the user ID for the NemoCustomizer deployment +func (n *NemoCustomizer) GetUserID() *int64 { + return n.Spec.UserID + +} + +// GetGroupID returns the group ID for the NemoCustomizer deployment +func (n *NemoCustomizer) GetGroupID() *int64 { + return n.Spec.GroupID + +} + +// GetServiceAccountParams return params to render ServiceAccount from templates +func (n *NemoCustomizer) GetServiceAccountParams() *rendertypes.ServiceAccountParams { + params := &rendertypes.ServiceAccountParams{} + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + params.Labels = n.GetServiceLabels() + params.Annotations = n.GetNemoCustomizerAnnotations() + return params +} + +// GetDeploymentParams returns params to render Deployment from templates +func (n *NemoCustomizer) GetDeploymentParams() *rendertypes.DeploymentParams { + params := &rendertypes.DeploymentParams{} + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + params.Labels = n.GetServiceLabels() + params.Annotations = n.GetNemoCustomizerAnnotations() + + // Set template spec + if !n.IsAutoScalingEnabled() { + params.Replicas = n.GetReplicas() + } + params.NodeSelector = n.GetNodeSelector() + params.Tolerations = n.GetTolerations() + params.Affinity = n.GetPodAffinity() + params.ImagePullSecrets = n.GetImagePullSecrets() + params.ImagePullPolicy = n.GetImagePullPolicy() + + // Set labels and selectors + params.SelectorLabels = n.GetSelectorLabels() + + // Set container spec + params.ContainerName = n.GetContainerName() + params.Env = n.GetEnv() + params.Args = n.GetArgs() + params.Command = n.GetCommand() + params.Resources = n.GetResources() + params.Image = n.GetImage() + + // Set container probes + if IsProbeEnabled(n.Spec.LivenessProbe) { + params.LivenessProbe = n.GetLivenessProbe() + } + if IsProbeEnabled(n.Spec.ReadinessProbe) { + params.ReadinessProbe = n.GetReadinessProbe() + } + if IsProbeEnabled(n.Spec.StartupProbe) { + params.StartupProbe = n.GetStartupProbe() + } + params.UserID = n.GetUserID() + params.GroupID = n.GetGroupID() + + // Set service account + params.ServiceAccountName = n.GetServiceAccountName() + + // Set runtime class + params.RuntimeClassName = n.GetRuntimeClass() + + // Extract ports from spec and update rendering params + if len(n.GetServicePorts()) > 0 { + var containerPorts []corev1.ContainerPort + for _, svcPort := range n.GetServicePorts() { + containerPorts = append(containerPorts, corev1.ContainerPort{ + Name: svcPort.Name, + Protocol: svcPort.Protocol, + ContainerPort: svcPort.Port, + }) + } + params.Ports = containerPorts + } else { + params.Ports = []corev1.ContainerPort{{ + Name: "api", + Protocol: corev1.ProtocolTCP, + ContainerPort: n.GetServicePort(), + }} + } + + return params +} + +// GetStatefulSetParams returns params to render StatefulSet from templates +func (n *NemoCustomizer) GetStatefulSetParams() *rendertypes.StatefulSetParams { + + params := &rendertypes.StatefulSetParams{} + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + params.Labels = n.GetServiceLabels() + params.Annotations = n.GetNemoCustomizerAnnotations() + + // Set template spec + if !n.IsAutoScalingEnabled() { + params.Replicas = n.GetReplicas() + } + params.ServiceName = n.GetName() + params.NodeSelector = n.GetNodeSelector() + params.Tolerations = n.GetTolerations() + params.Affinity = n.GetPodAffinity() + params.ImagePullSecrets = n.GetImagePullSecrets() + params.ImagePullPolicy = n.GetImagePullPolicy() + + // Set labels and selectors + params.SelectorLabels = n.GetSelectorLabels() + + // Set container spec + params.ContainerName = n.GetContainerName() + params.Env = n.GetEnv() + params.Args = n.GetArgs() + params.Command = n.GetCommand() + params.Resources = n.GetResources() + params.Image = n.GetImage() + + // Set container probes + params.LivenessProbe = n.GetLivenessProbe() + params.ReadinessProbe = n.GetReadinessProbe() + params.StartupProbe = n.GetStartupProbe() + + // Set service account + params.ServiceAccountName = n.GetServiceAccountName() + + // Set runtime class + params.RuntimeClassName = n.GetRuntimeClass() + return params +} + +// GetServiceParams returns params to render Service from templates +func (n *NemoCustomizer) GetServiceParams() *rendertypes.ServiceParams { + params := &rendertypes.ServiceParams{} + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + params.Labels = n.GetServiceLabels() + params.Annotations = n.GetServiceAnnotations() + + // Set service selector labels + params.SelectorLabels = n.GetSelectorLabels() + + // Set service type + params.Type = n.GetServiceType() + + servicePorts := n.GetServicePorts() + if len(servicePorts) != 0 { + params.Ports = servicePorts + } else { + // Use corev1.ServicePort instead of deprecated params.Port + params.Ports = []corev1.ServicePort{{ + Name: "api", + Port: 8000, + TargetPort: intstr.FromInt(8000), + Protocol: corev1.ProtocolTCP, + }} + } + + return params +} + +// GetIngressParams returns params to render Ingress from templates +func (n *NemoCustomizer) GetIngressParams() *rendertypes.IngressParams { + params := &rendertypes.IngressParams{} + + params.Enabled = n.IsIngressEnabled() + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + params.Labels = n.GetServiceLabels() + params.Annotations = n.GetIngressAnnotations() + params.Spec = n.GetIngressSpec() + return params +} + +// GetRoleParams returns params to render Role from templates +func (n *NemoCustomizer) GetRoleParams() *rendertypes.RoleParams { + params := &rendertypes.RoleParams{} + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + + // Set rules for customizer + params.Rules = []rbacv1.PolicyRule{ + { + APIGroups: []string{"security.openshift.io"}, + Resources: []string{"securitycontextconstraints"}, + ResourceNames: []string{"nonroot"}, + Verbs: []string{"use"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"pods", "persistentvolumeclaims", "services", "configmaps"}, + Verbs: []string{"create", "get", "list", "watch", "delete"}, + }, + { + APIGroups: []string{"nvidia.com"}, + Resources: []string{"nemotrainingjobs", "nemotrainingjobs/status", "nemoentityhandlers"}, + Verbs: []string{"create", "get", "list", "watch", "update", "delete", "patch"}, + }, + } + + volcanoRules := []rbacv1.PolicyRule{ + { + APIGroups: []string{"batch.volcano.sh"}, + Resources: []string{"jobs", "jobs/status"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"nodeinfo.volcano.sh"}, + Resources: []string{"numatopologies"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"scheduling.incubator.k8s.io", "scheduling.volcano.sh"}, + Resources: []string{"queues", "queues/status", "podgroups"}, + Verbs: []string{"get", "list", "watch"}, + }, + } + + if n.Spec.Scheduler.Type == SchedulerTypeVolcano { + params.Rules = append(params.Rules, volcanoRules...) + } + + return params +} + +// GetRoleBindingParams returns params to render RoleBinding from templates +func (n *NemoCustomizer) GetRoleBindingParams() *rendertypes.RoleBindingParams { + params := &rendertypes.RoleBindingParams{} + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + + params.ServiceAccountName = n.GetServiceAccountName() + params.RoleName = n.GetName() + return params +} + +// GetHPAParams returns params to render HPA from templates +func (n *NemoCustomizer) GetHPAParams() *rendertypes.HPAParams { + params := &rendertypes.HPAParams{} + + params.Enabled = n.IsAutoScalingEnabled() + + // Set metadata + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + params.Labels = n.GetServiceLabels() + params.Annotations = n.GetHPAAnnotations() + + // Set HPA spec + hpa := n.GetHPA() + hpaSpec := autoscalingv2.HorizontalPodAutoscalerSpec{ + ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{ + Kind: n.GetDeploymentKind(), + Name: n.GetName(), + APIVersion: "apps/v1", + }, + MinReplicas: hpa.MinReplicas, + MaxReplicas: hpa.MaxReplicas, + Metrics: hpa.Metrics, + Behavior: hpa.Behavior, + } + params.HPASpec = hpaSpec + return params +} + +// GetSCCParams return params to render SCC from templates +func (n *NemoCustomizer) GetSCCParams() *rendertypes.SCCParams { + params := &rendertypes.SCCParams{} + // Set metadata + params.Name = "nemo-customizer-scc" + + params.ServiceAccountName = n.GetServiceAccountName() + return params +} + +// GetServiceMonitorParams return params to render Service Monitor from templates +func (n *NemoCustomizer) GetServiceMonitorParams() *rendertypes.ServiceMonitorParams { + params := &rendertypes.ServiceMonitorParams{} + serviceMonitor := n.GetServiceMonitor() + params.Enabled = n.IsServiceMonitorEnabled() + params.Name = n.GetName() + params.Namespace = n.GetNamespace() + svcLabels := n.GetServiceLabels() + maps.Copy(svcLabels, serviceMonitor.AdditionalLabels) + params.Labels = svcLabels + params.Annotations = n.GetServiceMonitorAnnotations() + + // Set Service Monitor spec + smSpec := monitoringv1.ServiceMonitorSpec{ + NamespaceSelector: monitoringv1.NamespaceSelector{MatchNames: []string{n.Namespace}}, + Selector: metav1.LabelSelector{MatchLabels: n.GetServiceLabels()}, + Endpoints: []monitoringv1.Endpoint{{Port: "service-port", ScrapeTimeout: serviceMonitor.ScrapeTimeout, Interval: serviceMonitor.Interval}}, + } + params.SMSpec = smSpec + return params +} + +// GetIngressAnnotations return standard and customized ingress annotations +func (n *NemoCustomizer) GetIngressAnnotations() map[string]string { + NemoCustomizerAnnotations := n.GetNemoCustomizerAnnotations() + + if n.Spec.Expose.Ingress.Annotations != nil { + return utils.MergeMaps(NemoCustomizerAnnotations, n.Spec.Expose.Ingress.Annotations) + } + return NemoCustomizerAnnotations +} + +// GetServiceAnnotations return standard and customized service annotations +func (n *NemoCustomizer) GetServiceAnnotations() map[string]string { + NemoCustomizerAnnotations := n.GetNemoCustomizerAnnotations() + + if n.Spec.Expose.Service.Annotations != nil { + return utils.MergeMaps(NemoCustomizerAnnotations, n.Spec.Expose.Service.Annotations) + } + return NemoCustomizerAnnotations +} + +// GetHPAAnnotations return standard and customized hpa annotations +func (n *NemoCustomizer) GetHPAAnnotations() map[string]string { + NemoCustomizerAnnotations := n.GetNemoCustomizerAnnotations() + + if n.Spec.Scale.Annotations != nil { + return utils.MergeMaps(NemoCustomizerAnnotations, n.Spec.Scale.Annotations) + } + return NemoCustomizerAnnotations +} + +// GetServiceMonitorAnnotations return standard and customized servicemonitor annotations +func (n *NemoCustomizer) GetServiceMonitorAnnotations() map[string]string { + NemoCustomizerAnnotations := n.GetNemoCustomizerAnnotations() + + if n.Spec.Metrics.ServiceMonitor.Annotations != nil { + return utils.MergeMaps(NemoCustomizerAnnotations, n.Spec.Metrics.ServiceMonitor.Annotations) + } + return NemoCustomizerAnnotations +} + +func init() { + SchemeBuilder.Register(&NemoCustomizer{}, &NemoCustomizerList{}) +} diff --git a/api/apps/v1alpha1/zz_generated.deepcopy.go b/api/apps/v1alpha1/zz_generated.deepcopy.go index 440b9039..6743ba74 100644 --- a/api/apps/v1alpha1/zz_generated.deepcopy.go +++ b/api/apps/v1alpha1/zz_generated.deepcopy.go @@ -165,6 +165,21 @@ func (in *DatabaseCredentials) DeepCopy() *DatabaseCredentials { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExporterConfig) DeepCopyInto(out *ExporterConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExporterConfig. +func (in *ExporterConfig) DeepCopy() *ExporterConfig { + if in == nil { + return nil + } + out := new(ExporterConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Expose) DeepCopyInto(out *Expose) { *out = *in @@ -983,6 +998,178 @@ func (in *NIMSource) DeepCopy() *NIMSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NemoCustomizer) DeepCopyInto(out *NemoCustomizer) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NemoCustomizer. +func (in *NemoCustomizer) DeepCopy() *NemoCustomizer { + if in == nil { + return nil + } + out := new(NemoCustomizer) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NemoCustomizer) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NemoCustomizerList) DeepCopyInto(out *NemoCustomizerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NemoCustomizer, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NemoCustomizerList. +func (in *NemoCustomizerList) DeepCopy() *NemoCustomizerList { + if in == nil { + return nil + } + out := new(NemoCustomizerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NemoCustomizerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NemoCustomizerSpec) DeepCopyInto(out *NemoCustomizerSpec) { + *out = *in + in.Image.DeepCopyInto(&out.Image) + if in.Command != nil { + in, out := &in.Command, &out.Command + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]v1.EnvVar, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Labels != nil { + in, out := &in.Labels, &out.Labels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Annotations != nil { + in, out := &in.Annotations, &out.Annotations + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]v1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.PodAffinity != nil { + in, out := &in.PodAffinity, &out.PodAffinity + *out = new(v1.PodAffinity) + (*in).DeepCopyInto(*out) + } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + in.Expose.DeepCopyInto(&out.Expose) + in.LivenessProbe.DeepCopyInto(&out.LivenessProbe) + in.ReadinessProbe.DeepCopyInto(&out.ReadinessProbe) + in.StartupProbe.DeepCopyInto(&out.StartupProbe) + in.Scale.DeepCopyInto(&out.Scale) + in.Metrics.DeepCopyInto(&out.Metrics) + if in.UserID != nil { + in, out := &in.UserID, &out.UserID + *out = new(int64) + **out = **in + } + if in.GroupID != nil { + in, out := &in.GroupID, &out.GroupID + *out = new(int64) + **out = **in + } + out.Scheduler = in.Scheduler + in.OpenTelemetry.DeepCopyInto(&out.OpenTelemetry) + in.DatabaseConfig.DeepCopyInto(&out.DatabaseConfig) + out.WandBSecret = in.WandBSecret +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NemoCustomizerSpec. +func (in *NemoCustomizerSpec) DeepCopy() *NemoCustomizerSpec { + if in == nil { + return nil + } + out := new(NemoCustomizerSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NemoCustomizerStatus) DeepCopyInto(out *NemoCustomizerStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NemoCustomizerStatus. +func (in *NemoCustomizerStatus) DeepCopy() *NemoCustomizerStatus { + if in == nil { + return nil + } + out := new(NemoCustomizerStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NemoDatastore) DeepCopyInto(out *NemoDatastore) { *out = *in @@ -1689,6 +1876,37 @@ func (in *NemoGuardrailStatus) DeepCopy() *NemoGuardrailStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OTelSpec) DeepCopyInto(out *OTelSpec) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + if in.DisableLogging != nil { + in, out := &in.DisableLogging, &out.DisableLogging + *out = new(bool) + **out = **in + } + out.ExporterConfig = in.ExporterConfig + if in.ExcludedUrls != nil { + in, out := &in.ExcludedUrls, &out.ExcludedUrls + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OTelSpec. +func (in *OTelSpec) DeepCopy() *OTelSpec { + if in == nil { + return nil + } + out := new(OTelSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ObjectStoreConfig) DeepCopyInto(out *ObjectStoreConfig) { *out = *in @@ -1782,6 +2000,21 @@ func (in *Resources) DeepCopy() *Resources { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Scheduler) DeepCopyInto(out *Scheduler) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Scheduler. +func (in *Scheduler) DeepCopy() *Scheduler { + if in == nil { + return nil + } + out := new(Scheduler) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Secrets) DeepCopyInto(out *Secrets) { *out = *in @@ -1800,6 +2033,13 @@ func (in *Secrets) DeepCopy() *Secrets { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Service) DeepCopyInto(out *Service) { *out = *in + if in.Ports != nil { + in, out := &in.Ports, &out.Ports + *out = make([]v1.ServicePort, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Annotations != nil { in, out := &in.Annotations, &out.Annotations *out = make(map[string]string, len(*in)) @@ -1862,3 +2102,18 @@ func (in *ServiceMonitor) DeepCopy() *ServiceMonitor { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *WandBSecret) DeepCopyInto(out *WandBSecret) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WandBSecret. +func (in *WandBSecret) DeepCopy() *WandBSecret { + if in == nil { + return nil + } + out := new(WandBSecret) + in.DeepCopyInto(out) + return out +} diff --git a/api/versioned/typed/apps/v1alpha1/apps_client.go b/api/versioned/typed/apps/v1alpha1/apps_client.go index 045f4265..10db1e04 100644 --- a/api/versioned/typed/apps/v1alpha1/apps_client.go +++ b/api/versioned/typed/apps/v1alpha1/apps_client.go @@ -30,6 +30,7 @@ type AppsV1alpha1Interface interface { NIMCachesGetter NIMPipelinesGetter NIMServicesGetter + NemoCustomizersGetter NemoDatastoresGetter NemoEntitystoresGetter NemoEvaluatorsGetter @@ -53,6 +54,10 @@ func (c *AppsV1alpha1Client) NIMServices(namespace string) NIMServiceInterface { return newNIMServices(c, namespace) } +func (c *AppsV1alpha1Client) NemoCustomizers(namespace string) NemoCustomizerInterface { + return newNemoCustomizers(c, namespace) +} + func (c *AppsV1alpha1Client) NemoDatastores(namespace string) NemoDatastoreInterface { return newNemoDatastores(c, namespace) } diff --git a/api/versioned/typed/apps/v1alpha1/fake/fake_apps_client.go b/api/versioned/typed/apps/v1alpha1/fake/fake_apps_client.go index 1896f204..5246cc31 100644 --- a/api/versioned/typed/apps/v1alpha1/fake/fake_apps_client.go +++ b/api/versioned/typed/apps/v1alpha1/fake/fake_apps_client.go @@ -39,6 +39,10 @@ func (c *FakeAppsV1alpha1) NIMServices(namespace string) v1alpha1.NIMServiceInte return &FakeNIMServices{c, namespace} } +func (c *FakeAppsV1alpha1) NemoCustomizers(namespace string) v1alpha1.NemoCustomizerInterface { + return &FakeNemoCustomizers{c, namespace} +} + func (c *FakeAppsV1alpha1) NemoDatastores(namespace string) v1alpha1.NemoDatastoreInterface { return &FakeNemoDatastores{c, namespace} } diff --git a/api/versioned/typed/apps/v1alpha1/fake/fake_nemocustomizer.go b/api/versioned/typed/apps/v1alpha1/fake/fake_nemocustomizer.go new file mode 100644 index 00000000..cb73ebde --- /dev/null +++ b/api/versioned/typed/apps/v1alpha1/fake/fake_nemocustomizer.go @@ -0,0 +1,146 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + "context" + + v1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeNemoCustomizers implements NemoCustomizerInterface +type FakeNemoCustomizers struct { + Fake *FakeAppsV1alpha1 + ns string +} + +var nemocustomizersResource = v1alpha1.SchemeGroupVersion.WithResource("nemocustomizers") + +var nemocustomizersKind = v1alpha1.SchemeGroupVersion.WithKind("NemoCustomizer") + +// Get takes name of the nemoCustomizer, and returns the corresponding nemoCustomizer object, and an error if there is any. +func (c *FakeNemoCustomizers) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.NemoCustomizer, err error) { + emptyResult := &v1alpha1.NemoCustomizer{} + obj, err := c.Fake. + Invokes(testing.NewGetActionWithOptions(nemocustomizersResource, c.ns, name, options), emptyResult) + + if obj == nil { + return emptyResult, err + } + return obj.(*v1alpha1.NemoCustomizer), err +} + +// List takes label and field selectors, and returns the list of NemoCustomizers that match those selectors. +func (c *FakeNemoCustomizers) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.NemoCustomizerList, err error) { + emptyResult := &v1alpha1.NemoCustomizerList{} + obj, err := c.Fake. + Invokes(testing.NewListActionWithOptions(nemocustomizersResource, nemocustomizersKind, c.ns, opts), emptyResult) + + if obj == nil { + return emptyResult, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &v1alpha1.NemoCustomizerList{ListMeta: obj.(*v1alpha1.NemoCustomizerList).ListMeta} + for _, item := range obj.(*v1alpha1.NemoCustomizerList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested nemoCustomizers. +func (c *FakeNemoCustomizers) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewWatchActionWithOptions(nemocustomizersResource, c.ns, opts)) + +} + +// Create takes the representation of a nemoCustomizer and creates it. Returns the server's representation of the nemoCustomizer, and an error, if there is any. +func (c *FakeNemoCustomizers) Create(ctx context.Context, nemoCustomizer *v1alpha1.NemoCustomizer, opts v1.CreateOptions) (result *v1alpha1.NemoCustomizer, err error) { + emptyResult := &v1alpha1.NemoCustomizer{} + obj, err := c.Fake. + Invokes(testing.NewCreateActionWithOptions(nemocustomizersResource, c.ns, nemoCustomizer, opts), emptyResult) + + if obj == nil { + return emptyResult, err + } + return obj.(*v1alpha1.NemoCustomizer), err +} + +// Update takes the representation of a nemoCustomizer and updates it. Returns the server's representation of the nemoCustomizer, and an error, if there is any. +func (c *FakeNemoCustomizers) Update(ctx context.Context, nemoCustomizer *v1alpha1.NemoCustomizer, opts v1.UpdateOptions) (result *v1alpha1.NemoCustomizer, err error) { + emptyResult := &v1alpha1.NemoCustomizer{} + obj, err := c.Fake. + Invokes(testing.NewUpdateActionWithOptions(nemocustomizersResource, c.ns, nemoCustomizer, opts), emptyResult) + + if obj == nil { + return emptyResult, err + } + return obj.(*v1alpha1.NemoCustomizer), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeNemoCustomizers) UpdateStatus(ctx context.Context, nemoCustomizer *v1alpha1.NemoCustomizer, opts v1.UpdateOptions) (result *v1alpha1.NemoCustomizer, err error) { + emptyResult := &v1alpha1.NemoCustomizer{} + obj, err := c.Fake. + Invokes(testing.NewUpdateSubresourceActionWithOptions(nemocustomizersResource, "status", c.ns, nemoCustomizer, opts), emptyResult) + + if obj == nil { + return emptyResult, err + } + return obj.(*v1alpha1.NemoCustomizer), err +} + +// Delete takes name of the nemoCustomizer and deletes it. Returns an error if one occurs. +func (c *FakeNemoCustomizers) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewDeleteActionWithOptions(nemocustomizersResource, c.ns, name, opts), &v1alpha1.NemoCustomizer{}) + + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeNemoCustomizers) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + action := testing.NewDeleteCollectionActionWithOptions(nemocustomizersResource, c.ns, opts, listOpts) + + _, err := c.Fake.Invokes(action, &v1alpha1.NemoCustomizerList{}) + return err +} + +// Patch applies the patch and returns the patched nemoCustomizer. +func (c *FakeNemoCustomizers) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NemoCustomizer, err error) { + emptyResult := &v1alpha1.NemoCustomizer{} + obj, err := c.Fake. + Invokes(testing.NewPatchSubresourceActionWithOptions(nemocustomizersResource, c.ns, name, pt, data, opts, subresources...), emptyResult) + + if obj == nil { + return emptyResult, err + } + return obj.(*v1alpha1.NemoCustomizer), err +} diff --git a/api/versioned/typed/apps/v1alpha1/generated_expansion.go b/api/versioned/typed/apps/v1alpha1/generated_expansion.go index 78164a8c..848a9efb 100644 --- a/api/versioned/typed/apps/v1alpha1/generated_expansion.go +++ b/api/versioned/typed/apps/v1alpha1/generated_expansion.go @@ -23,6 +23,8 @@ type NIMPipelineExpansion interface{} type NIMServiceExpansion interface{} +type NemoCustomizerExpansion interface{} + type NemoDatastoreExpansion interface{} type NemoEntitystoreExpansion interface{} diff --git a/api/versioned/typed/apps/v1alpha1/nemocustomizer.go b/api/versioned/typed/apps/v1alpha1/nemocustomizer.go new file mode 100644 index 00000000..6a2de2ea --- /dev/null +++ b/api/versioned/typed/apps/v1alpha1/nemocustomizer.go @@ -0,0 +1,68 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "context" + + v1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" + scheme "github.com/NVIDIA/k8s-nim-operator/api/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// NemoCustomizersGetter has a method to return a NemoCustomizerInterface. +// A group's client should implement this interface. +type NemoCustomizersGetter interface { + NemoCustomizers(namespace string) NemoCustomizerInterface +} + +// NemoCustomizerInterface has methods to work with NemoCustomizer resources. +type NemoCustomizerInterface interface { + Create(ctx context.Context, nemoCustomizer *v1alpha1.NemoCustomizer, opts v1.CreateOptions) (*v1alpha1.NemoCustomizer, error) + Update(ctx context.Context, nemoCustomizer *v1alpha1.NemoCustomizer, opts v1.UpdateOptions) (*v1alpha1.NemoCustomizer, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, nemoCustomizer *v1alpha1.NemoCustomizer, opts v1.UpdateOptions) (*v1alpha1.NemoCustomizer, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.NemoCustomizer, error) + List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.NemoCustomizerList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NemoCustomizer, err error) + NemoCustomizerExpansion +} + +// nemoCustomizers implements NemoCustomizerInterface +type nemoCustomizers struct { + *gentype.ClientWithList[*v1alpha1.NemoCustomizer, *v1alpha1.NemoCustomizerList] +} + +// newNemoCustomizers returns a NemoCustomizers +func newNemoCustomizers(c *AppsV1alpha1Client, namespace string) *nemoCustomizers { + return &nemoCustomizers{ + gentype.NewClientWithList[*v1alpha1.NemoCustomizer, *v1alpha1.NemoCustomizerList]( + "nemocustomizers", + c.RESTClient(), + scheme.ParameterCodec, + namespace, + func() *v1alpha1.NemoCustomizer { return &v1alpha1.NemoCustomizer{} }, + func() *v1alpha1.NemoCustomizerList { return &v1alpha1.NemoCustomizerList{} }), + } +} diff --git a/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml b/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml new file mode 100644 index 00000000..c67c7cfa --- /dev/null +++ b/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml @@ -0,0 +1,2401 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.2 + name: nemocustomizers.apps.nvidia.com +spec: + group: apps.nvidia.com + names: + kind: NemoCustomizer + listKind: NemoCustomizerList + plural: nemocustomizers + singular: nemocustomizer + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: Status + type: string + - format: date-time + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: NemoCustomizer is the Schema for the NemoCustomizer API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NemoCustomizerSpec defines the desired state of NemoCustomizer + properties: + annotations: + additionalProperties: + type: string + type: object + args: + items: + type: string + type: array + command: + items: + type: string + type: array + customizerConfig: + description: CustomizerConfig stores the customizer configuration + for training and models + minLength: 1 + type: string + databaseConfig: + description: DatabaseConfig stores the database configuration + properties: + credentials: + description: |- + DatabaseCredentials stores the configuration to retrieve the database credentials. + Required, must not be nil. + properties: + passwordKey: + default: password + description: |- + PasswordKey is the name of the key in the `CredentialsSecret` secret for the database credentials. + Defaults to "password". + type: string + secretName: + description: |- + SecretName is the name of the secret which has the database credentials for a NEMO service user. + Required, must not be empty. + minLength: 1 + type: string + user: + description: |- + User is the non-root username for a NEMO Service in the database. + Required, must not be empty. + minLength: 1 + type: string + required: + - secretName + - user + type: object + databaseName: + description: |- + DatabaseName is the database name for a NEMO Service. + Required, must not be empty. + minLength: 1 + type: string + host: + description: |- + Host is the hostname of the database. + Required, must not be empty. + minLength: 1 + type: string + port: + default: 5432 + description: |- + Port is the port where the database is reachable at. + If specified, this must be a valid port number, 0 < databasePort < 65536. + Defaults to 5432. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - credentials + - databaseName + - host + type: object + env: + items: + description: EnvVar represents an environment variable present in + a Container. + properties: + name: + description: Name of the environment variable. Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. Cannot + be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath is + written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the specified + API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the exposed + resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + expose: + description: Expose defines attributes to expose the service + properties: + ingress: + description: Ingress defines attributes to enable ingress for + the service + properties: + annotations: + additionalProperties: + type: string + type: object + enabled: + description: ingress, or virtualService - not both + type: boolean + spec: + description: IngressSpec describes the Ingress the user wishes + to exist. + properties: + defaultBackend: + description: |- + defaultBackend is the backend that should handle requests that don't + match any rule. If Rules are not specified, DefaultBackend must be specified. + If DefaultBackend is not set, the handling of requests that do not match any + of the rules will be up to the Ingress controller. + properties: + resource: + description: |- + resource is an ObjectRef to another Kubernetes resource in the namespace + of the Ingress object. If resource is specified, a service.Name and + service.Port must not be specified. + This is a mutually exclusive setting with "Service". + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + service: + description: |- + service references a service as a backend. + This is a mutually exclusive setting with "Resource". + properties: + name: + description: |- + name is the referenced service. The service must exist in + the same namespace as the Ingress object. + type: string + port: + description: |- + port of the referenced service. A port name or port number + is required for a IngressServiceBackend. + properties: + name: + description: |- + name is the name of the port on the Service. + This is a mutually exclusive setting with "Number". + type: string + number: + description: |- + number is the numerical port number (e.g. 80) on the Service. + This is a mutually exclusive setting with "Name". + format: int32 + type: integer + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: object + ingressClassName: + description: |- + ingressClassName is the name of an IngressClass cluster resource. Ingress + controller implementations use this field to know whether they should be + serving this Ingress resource, by a transitive connection + (controller -> IngressClass -> Ingress resource). Although the + `kubernetes.io/ingress.class` annotation (simple constant name) was never + formally defined, it was widely supported by Ingress controllers to create + a direct binding between Ingress controller and Ingress resources. Newly + created Ingress resources should prefer using the field. However, even + though the annotation is officially deprecated, for backwards compatibility + reasons, ingress controllers should still honor that annotation if present. + type: string + rules: + description: |- + rules is a list of host rules used to configure the Ingress. If unspecified, + or no rule matches, all traffic is sent to the default backend. + items: + description: |- + IngressRule represents the rules mapping the paths under a specified host to + the related backend services. Incoming requests are first evaluated for a host + match, then routed to the backend associated with the matching IngressRuleValue. + properties: + host: + description: "host is the fully qualified domain + name of a network host, as defined by RFC 3986.\nNote + the following deviations from the \"host\" part + of the\nURI as defined in RFC 3986:\n1. IPs are + not allowed. Currently an IngressRuleValue can + only apply to\n the IP in the Spec of the parent + Ingress.\n2. The `:` delimiter is not respected + because ports are not allowed.\n\t Currently + the port of an Ingress is implicitly :80 for http + and\n\t :443 for https.\nBoth these may change + in the future.\nIncoming requests are matched + against the host before the\nIngressRuleValue. + If the host is unspecified, the Ingress routes + all\ntraffic based on the specified IngressRuleValue.\n\nhost + can be \"precise\" which is a domain name without + the terminating dot of\na network host (e.g. \"foo.bar.com\") + or \"wildcard\", which is a domain name\nprefixed + with a single wildcard label (e.g. \"*.foo.com\").\nThe + wildcard character '*' must appear by itself as + the first DNS label and\nmatches only a single + label. You cannot have a wildcard label by itself + (e.g. Host == \"*\").\nRequests will be matched + against the Host field in the following way:\n1. + If host is precise, the request matches this rule + if the http host header is equal to Host.\n2. + If host is a wildcard, then the request matches + this rule if the http host header\nis to equal + to the suffix (removing the first label) of the + wildcard rule." + type: string + http: + description: |- + HTTPIngressRuleValue is a list of http selectors pointing to backends. + In the example: http:///? -> backend where + where parts of the url correspond to RFC 3986, this resource will be used + to match against everything after the last '/' and before the first '?' + or '#'. + properties: + paths: + description: paths is a collection of paths + that map requests to backends. + items: + description: |- + HTTPIngressPath associates a path with a backend. Incoming urls matching the + path are forwarded to the backend. + properties: + backend: + description: |- + backend defines the referenced service endpoint to which the traffic + will be forwarded to. + properties: + resource: + description: |- + resource is an ObjectRef to another Kubernetes resource in the namespace + of the Ingress object. If resource is specified, a service.Name and + service.Port must not be specified. + This is a mutually exclusive setting with "Service". + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + service: + description: |- + service references a service as a backend. + This is a mutually exclusive setting with "Resource". + properties: + name: + description: |- + name is the referenced service. The service must exist in + the same namespace as the Ingress object. + type: string + port: + description: |- + port of the referenced service. A port name or port number + is required for a IngressServiceBackend. + properties: + name: + description: |- + name is the name of the port on the Service. + This is a mutually exclusive setting with "Number". + type: string + number: + description: |- + number is the numerical port number (e.g. 80) on the Service. + This is a mutually exclusive setting with "Name". + format: int32 + type: integer + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: object + path: + description: |- + path is matched against the path of an incoming request. Currently it can + contain characters disallowed from the conventional "path" part of a URL + as defined by RFC 3986. Paths must begin with a '/' and must be present + when using PathType with value "Exact" or "Prefix". + type: string + pathType: + description: |- + pathType determines the interpretation of the path matching. PathType can + be one of the following values: + * Exact: Matches the URL path exactly. + * Prefix: Matches based on a URL path prefix split by '/'. Matching is + done on a path element by element basis. A path element refers is the + list of labels in the path split by the '/' separator. A request is a + match for path p if every p is an element-wise prefix of p of the + request path. Note that if the last element of the path is a substring + of the last element in request path, it is not a match (e.g. /foo/bar + matches /foo/bar/baz, but does not match /foo/barbaz). + * ImplementationSpecific: Interpretation of the Path matching is up to + the IngressClass. Implementations can treat this as a separate PathType + or treat it identically to Prefix or Exact path types. + Implementations are required to support all path types. + type: string + required: + - backend + - pathType + type: object + type: array + x-kubernetes-list-type: atomic + required: + - paths + type: object + type: object + type: array + x-kubernetes-list-type: atomic + tls: + description: |- + tls represents the TLS configuration. Currently the Ingress only supports a + single TLS port, 443. If multiple members of this list specify different hosts, + they will be multiplexed on the same port according to the hostname specified + through the SNI TLS extension, if the ingress controller fulfilling the + ingress supports SNI. + items: + description: IngressTLS describes the transport layer + security associated with an ingress. + properties: + hosts: + description: |- + hosts is a list of hosts included in the TLS certificate. The values in + this list must match the name/s used in the tlsSecret. Defaults to the + wildcard host setting for the loadbalancer controller fulfilling this + Ingress, if left unspecified. + items: + type: string + type: array + x-kubernetes-list-type: atomic + secretName: + description: |- + secretName is the name of the secret used to terminate TLS traffic on + port 443. Field is left optional to allow TLS routing based on SNI + hostname alone. If the SNI host in a listener conflicts with the "Host" + header field used by an IngressRule, the SNI host is used for termination + and value of the "Host" header is used for routing. + type: string + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + service: + description: Service defines attributes to create a service + properties: + annotations: + additionalProperties: + type: string + type: object + name: + description: override the default service name + type: string + port: + default: 8000 + description: 'Deprecated: Use Ports instead.' + format: int32 + type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + type: + description: Service Type string describes ingress methods + for a service + type: string + type: object + type: object + groupID: + format: int64 + type: integer + image: + description: Image defines image attributes + properties: + pullPolicy: + type: string + pullSecrets: + items: + type: string + type: array + repository: + type: string + tag: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + livenessProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + metrics: + description: Metrics defines attributes to setup metrics collection + properties: + enabled: + type: boolean + serviceMonitor: + description: for use with the Prometheus Operator and the primary + service object + properties: + additionalLabels: + additionalProperties: + type: string + type: object + annotations: + additionalProperties: + type: string + type: object + interval: + description: |- + Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function. + Supported units: y, w, d, h, m, s, ms + Examples: `30s`, `1m`, `1h20m15s`, `15d` + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + scrapeTimeout: + description: |- + Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function. + Supported units: y, w, d, h, m, s, ms + Examples: `30s`, `1m`, `1h20m15s`, `15d` + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + type: object + nodeSelector: + additionalProperties: + type: string + type: object + otel: + description: OpenTelemetry Settings + properties: + disableLogging: + description: DisableLogging indicates whether Python logging auto-instrumentation + should be disabled. + type: boolean + enabled: + description: Enabled indicates if opentelemetry collector and + tracing are enabled + type: boolean + excludedUrls: + default: + - health + description: ExcludedUrls defines URLs to be excluded from tracing. + items: + type: string + type: array + exporterConfig: + description: ExporterConfig defines configuration for different + OTel exporters + properties: + logsExporter: + default: otlp + description: 'LogsExporter sets the logs exporter: (otlp, + console, none).' + enum: + - otlp + - console + - none + type: string + metricsExporter: + default: otlp + description: 'MetricsExporter sets the metrics exporter: (otlp, + console, none).' + enum: + - otlp + - console + - none + type: string + tracesExporter: + default: otlp + description: 'TracesExporter sets the traces exporter: (otlp, + console, none).' + type: string + type: object + exporterOtlpEndpoint: + description: ExporterOtlpEndpoint is the OTLP collector endpoint. + minLength: 1 + type: string + logLevel: + default: INFO + description: LogLevel defines the log level (e.g., INFO, DEBUG). + type: string + required: + - exporterOtlpEndpoint + type: object + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, associated with + the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + readinessProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + replicas: + default: 1 + minimum: 1 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + runtimeClass: + type: string + scale: + description: Autoscaling defines attributes to automatically scale + the service based on metrics + properties: + annotations: + additionalProperties: + type: string + type: object + enabled: + type: boolean + hpa: + description: HorizontalPodAutoscalerSpec defines the parameters + required to setup HPA + properties: + behavior: + description: |- + HorizontalPodAutoscalerBehavior configures the scaling behavior of the target + in both Up and Down directions (scaleUp and scaleDown fields respectively). + properties: + scaleDown: + description: |- + scaleDown is scaling policy for scaling Down. + If not set, the default value is to allow to scale down to minReplicas pods, with a + 300 second stabilization window (i.e., the highest recommendation for + the last 300sec is used). + properties: + policies: + description: |- + policies is a list of potential scaling polices which can be used during scaling. + At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: |- + periodSeconds specifies the window of time for which the policy should hold true. + PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: type is used to specify the scaling + policy. + type: string + value: + description: |- + value contains the amount of change which is permitted by the policy. + It must be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + description: |- + selectPolicy is used to specify which policy should be used. + If not set, the default value Max is used. + type: string + stabilizationWindowSeconds: + description: |- + stabilizationWindowSeconds is the number of seconds for which past recommendations should be + considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour). + If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window is 300 seconds long). + format: int32 + type: integer + type: object + scaleUp: + description: |- + scaleUp is scaling policy for scaling Up. + If not set, the default value is the higher of: + * increase no more than 4 pods per 60 seconds + * double the number of pods per 60 seconds + No stabilization is used. + properties: + policies: + description: |- + policies is a list of potential scaling polices which can be used during scaling. + At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: |- + periodSeconds specifies the window of time for which the policy should hold true. + PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: type is used to specify the scaling + policy. + type: string + value: + description: |- + value contains the amount of change which is permitted by the policy. + It must be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + description: |- + selectPolicy is used to specify which policy should be used. + If not set, the default value Max is used. + type: string + stabilizationWindowSeconds: + description: |- + stabilizationWindowSeconds is the number of seconds for which past recommendations should be + considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour). + If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window is 300 seconds long). + format: int32 + type: integer + type: object + type: object + maxReplicas: + format: int32 + type: integer + metrics: + items: + description: |- + MetricSpec specifies how to scale based on a single metric + (only `type` and one other matching field should be set at once). + properties: + containerResource: + description: |- + containerResource refers to a resource metric (such as those specified in + requests and limits) known to Kubernetes describing a single container in + each pod of the current scale target (e.g. CPU or memory). Such metrics are + built in to Kubernetes, and have special scaling options on top of those + available to normal per-pod metrics using the "pods" source. + This is an alpha feature and can be enabled by the HPAContainerMetrics feature flag. + properties: + container: + description: container is the name of the container + in the pods of the scaling target + type: string + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - container + - name + - target + type: object + external: + description: |- + external refers to a global metric that is not associated + with any Kubernetes object. It allows autoscaling based on information + coming from components running outside of cluster + (for example length of queue in cloud messaging service, or + QPS from loadbalancer running outside of cluster). + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + object: + description: |- + object refers to a metric describing a single kubernetes object + (for example, hits-per-second on an Ingress object). + properties: + describedObject: + description: describedObject specifies the descriptions + of a object,such as kind,name apiVersion + properties: + apiVersion: + description: apiVersion is the API version of + the referent + type: string + kind: + description: 'kind is the kind of the referent; + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'name is the name of the referent; + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + required: + - kind + - name + type: object + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - describedObject + - metric + - target + type: object + pods: + description: |- + pods refers to a metric describing each pod in the current scale target + (for example, transactions-processed-per-second). The values will be + averaged together before being compared to the target value. + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + resource: + description: |- + resource refers to a resource metric (such as those specified in + requests and limits) known to Kubernetes describing each pod in the + current scale target (e.g. CPU or memory). Such metrics are built in to + Kubernetes, and have special scaling options on top of those available + to normal per-pod metrics using the "pods" source. + properties: + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - name + - target + type: object + type: + description: |- + type is the type of metric source. It should be one of "ContainerResource", "External", + "Object", "Pods" or "Resource", each mapping to a matching field in the object. + Note: "ContainerResource" type is available on when the feature-gate + HPAContainerMetrics is enabled + type: string + required: + - type + type: object + type: array + minReplicas: + format: int32 + type: integer + required: + - maxReplicas + type: object + type: object + scheduler: + description: Scheduler Configuration + properties: + type: + default: volcano + description: Type is the scheduler type (volcano, runai) + enum: + - volcano + - runai + type: string + type: object + startupProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + userID: + format: int64 + type: integer + wandbSecret: + description: WandBSecret stores the secret and encryption key for + the Weights and Biases service. + properties: + apiKeyKey: + default: apiKey + description: |- + APIKeyKey is the key in the Secret that holds the WandB API key. + Defaults to "apiKey". + minLength: 1 + type: string + encryptionKey: + default: encryptionKey + description: |- + EncryptionKey is an optional key in the secret used for encrypting WandB credentials. + This can be used for additional security layers if required. + Defaults to "encryptionKey". + type: string + name: + description: |- + Name is the name of the Kubernetes Secret containing the WandB API key. + Required, must not be empty. + minLength: 1 + type: string + required: + - apiKeyKey + - name + type: object + required: + - customizerConfig + - databaseConfig + - otel + - wandbSecret + type: object + status: + description: NemoCustomizerStatus defines the observed state of NemoCustomizer + properties: + availableReplicas: + format: int32 + type: integer + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + state: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/bundle/manifests/apps.nvidia.com_nemodatastores.yaml b/bundle/manifests/apps.nvidia.com_nemodatastores.yaml index 6f101cf0..c1ba5905 100644 --- a/bundle/manifests/apps.nvidia.com_nemodatastores.yaml +++ b/bundle/manifests/apps.nvidia.com_nemodatastores.yaml @@ -535,14 +535,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/bundle/manifests/apps.nvidia.com_nemoentitystores.yaml b/bundle/manifests/apps.nvidia.com_nemoentitystores.yaml index 5d277f04..3d4a468d 100644 --- a/bundle/manifests/apps.nvidia.com_nemoentitystores.yaml +++ b/bundle/manifests/apps.nvidia.com_nemoentitystores.yaml @@ -533,14 +533,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml b/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml index 80fa85bd..46c1dd80 100644 --- a/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml +++ b/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml @@ -550,14 +550,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml b/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml index 085924cd..f85ced37 100644 --- a/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml +++ b/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml @@ -509,14 +509,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml index 04224974..18478e01 100644 --- a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml +++ b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml @@ -527,14 +527,87 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information + on service's port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed + by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/bundle/manifests/apps.nvidia.com_nimservices.yaml b/bundle/manifests/apps.nvidia.com_nimservices.yaml index 419ef0f1..6370c33c 100644 --- a/bundle/manifests/apps.nvidia.com_nimservices.yaml +++ b/bundle/manifests/apps.nvidia.com_nimservices.yaml @@ -477,14 +477,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/bundle/manifests/k8s-nim-operator.clusterserviceversion.yaml b/bundle/manifests/k8s-nim-operator.clusterserviceversion.yaml index ddf1e249..8f70aa3d 100644 --- a/bundle/manifests/k8s-nim-operator.clusterserviceversion.yaml +++ b/bundle/manifests/k8s-nim-operator.clusterserviceversion.yaml @@ -321,6 +321,26 @@ spec: kind: ConfigMap specDescriptors: [ ] statusDescriptors: [ ] + - name: nemocustomizers.apps.nvidia.com + displayName: NemoCustomizer + kind: NemoCustomizer + version: v1alpha1 + description: NEMO Customizer Service + resources: + - version: v1 + kind: Deployment + - version: v1 + kind: Service + - version: v1 + kind: ReplicaSet + - version: v1 + kind: Pod + - version: v1 + kind: Secret + - version: v1 + kind: ConfigMap + specDescriptors: [ ] + statusDescriptors: [ ] required: [] install: strategy: deployment @@ -590,6 +610,32 @@ spec: - get - patch - update + - apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers/finalizers + verbs: + - update + - apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers/status + verbs: + - get + - patch + - update - apiGroups: - batch resources: diff --git a/cmd/main.go b/cmd/main.go index 8b626558..205a87d3 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -221,6 +221,17 @@ func main() { os.Exit(1) } + if err = controller.NewNemoCustomizerReconciler( + mgr.GetClient(), + mgr.GetScheme(), + updater, + render.NewRenderer("/manifests"), + ctrl.Log.WithName("controllers").WithName("NemoCustomizer"), + ).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "NemoCustomizer") + os.Exit(1) + } + // +kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { diff --git a/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml b/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml new file mode 100644 index 00000000..c67c7cfa --- /dev/null +++ b/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml @@ -0,0 +1,2401 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.2 + name: nemocustomizers.apps.nvidia.com +spec: + group: apps.nvidia.com + names: + kind: NemoCustomizer + listKind: NemoCustomizerList + plural: nemocustomizers + singular: nemocustomizer + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: Status + type: string + - format: date-time + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: NemoCustomizer is the Schema for the NemoCustomizer API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NemoCustomizerSpec defines the desired state of NemoCustomizer + properties: + annotations: + additionalProperties: + type: string + type: object + args: + items: + type: string + type: array + command: + items: + type: string + type: array + customizerConfig: + description: CustomizerConfig stores the customizer configuration + for training and models + minLength: 1 + type: string + databaseConfig: + description: DatabaseConfig stores the database configuration + properties: + credentials: + description: |- + DatabaseCredentials stores the configuration to retrieve the database credentials. + Required, must not be nil. + properties: + passwordKey: + default: password + description: |- + PasswordKey is the name of the key in the `CredentialsSecret` secret for the database credentials. + Defaults to "password". + type: string + secretName: + description: |- + SecretName is the name of the secret which has the database credentials for a NEMO service user. + Required, must not be empty. + minLength: 1 + type: string + user: + description: |- + User is the non-root username for a NEMO Service in the database. + Required, must not be empty. + minLength: 1 + type: string + required: + - secretName + - user + type: object + databaseName: + description: |- + DatabaseName is the database name for a NEMO Service. + Required, must not be empty. + minLength: 1 + type: string + host: + description: |- + Host is the hostname of the database. + Required, must not be empty. + minLength: 1 + type: string + port: + default: 5432 + description: |- + Port is the port where the database is reachable at. + If specified, this must be a valid port number, 0 < databasePort < 65536. + Defaults to 5432. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - credentials + - databaseName + - host + type: object + env: + items: + description: EnvVar represents an environment variable present in + a Container. + properties: + name: + description: Name of the environment variable. Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. Cannot + be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath is + written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the specified + API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the exposed + resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + expose: + description: Expose defines attributes to expose the service + properties: + ingress: + description: Ingress defines attributes to enable ingress for + the service + properties: + annotations: + additionalProperties: + type: string + type: object + enabled: + description: ingress, or virtualService - not both + type: boolean + spec: + description: IngressSpec describes the Ingress the user wishes + to exist. + properties: + defaultBackend: + description: |- + defaultBackend is the backend that should handle requests that don't + match any rule. If Rules are not specified, DefaultBackend must be specified. + If DefaultBackend is not set, the handling of requests that do not match any + of the rules will be up to the Ingress controller. + properties: + resource: + description: |- + resource is an ObjectRef to another Kubernetes resource in the namespace + of the Ingress object. If resource is specified, a service.Name and + service.Port must not be specified. + This is a mutually exclusive setting with "Service". + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + service: + description: |- + service references a service as a backend. + This is a mutually exclusive setting with "Resource". + properties: + name: + description: |- + name is the referenced service. The service must exist in + the same namespace as the Ingress object. + type: string + port: + description: |- + port of the referenced service. A port name or port number + is required for a IngressServiceBackend. + properties: + name: + description: |- + name is the name of the port on the Service. + This is a mutually exclusive setting with "Number". + type: string + number: + description: |- + number is the numerical port number (e.g. 80) on the Service. + This is a mutually exclusive setting with "Name". + format: int32 + type: integer + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: object + ingressClassName: + description: |- + ingressClassName is the name of an IngressClass cluster resource. Ingress + controller implementations use this field to know whether they should be + serving this Ingress resource, by a transitive connection + (controller -> IngressClass -> Ingress resource). Although the + `kubernetes.io/ingress.class` annotation (simple constant name) was never + formally defined, it was widely supported by Ingress controllers to create + a direct binding between Ingress controller and Ingress resources. Newly + created Ingress resources should prefer using the field. However, even + though the annotation is officially deprecated, for backwards compatibility + reasons, ingress controllers should still honor that annotation if present. + type: string + rules: + description: |- + rules is a list of host rules used to configure the Ingress. If unspecified, + or no rule matches, all traffic is sent to the default backend. + items: + description: |- + IngressRule represents the rules mapping the paths under a specified host to + the related backend services. Incoming requests are first evaluated for a host + match, then routed to the backend associated with the matching IngressRuleValue. + properties: + host: + description: "host is the fully qualified domain + name of a network host, as defined by RFC 3986.\nNote + the following deviations from the \"host\" part + of the\nURI as defined in RFC 3986:\n1. IPs are + not allowed. Currently an IngressRuleValue can + only apply to\n the IP in the Spec of the parent + Ingress.\n2. The `:` delimiter is not respected + because ports are not allowed.\n\t Currently + the port of an Ingress is implicitly :80 for http + and\n\t :443 for https.\nBoth these may change + in the future.\nIncoming requests are matched + against the host before the\nIngressRuleValue. + If the host is unspecified, the Ingress routes + all\ntraffic based on the specified IngressRuleValue.\n\nhost + can be \"precise\" which is a domain name without + the terminating dot of\na network host (e.g. \"foo.bar.com\") + or \"wildcard\", which is a domain name\nprefixed + with a single wildcard label (e.g. \"*.foo.com\").\nThe + wildcard character '*' must appear by itself as + the first DNS label and\nmatches only a single + label. You cannot have a wildcard label by itself + (e.g. Host == \"*\").\nRequests will be matched + against the Host field in the following way:\n1. + If host is precise, the request matches this rule + if the http host header is equal to Host.\n2. + If host is a wildcard, then the request matches + this rule if the http host header\nis to equal + to the suffix (removing the first label) of the + wildcard rule." + type: string + http: + description: |- + HTTPIngressRuleValue is a list of http selectors pointing to backends. + In the example: http:///? -> backend where + where parts of the url correspond to RFC 3986, this resource will be used + to match against everything after the last '/' and before the first '?' + or '#'. + properties: + paths: + description: paths is a collection of paths + that map requests to backends. + items: + description: |- + HTTPIngressPath associates a path with a backend. Incoming urls matching the + path are forwarded to the backend. + properties: + backend: + description: |- + backend defines the referenced service endpoint to which the traffic + will be forwarded to. + properties: + resource: + description: |- + resource is an ObjectRef to another Kubernetes resource in the namespace + of the Ingress object. If resource is specified, a service.Name and + service.Port must not be specified. + This is a mutually exclusive setting with "Service". + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + service: + description: |- + service references a service as a backend. + This is a mutually exclusive setting with "Resource". + properties: + name: + description: |- + name is the referenced service. The service must exist in + the same namespace as the Ingress object. + type: string + port: + description: |- + port of the referenced service. A port name or port number + is required for a IngressServiceBackend. + properties: + name: + description: |- + name is the name of the port on the Service. + This is a mutually exclusive setting with "Number". + type: string + number: + description: |- + number is the numerical port number (e.g. 80) on the Service. + This is a mutually exclusive setting with "Name". + format: int32 + type: integer + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: object + path: + description: |- + path is matched against the path of an incoming request. Currently it can + contain characters disallowed from the conventional "path" part of a URL + as defined by RFC 3986. Paths must begin with a '/' and must be present + when using PathType with value "Exact" or "Prefix". + type: string + pathType: + description: |- + pathType determines the interpretation of the path matching. PathType can + be one of the following values: + * Exact: Matches the URL path exactly. + * Prefix: Matches based on a URL path prefix split by '/'. Matching is + done on a path element by element basis. A path element refers is the + list of labels in the path split by the '/' separator. A request is a + match for path p if every p is an element-wise prefix of p of the + request path. Note that if the last element of the path is a substring + of the last element in request path, it is not a match (e.g. /foo/bar + matches /foo/bar/baz, but does not match /foo/barbaz). + * ImplementationSpecific: Interpretation of the Path matching is up to + the IngressClass. Implementations can treat this as a separate PathType + or treat it identically to Prefix or Exact path types. + Implementations are required to support all path types. + type: string + required: + - backend + - pathType + type: object + type: array + x-kubernetes-list-type: atomic + required: + - paths + type: object + type: object + type: array + x-kubernetes-list-type: atomic + tls: + description: |- + tls represents the TLS configuration. Currently the Ingress only supports a + single TLS port, 443. If multiple members of this list specify different hosts, + they will be multiplexed on the same port according to the hostname specified + through the SNI TLS extension, if the ingress controller fulfilling the + ingress supports SNI. + items: + description: IngressTLS describes the transport layer + security associated with an ingress. + properties: + hosts: + description: |- + hosts is a list of hosts included in the TLS certificate. The values in + this list must match the name/s used in the tlsSecret. Defaults to the + wildcard host setting for the loadbalancer controller fulfilling this + Ingress, if left unspecified. + items: + type: string + type: array + x-kubernetes-list-type: atomic + secretName: + description: |- + secretName is the name of the secret used to terminate TLS traffic on + port 443. Field is left optional to allow TLS routing based on SNI + hostname alone. If the SNI host in a listener conflicts with the "Host" + header field used by an IngressRule, the SNI host is used for termination + and value of the "Host" header is used for routing. + type: string + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + service: + description: Service defines attributes to create a service + properties: + annotations: + additionalProperties: + type: string + type: object + name: + description: override the default service name + type: string + port: + default: 8000 + description: 'Deprecated: Use Ports instead.' + format: int32 + type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + type: + description: Service Type string describes ingress methods + for a service + type: string + type: object + type: object + groupID: + format: int64 + type: integer + image: + description: Image defines image attributes + properties: + pullPolicy: + type: string + pullSecrets: + items: + type: string + type: array + repository: + type: string + tag: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + livenessProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + metrics: + description: Metrics defines attributes to setup metrics collection + properties: + enabled: + type: boolean + serviceMonitor: + description: for use with the Prometheus Operator and the primary + service object + properties: + additionalLabels: + additionalProperties: + type: string + type: object + annotations: + additionalProperties: + type: string + type: object + interval: + description: |- + Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function. + Supported units: y, w, d, h, m, s, ms + Examples: `30s`, `1m`, `1h20m15s`, `15d` + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + scrapeTimeout: + description: |- + Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function. + Supported units: y, w, d, h, m, s, ms + Examples: `30s`, `1m`, `1h20m15s`, `15d` + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + type: object + nodeSelector: + additionalProperties: + type: string + type: object + otel: + description: OpenTelemetry Settings + properties: + disableLogging: + description: DisableLogging indicates whether Python logging auto-instrumentation + should be disabled. + type: boolean + enabled: + description: Enabled indicates if opentelemetry collector and + tracing are enabled + type: boolean + excludedUrls: + default: + - health + description: ExcludedUrls defines URLs to be excluded from tracing. + items: + type: string + type: array + exporterConfig: + description: ExporterConfig defines configuration for different + OTel exporters + properties: + logsExporter: + default: otlp + description: 'LogsExporter sets the logs exporter: (otlp, + console, none).' + enum: + - otlp + - console + - none + type: string + metricsExporter: + default: otlp + description: 'MetricsExporter sets the metrics exporter: (otlp, + console, none).' + enum: + - otlp + - console + - none + type: string + tracesExporter: + default: otlp + description: 'TracesExporter sets the traces exporter: (otlp, + console, none).' + type: string + type: object + exporterOtlpEndpoint: + description: ExporterOtlpEndpoint is the OTLP collector endpoint. + minLength: 1 + type: string + logLevel: + default: INFO + description: LogLevel defines the log level (e.g., INFO, DEBUG). + type: string + required: + - exporterOtlpEndpoint + type: object + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, associated with + the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + readinessProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + replicas: + default: 1 + minimum: 1 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + runtimeClass: + type: string + scale: + description: Autoscaling defines attributes to automatically scale + the service based on metrics + properties: + annotations: + additionalProperties: + type: string + type: object + enabled: + type: boolean + hpa: + description: HorizontalPodAutoscalerSpec defines the parameters + required to setup HPA + properties: + behavior: + description: |- + HorizontalPodAutoscalerBehavior configures the scaling behavior of the target + in both Up and Down directions (scaleUp and scaleDown fields respectively). + properties: + scaleDown: + description: |- + scaleDown is scaling policy for scaling Down. + If not set, the default value is to allow to scale down to minReplicas pods, with a + 300 second stabilization window (i.e., the highest recommendation for + the last 300sec is used). + properties: + policies: + description: |- + policies is a list of potential scaling polices which can be used during scaling. + At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: |- + periodSeconds specifies the window of time for which the policy should hold true. + PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: type is used to specify the scaling + policy. + type: string + value: + description: |- + value contains the amount of change which is permitted by the policy. + It must be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + description: |- + selectPolicy is used to specify which policy should be used. + If not set, the default value Max is used. + type: string + stabilizationWindowSeconds: + description: |- + stabilizationWindowSeconds is the number of seconds for which past recommendations should be + considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour). + If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window is 300 seconds long). + format: int32 + type: integer + type: object + scaleUp: + description: |- + scaleUp is scaling policy for scaling Up. + If not set, the default value is the higher of: + * increase no more than 4 pods per 60 seconds + * double the number of pods per 60 seconds + No stabilization is used. + properties: + policies: + description: |- + policies is a list of potential scaling polices which can be used during scaling. + At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: |- + periodSeconds specifies the window of time for which the policy should hold true. + PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: type is used to specify the scaling + policy. + type: string + value: + description: |- + value contains the amount of change which is permitted by the policy. + It must be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + description: |- + selectPolicy is used to specify which policy should be used. + If not set, the default value Max is used. + type: string + stabilizationWindowSeconds: + description: |- + stabilizationWindowSeconds is the number of seconds for which past recommendations should be + considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour). + If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window is 300 seconds long). + format: int32 + type: integer + type: object + type: object + maxReplicas: + format: int32 + type: integer + metrics: + items: + description: |- + MetricSpec specifies how to scale based on a single metric + (only `type` and one other matching field should be set at once). + properties: + containerResource: + description: |- + containerResource refers to a resource metric (such as those specified in + requests and limits) known to Kubernetes describing a single container in + each pod of the current scale target (e.g. CPU or memory). Such metrics are + built in to Kubernetes, and have special scaling options on top of those + available to normal per-pod metrics using the "pods" source. + This is an alpha feature and can be enabled by the HPAContainerMetrics feature flag. + properties: + container: + description: container is the name of the container + in the pods of the scaling target + type: string + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - container + - name + - target + type: object + external: + description: |- + external refers to a global metric that is not associated + with any Kubernetes object. It allows autoscaling based on information + coming from components running outside of cluster + (for example length of queue in cloud messaging service, or + QPS from loadbalancer running outside of cluster). + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + object: + description: |- + object refers to a metric describing a single kubernetes object + (for example, hits-per-second on an Ingress object). + properties: + describedObject: + description: describedObject specifies the descriptions + of a object,such as kind,name apiVersion + properties: + apiVersion: + description: apiVersion is the API version of + the referent + type: string + kind: + description: 'kind is the kind of the referent; + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'name is the name of the referent; + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + required: + - kind + - name + type: object + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - describedObject + - metric + - target + type: object + pods: + description: |- + pods refers to a metric describing each pod in the current scale target + (for example, transactions-processed-per-second). The values will be + averaged together before being compared to the target value. + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + resource: + description: |- + resource refers to a resource metric (such as those specified in + requests and limits) known to Kubernetes describing each pod in the + current scale target (e.g. CPU or memory). Such metrics are built in to + Kubernetes, and have special scaling options on top of those available + to normal per-pod metrics using the "pods" source. + properties: + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - name + - target + type: object + type: + description: |- + type is the type of metric source. It should be one of "ContainerResource", "External", + "Object", "Pods" or "Resource", each mapping to a matching field in the object. + Note: "ContainerResource" type is available on when the feature-gate + HPAContainerMetrics is enabled + type: string + required: + - type + type: object + type: array + minReplicas: + format: int32 + type: integer + required: + - maxReplicas + type: object + type: object + scheduler: + description: Scheduler Configuration + properties: + type: + default: volcano + description: Type is the scheduler type (volcano, runai) + enum: + - volcano + - runai + type: string + type: object + startupProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + userID: + format: int64 + type: integer + wandbSecret: + description: WandBSecret stores the secret and encryption key for + the Weights and Biases service. + properties: + apiKeyKey: + default: apiKey + description: |- + APIKeyKey is the key in the Secret that holds the WandB API key. + Defaults to "apiKey". + minLength: 1 + type: string + encryptionKey: + default: encryptionKey + description: |- + EncryptionKey is an optional key in the secret used for encrypting WandB credentials. + This can be used for additional security layers if required. + Defaults to "encryptionKey". + type: string + name: + description: |- + Name is the name of the Kubernetes Secret containing the WandB API key. + Required, must not be empty. + minLength: 1 + type: string + required: + - apiKeyKey + - name + type: object + required: + - customizerConfig + - databaseConfig + - otel + - wandbSecret + type: object + status: + description: NemoCustomizerStatus defines the observed state of NemoCustomizer + properties: + availableReplicas: + format: int32 + type: integer + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + state: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/apps.nvidia.com_nemodatastores.yaml b/config/crd/bases/apps.nvidia.com_nemodatastores.yaml index 6f101cf0..c1ba5905 100644 --- a/config/crd/bases/apps.nvidia.com_nemodatastores.yaml +++ b/config/crd/bases/apps.nvidia.com_nemodatastores.yaml @@ -535,14 +535,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/config/crd/bases/apps.nvidia.com_nemoentitystores.yaml b/config/crd/bases/apps.nvidia.com_nemoentitystores.yaml index 5d277f04..3d4a468d 100644 --- a/config/crd/bases/apps.nvidia.com_nemoentitystores.yaml +++ b/config/crd/bases/apps.nvidia.com_nemoentitystores.yaml @@ -533,14 +533,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml b/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml index 80fa85bd..46c1dd80 100644 --- a/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml +++ b/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml @@ -550,14 +550,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml b/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml index 085924cd..f85ced37 100644 --- a/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml +++ b/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml @@ -509,14 +509,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml index 04224974..18478e01 100644 --- a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml +++ b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml @@ -527,14 +527,87 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information + on service's port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed + by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/config/crd/bases/apps.nvidia.com_nimservices.yaml b/config/crd/bases/apps.nvidia.com_nimservices.yaml index 419ef0f1..6370c33c 100644 --- a/config/crd/bases/apps.nvidia.com_nimservices.yaml +++ b/config/crd/bases/apps.nvidia.com_nimservices.yaml @@ -477,14 +477,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 966bf643..c37e9ccf 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -5,6 +5,7 @@ resources: - bases/apps.nvidia.com_nimservices.yaml - bases/apps.nvidia.com_nimcaches.yaml - bases/apps.nvidia.com_nimpipelines.yaml +- bases/apps.nvidia.com_nemocustomizers.yaml # +kubebuilder:scaffold:crdkustomizeresource patches: @@ -18,6 +19,7 @@ patches: #- path: patches/cainjection_in_nimworkflows.yaml #- path: patches/cainjection_in_nimcaches.yaml #- path: patches/cainjection_in_nimpipelines.yaml +#- path: patches/cainjection_in_nemocustomizers.yaml # +kubebuilder:scaffold:crdkustomizecainjectionpatch # [WEBHOOK] To enable webhook, uncomment the following section diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index af92bc07..df593a02 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -13,6 +13,8 @@ resources: # default, aiding admins in cluster management. Those roles are # not used by the Project itself. You can comment the following lines # if you do not want those helpers be installed with your Project. +- nemocustomizer_editor_role.yaml +- nemocustomizer_viewer_role.yaml - nimpipeline_editor_role.yaml - nimpipeline_viewer_role.yaml - nimcache_editor_role.yaml diff --git a/config/rbac/nemocustomizer_editor_role.yaml b/config/rbac/nemocustomizer_editor_role.yaml new file mode 100644 index 00000000..0e11eeb7 --- /dev/null +++ b/config/rbac/nemocustomizer_editor_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to edit nemocustomizers. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: k8s-nim-operator + app.kubernetes.io/managed-by: kustomize + name: nemocustomizer-editor-role +rules: +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers/status + verbs: + - get diff --git a/config/rbac/nemocustomizer_viewer_role.yaml b/config/rbac/nemocustomizer_viewer_role.yaml new file mode 100644 index 00000000..8a58a0bc --- /dev/null +++ b/config/rbac/nemocustomizer_viewer_role.yaml @@ -0,0 +1,23 @@ +# permissions for end users to view nemocustomizers. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: k8s-nim-operator + app.kubernetes.io/managed-by: kustomize + name: nemocustomizer-viewer-role +rules: +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers + verbs: + - get + - list + - watch +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers/status + verbs: + - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 4a01d8f8..138147dd 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -62,6 +62,7 @@ rules: - apiGroups: - apps.nvidia.com resources: + - nemocustomizers - nemodatastores - nemoentitystores - nemoevaluators @@ -80,6 +81,7 @@ rules: - apiGroups: - apps.nvidia.com resources: + - nemocustomizers/finalizers - nemodatastores/finalizers - nemoentitystores/finalizers - nemoevaluators/finalizers @@ -92,6 +94,7 @@ rules: - apiGroups: - apps.nvidia.com resources: + - nemocustomizers/status - nemodatastores/status - nemoentitystores/status - nemoevaluators/status @@ -128,6 +131,15 @@ rules: - patch - update - watch +- apiGroups: + - batch.volcano.sh + resources: + - jobs + - jobs/status + verbs: + - get + - list + - watch - apiGroups: - config.openshift.io resources: @@ -162,6 +174,28 @@ rules: - patch - update - watch +- apiGroups: + - nodeinfo.volcano.sh + resources: + - numatopologies + verbs: + - get + - list + - watch +- apiGroups: + - nvidia.com + resources: + - nemoentityhandlers + - nemotrainingjobs + - nemotrainingjobs/status + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - rbac.authorization.k8s.io resources: @@ -187,6 +221,17 @@ rules: - patch - update - watch +- apiGroups: + - scheduling.incubator.k8s.io + - scheduling.volcano.sh + resources: + - podgroups + - queues + - queues/status + verbs: + - get + - list + - watch - apiGroups: - scheduling.k8s.io resources: diff --git a/config/samples/apps_v1alpha1_nemocustomizer.yaml b/config/samples/apps_v1alpha1_nemocustomizer.yaml new file mode 100644 index 00000000..ac2a77b3 --- /dev/null +++ b/config/samples/apps_v1alpha1_nemocustomizer.yaml @@ -0,0 +1,145 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoCustomizer +metadata: + labels: + app.kubernetes.io/name: k8s-nim-operator + app.kubernetes.io/managed-by: kustomize + name: nemocustomizer-sample +spec: + wandbSecret: + name: wandb-secret + apiKeyKey: encryption_key + otel: + exporterOtlpEndpoint: http://:4317 + databaseConfig: + credentials: + user: ncsuser + secretName: ncs-pg-existing-secret + passwordKey: password + host: + port: 5432 + databaseName: ncsdb + expose: + service: + type: ClusterIP + ports: + - name: api + port: 8000 + protocol: TCP + - name: internal + port: 9009 + protocol: TCP + image: + repository: "nvidian/nemo-llm/customizer" + tag: "25.02-rc1" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-image-pull-secret + replicas: 1 + resources: + requests: + memory: "256Mi" + cpu: "500m" + limits: + memory: "512Mi" + cpu: "1" + livenessProbe: + enabled: true + probe: + failureThreshold: 5 + httpGet: + path: /v1/health/live + port: api + scheme: HTTP + initialDelaySeconds: 15 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 10 + readinessProbe: + enabled: true + probe: + failureThreshold: 5 + httpGet: + path: /v1/health/ready + port: api + scheme: HTTP + initialDelaySeconds: 15 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + customizerConfig: | + namespace: default + entity_store_url: http://:8000 + nemo_data_store_url: http://:3000 + mlflow_tracking_url: http://:80 + + training: + queue: "default" + image: "nvcr.io/nvidian/nemo-llm/customizer:25.02-rc1" + imagePullSecrets: + - name: ngc-image-pull-secret + pvc: + storageClass: local-path + size: 5Gi + volumeAccessMode: ReadWriteOnce + env: + - name: LOG_LEVEL + value: INFO + + workspace_dir: /pvc/workspace + # volumes reference pre-existing PVCs + volumes: + # Model cache PVC + - name: models + persistentVolumeClaim: + claimName: finetuning-ms-models-pvc + readOnly: True + - name: dshm + emptyDir: + medium: Memory + volumeMounts: + - name: models + mountPath: "/mount/models" + readOnly: True + - name: dshm + mountPath: "/dev/shm" + + # Network configuration for multi node training specific to CSP + training_networking: + - name: "NCCL_IB_SL" + value: 0 + - name: "NCCL_IB_TC" + value: 41 + - name: "NCCL_IB_QPS_PER_CONNECTION" + value: 4 + - name: "UCX_TLS" + value: TCP + - name: "UCX_NET_DEVICES" + value: eth0 + - name: "HCOLL_ENABLE_MCAST_ALL" + value: 0 + - name: "NCCL_IB_GID_INDEX" + value: 3 + + tolerations: + [] + + container_defaults: + imagePullPolicy: IfNotPresent + + models: + meta/llama3-8b-instruct: + enabled: true + max_seq_length: 4096 + micro_batch_size: 2 + model_path: llama3-8b-bf16 + num_parameters: 8000000000 + precision: bf16 + training_job_resources: + - finetuning_type: lora + num_gpus: 2 + training_type: sft + + nemo_data_store_tools: + image: nvcr.io/nvidian/nemo-llm/nds-v2-huggingface-cli:278bb5cd0fde8ad46842daf6d2471134624ae891 + imagePullSecret: ngc-image-pull-secret diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 1cadcfad..fd58ae5e 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -3,4 +3,5 @@ resources: - apps_v1alpha1_nimservice.yaml - apps_v1alpha1_nimcache.yaml - apps_v1alpha1_nimpipeline.yaml +- apps_v1alpha1_nemocustomizer.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml new file mode 100644 index 00000000..c67c7cfa --- /dev/null +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml @@ -0,0 +1,2401 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.2 + name: nemocustomizers.apps.nvidia.com +spec: + group: apps.nvidia.com + names: + kind: NemoCustomizer + listKind: NemoCustomizerList + plural: nemocustomizers + singular: nemocustomizer + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: Status + type: string + - format: date-time + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: NemoCustomizer is the Schema for the NemoCustomizer API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: NemoCustomizerSpec defines the desired state of NemoCustomizer + properties: + annotations: + additionalProperties: + type: string + type: object + args: + items: + type: string + type: array + command: + items: + type: string + type: array + customizerConfig: + description: CustomizerConfig stores the customizer configuration + for training and models + minLength: 1 + type: string + databaseConfig: + description: DatabaseConfig stores the database configuration + properties: + credentials: + description: |- + DatabaseCredentials stores the configuration to retrieve the database credentials. + Required, must not be nil. + properties: + passwordKey: + default: password + description: |- + PasswordKey is the name of the key in the `CredentialsSecret` secret for the database credentials. + Defaults to "password". + type: string + secretName: + description: |- + SecretName is the name of the secret which has the database credentials for a NEMO service user. + Required, must not be empty. + minLength: 1 + type: string + user: + description: |- + User is the non-root username for a NEMO Service in the database. + Required, must not be empty. + minLength: 1 + type: string + required: + - secretName + - user + type: object + databaseName: + description: |- + DatabaseName is the database name for a NEMO Service. + Required, must not be empty. + minLength: 1 + type: string + host: + description: |- + Host is the hostname of the database. + Required, must not be empty. + minLength: 1 + type: string + port: + default: 5432 + description: |- + Port is the port where the database is reachable at. + If specified, this must be a valid port number, 0 < databasePort < 65536. + Defaults to 5432. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - credentials + - databaseName + - host + type: object + env: + items: + description: EnvVar represents an environment variable present in + a Container. + properties: + name: + description: Name of the environment variable. Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. Cannot + be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath is + written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the specified + API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the exposed + resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + expose: + description: Expose defines attributes to expose the service + properties: + ingress: + description: Ingress defines attributes to enable ingress for + the service + properties: + annotations: + additionalProperties: + type: string + type: object + enabled: + description: ingress, or virtualService - not both + type: boolean + spec: + description: IngressSpec describes the Ingress the user wishes + to exist. + properties: + defaultBackend: + description: |- + defaultBackend is the backend that should handle requests that don't + match any rule. If Rules are not specified, DefaultBackend must be specified. + If DefaultBackend is not set, the handling of requests that do not match any + of the rules will be up to the Ingress controller. + properties: + resource: + description: |- + resource is an ObjectRef to another Kubernetes resource in the namespace + of the Ingress object. If resource is specified, a service.Name and + service.Port must not be specified. + This is a mutually exclusive setting with "Service". + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + service: + description: |- + service references a service as a backend. + This is a mutually exclusive setting with "Resource". + properties: + name: + description: |- + name is the referenced service. The service must exist in + the same namespace as the Ingress object. + type: string + port: + description: |- + port of the referenced service. A port name or port number + is required for a IngressServiceBackend. + properties: + name: + description: |- + name is the name of the port on the Service. + This is a mutually exclusive setting with "Number". + type: string + number: + description: |- + number is the numerical port number (e.g. 80) on the Service. + This is a mutually exclusive setting with "Name". + format: int32 + type: integer + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: object + ingressClassName: + description: |- + ingressClassName is the name of an IngressClass cluster resource. Ingress + controller implementations use this field to know whether they should be + serving this Ingress resource, by a transitive connection + (controller -> IngressClass -> Ingress resource). Although the + `kubernetes.io/ingress.class` annotation (simple constant name) was never + formally defined, it was widely supported by Ingress controllers to create + a direct binding between Ingress controller and Ingress resources. Newly + created Ingress resources should prefer using the field. However, even + though the annotation is officially deprecated, for backwards compatibility + reasons, ingress controllers should still honor that annotation if present. + type: string + rules: + description: |- + rules is a list of host rules used to configure the Ingress. If unspecified, + or no rule matches, all traffic is sent to the default backend. + items: + description: |- + IngressRule represents the rules mapping the paths under a specified host to + the related backend services. Incoming requests are first evaluated for a host + match, then routed to the backend associated with the matching IngressRuleValue. + properties: + host: + description: "host is the fully qualified domain + name of a network host, as defined by RFC 3986.\nNote + the following deviations from the \"host\" part + of the\nURI as defined in RFC 3986:\n1. IPs are + not allowed. Currently an IngressRuleValue can + only apply to\n the IP in the Spec of the parent + Ingress.\n2. The `:` delimiter is not respected + because ports are not allowed.\n\t Currently + the port of an Ingress is implicitly :80 for http + and\n\t :443 for https.\nBoth these may change + in the future.\nIncoming requests are matched + against the host before the\nIngressRuleValue. + If the host is unspecified, the Ingress routes + all\ntraffic based on the specified IngressRuleValue.\n\nhost + can be \"precise\" which is a domain name without + the terminating dot of\na network host (e.g. \"foo.bar.com\") + or \"wildcard\", which is a domain name\nprefixed + with a single wildcard label (e.g. \"*.foo.com\").\nThe + wildcard character '*' must appear by itself as + the first DNS label and\nmatches only a single + label. You cannot have a wildcard label by itself + (e.g. Host == \"*\").\nRequests will be matched + against the Host field in the following way:\n1. + If host is precise, the request matches this rule + if the http host header is equal to Host.\n2. + If host is a wildcard, then the request matches + this rule if the http host header\nis to equal + to the suffix (removing the first label) of the + wildcard rule." + type: string + http: + description: |- + HTTPIngressRuleValue is a list of http selectors pointing to backends. + In the example: http:///? -> backend where + where parts of the url correspond to RFC 3986, this resource will be used + to match against everything after the last '/' and before the first '?' + or '#'. + properties: + paths: + description: paths is a collection of paths + that map requests to backends. + items: + description: |- + HTTPIngressPath associates a path with a backend. Incoming urls matching the + path are forwarded to the backend. + properties: + backend: + description: |- + backend defines the referenced service endpoint to which the traffic + will be forwarded to. + properties: + resource: + description: |- + resource is an ObjectRef to another Kubernetes resource in the namespace + of the Ingress object. If resource is specified, a service.Name and + service.Port must not be specified. + This is a mutually exclusive setting with "Service". + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + service: + description: |- + service references a service as a backend. + This is a mutually exclusive setting with "Resource". + properties: + name: + description: |- + name is the referenced service. The service must exist in + the same namespace as the Ingress object. + type: string + port: + description: |- + port of the referenced service. A port name or port number + is required for a IngressServiceBackend. + properties: + name: + description: |- + name is the name of the port on the Service. + This is a mutually exclusive setting with "Number". + type: string + number: + description: |- + number is the numerical port number (e.g. 80) on the Service. + This is a mutually exclusive setting with "Name". + format: int32 + type: integer + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: object + path: + description: |- + path is matched against the path of an incoming request. Currently it can + contain characters disallowed from the conventional "path" part of a URL + as defined by RFC 3986. Paths must begin with a '/' and must be present + when using PathType with value "Exact" or "Prefix". + type: string + pathType: + description: |- + pathType determines the interpretation of the path matching. PathType can + be one of the following values: + * Exact: Matches the URL path exactly. + * Prefix: Matches based on a URL path prefix split by '/'. Matching is + done on a path element by element basis. A path element refers is the + list of labels in the path split by the '/' separator. A request is a + match for path p if every p is an element-wise prefix of p of the + request path. Note that if the last element of the path is a substring + of the last element in request path, it is not a match (e.g. /foo/bar + matches /foo/bar/baz, but does not match /foo/barbaz). + * ImplementationSpecific: Interpretation of the Path matching is up to + the IngressClass. Implementations can treat this as a separate PathType + or treat it identically to Prefix or Exact path types. + Implementations are required to support all path types. + type: string + required: + - backend + - pathType + type: object + type: array + x-kubernetes-list-type: atomic + required: + - paths + type: object + type: object + type: array + x-kubernetes-list-type: atomic + tls: + description: |- + tls represents the TLS configuration. Currently the Ingress only supports a + single TLS port, 443. If multiple members of this list specify different hosts, + they will be multiplexed on the same port according to the hostname specified + through the SNI TLS extension, if the ingress controller fulfilling the + ingress supports SNI. + items: + description: IngressTLS describes the transport layer + security associated with an ingress. + properties: + hosts: + description: |- + hosts is a list of hosts included in the TLS certificate. The values in + this list must match the name/s used in the tlsSecret. Defaults to the + wildcard host setting for the loadbalancer controller fulfilling this + Ingress, if left unspecified. + items: + type: string + type: array + x-kubernetes-list-type: atomic + secretName: + description: |- + secretName is the name of the secret used to terminate TLS traffic on + port 443. Field is left optional to allow TLS routing based on SNI + hostname alone. If the SNI host in a listener conflicts with the "Host" + header field used by an IngressRule, the SNI host is used for termination + and value of the "Host" header is used for routing. + type: string + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + service: + description: Service defines attributes to create a service + properties: + annotations: + additionalProperties: + type: string + type: object + name: + description: override the default service name + type: string + port: + default: 8000 + description: 'Deprecated: Use Ports instead.' + format: int32 + type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + type: + description: Service Type string describes ingress methods + for a service + type: string + type: object + type: object + groupID: + format: int64 + type: integer + image: + description: Image defines image attributes + properties: + pullPolicy: + type: string + pullSecrets: + items: + type: string + type: array + repository: + type: string + tag: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + livenessProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + metrics: + description: Metrics defines attributes to setup metrics collection + properties: + enabled: + type: boolean + serviceMonitor: + description: for use with the Prometheus Operator and the primary + service object + properties: + additionalLabels: + additionalProperties: + type: string + type: object + annotations: + additionalProperties: + type: string + type: object + interval: + description: |- + Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function. + Supported units: y, w, d, h, m, s, ms + Examples: `30s`, `1m`, `1h20m15s`, `15d` + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + scrapeTimeout: + description: |- + Duration is a valid time duration that can be parsed by Prometheus model.ParseDuration() function. + Supported units: y, w, d, h, m, s, ms + Examples: `30s`, `1m`, `1h20m15s`, `15d` + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + type: object + nodeSelector: + additionalProperties: + type: string + type: object + otel: + description: OpenTelemetry Settings + properties: + disableLogging: + description: DisableLogging indicates whether Python logging auto-instrumentation + should be disabled. + type: boolean + enabled: + description: Enabled indicates if opentelemetry collector and + tracing are enabled + type: boolean + excludedUrls: + default: + - health + description: ExcludedUrls defines URLs to be excluded from tracing. + items: + type: string + type: array + exporterConfig: + description: ExporterConfig defines configuration for different + OTel exporters + properties: + logsExporter: + default: otlp + description: 'LogsExporter sets the logs exporter: (otlp, + console, none).' + enum: + - otlp + - console + - none + type: string + metricsExporter: + default: otlp + description: 'MetricsExporter sets the metrics exporter: (otlp, + console, none).' + enum: + - otlp + - console + - none + type: string + tracesExporter: + default: otlp + description: 'TracesExporter sets the traces exporter: (otlp, + console, none).' + type: string + type: object + exporterOtlpEndpoint: + description: ExporterOtlpEndpoint is the OTLP collector endpoint. + minLength: 1 + type: string + logLevel: + default: INFO + description: LogLevel defines the log level (e.g., INFO, DEBUG). + type: string + required: + - exporterOtlpEndpoint + type: object + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, associated with + the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + readinessProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + replicas: + default: 1 + minimum: 1 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + runtimeClass: + type: string + scale: + description: Autoscaling defines attributes to automatically scale + the service based on metrics + properties: + annotations: + additionalProperties: + type: string + type: object + enabled: + type: boolean + hpa: + description: HorizontalPodAutoscalerSpec defines the parameters + required to setup HPA + properties: + behavior: + description: |- + HorizontalPodAutoscalerBehavior configures the scaling behavior of the target + in both Up and Down directions (scaleUp and scaleDown fields respectively). + properties: + scaleDown: + description: |- + scaleDown is scaling policy for scaling Down. + If not set, the default value is to allow to scale down to minReplicas pods, with a + 300 second stabilization window (i.e., the highest recommendation for + the last 300sec is used). + properties: + policies: + description: |- + policies is a list of potential scaling polices which can be used during scaling. + At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: |- + periodSeconds specifies the window of time for which the policy should hold true. + PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: type is used to specify the scaling + policy. + type: string + value: + description: |- + value contains the amount of change which is permitted by the policy. + It must be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + description: |- + selectPolicy is used to specify which policy should be used. + If not set, the default value Max is used. + type: string + stabilizationWindowSeconds: + description: |- + stabilizationWindowSeconds is the number of seconds for which past recommendations should be + considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour). + If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window is 300 seconds long). + format: int32 + type: integer + type: object + scaleUp: + description: |- + scaleUp is scaling policy for scaling Up. + If not set, the default value is the higher of: + * increase no more than 4 pods per 60 seconds + * double the number of pods per 60 seconds + No stabilization is used. + properties: + policies: + description: |- + policies is a list of potential scaling polices which can be used during scaling. + At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: |- + periodSeconds specifies the window of time for which the policy should hold true. + PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: type is used to specify the scaling + policy. + type: string + value: + description: |- + value contains the amount of change which is permitted by the policy. + It must be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + description: |- + selectPolicy is used to specify which policy should be used. + If not set, the default value Max is used. + type: string + stabilizationWindowSeconds: + description: |- + stabilizationWindowSeconds is the number of seconds for which past recommendations should be + considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour). + If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window is 300 seconds long). + format: int32 + type: integer + type: object + type: object + maxReplicas: + format: int32 + type: integer + metrics: + items: + description: |- + MetricSpec specifies how to scale based on a single metric + (only `type` and one other matching field should be set at once). + properties: + containerResource: + description: |- + containerResource refers to a resource metric (such as those specified in + requests and limits) known to Kubernetes describing a single container in + each pod of the current scale target (e.g. CPU or memory). Such metrics are + built in to Kubernetes, and have special scaling options on top of those + available to normal per-pod metrics using the "pods" source. + This is an alpha feature and can be enabled by the HPAContainerMetrics feature flag. + properties: + container: + description: container is the name of the container + in the pods of the scaling target + type: string + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - container + - name + - target + type: object + external: + description: |- + external refers to a global metric that is not associated + with any Kubernetes object. It allows autoscaling based on information + coming from components running outside of cluster + (for example length of queue in cloud messaging service, or + QPS from loadbalancer running outside of cluster). + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + object: + description: |- + object refers to a metric describing a single kubernetes object + (for example, hits-per-second on an Ingress object). + properties: + describedObject: + description: describedObject specifies the descriptions + of a object,such as kind,name apiVersion + properties: + apiVersion: + description: apiVersion is the API version of + the referent + type: string + kind: + description: 'kind is the kind of the referent; + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'name is the name of the referent; + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + required: + - kind + - name + type: object + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - describedObject + - metric + - target + type: object + pods: + description: |- + pods refers to a metric describing each pod in the current scale target + (for example, transactions-processed-per-second). The values will be + averaged together before being compared to the target value. + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: |- + selector is the string-encoded form of a standard kubernetes label selector for the given metric + When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping. + When unset, just the metricName will be used to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + resource: + description: |- + resource refers to a resource metric (such as those specified in + requests and limits) known to Kubernetes describing each pod in the + current scale target (e.g. CPU or memory). Such metrics are built in to + Kubernetes, and have special scaling options on top of those available + to normal per-pod metrics using the "pods" source. + properties: + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: |- + averageUtilization is the target value of the average of the + resource metric across all relevant pods, represented as a percentage of + the requested value of the resource for the pods. + Currently only valid for Resource metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: |- + averageValue is the target value of the average of the + metric across all relevant pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - name + - target + type: object + type: + description: |- + type is the type of metric source. It should be one of "ContainerResource", "External", + "Object", "Pods" or "Resource", each mapping to a matching field in the object. + Note: "ContainerResource" type is available on when the feature-gate + HPAContainerMetrics is enabled + type: string + required: + - type + type: object + type: array + minReplicas: + format: int32 + type: integer + required: + - maxReplicas + type: object + type: object + scheduler: + description: Scheduler Configuration + properties: + type: + default: volcano + description: Type is the scheduler type (volcano, runai) + enum: + - volcano + - runai + type: string + type: object + startupProbe: + description: Probe defines attributes for startup/liveness/readiness + probes + properties: + enabled: + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies the action to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. + properties: + port: + description: Port number of the gRPC service. Number must + be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action involving a TCP + port. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + userID: + format: int64 + type: integer + wandbSecret: + description: WandBSecret stores the secret and encryption key for + the Weights and Biases service. + properties: + apiKeyKey: + default: apiKey + description: |- + APIKeyKey is the key in the Secret that holds the WandB API key. + Defaults to "apiKey". + minLength: 1 + type: string + encryptionKey: + default: encryptionKey + description: |- + EncryptionKey is an optional key in the secret used for encrypting WandB credentials. + This can be used for additional security layers if required. + Defaults to "encryptionKey". + type: string + name: + description: |- + Name is the name of the Kubernetes Secret containing the WandB API key. + Required, must not be empty. + minLength: 1 + type: string + required: + - apiKeyKey + - name + type: object + required: + - customizerConfig + - databaseConfig + - otel + - wandbSecret + type: object + status: + description: NemoCustomizerStatus defines the observed state of NemoCustomizer + properties: + availableReplicas: + format: int32 + type: integer + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + state: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml index 6f101cf0..c1ba5905 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemodatastores.yaml @@ -535,14 +535,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoentitystores.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoentitystores.yaml index 5d277f04..3d4a468d 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoentitystores.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoentitystores.yaml @@ -533,14 +533,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml index 80fa85bd..46c1dd80 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml @@ -550,14 +550,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml index 085924cd..f85ced37 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml @@ -509,14 +509,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml index 04224974..18478e01 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml @@ -527,14 +527,87 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information + on service's port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed + by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml index 419ef0f1..6370c33c 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml @@ -477,14 +477,86 @@ spec: type: string port: default: 8000 + description: 'Deprecated: Use Ports instead.' format: int32 type: integer + ports: + description: Defines multiple ports for the service + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: |- + The application protocol for this port. + This is used as a hint for implementations to offer richer behavior for protocols that they understand. + This field follows standard Kubernetes label syntax. + Valid values are either: + + * Un-prefixed protocol names - reserved for IANA standard service names (as per + RFC-6335 and https://www.iana.org/assignments/service-names). + + * Kubernetes-defined prefixed names: + * 'kubernetes.io/h2c' - HTTP/2 prior knowledge over cleartext as described in https://www.rfc-editor.org/rfc/rfc9113.html#name-starting-http-2-with-prior- + * 'kubernetes.io/ws' - WebSocket over cleartext as described in https://www.rfc-editor.org/rfc/rfc6455 + * 'kubernetes.io/wss' - WebSocket over TLS as described in https://www.rfc-editor.org/rfc/rfc6455 + + * Other protocols should use implementation-defined prefixed names such as + mycompany.com/my-custom-protocol. + type: string + name: + description: |- + The name of this port within the service. This must be a DNS_LABEL. + All ports within a ServiceSpec must have unique names. When considering + the endpoints for a Service, this must match the 'name' field in the + EndpointPort. + Optional if only one ServicePort is defined on this service. + type: string + nodePort: + description: |- + The port on each node on which this service is exposed when type is + NodePort or LoadBalancer. Usually assigned by the system. If a value is + specified, in-range, and not in use it will be used, otherwise the + operation will fail. If not specified, a port will be allocated if this + Service requires one. If this field is specified when creating a + Service which does not need it, creation will fail. This field will be + wiped when updating a Service to no longer need it (e.g. changing type + from NodePort to ClusterIP). + More info: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: |- + The IP protocol for this port. Supports "TCP", "UDP", and "SCTP". + Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the pods targeted by the service. + Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME. + If this is a string, it will be looked up as a named port in the + target Pod's container ports. If this is not specified, the value + of the 'port' field is used (an identity map). + This field is ignored for services with clusterIP=None, and should be + omitted or set equal to the 'port' field. + More info: https://kubernetes.io/docs/concepts/services-networking/service/#defining-a-service + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array type: description: Service Type string describes ingress methods for a service type: string - required: - - port type: object type: object groupID: diff --git a/deployments/helm/k8s-nim-operator/templates/manager-rbac.yaml b/deployments/helm/k8s-nim-operator/templates/manager-rbac.yaml index 73d0ba59..73227f59 100644 --- a/deployments/helm/k8s-nim-operator/templates/manager-rbac.yaml +++ b/deployments/helm/k8s-nim-operator/templates/manager-rbac.yaml @@ -76,6 +76,32 @@ rules: - patch - update - watch +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers/finalizers + verbs: + - update +- apiGroups: + - apps.nvidia.com + resources: + - nemocustomizers/status + verbs: + - get + - patch + - update - apiGroups: - apps.nvidia.com resources: @@ -398,6 +424,53 @@ rules: - subjectaccessreviews verbs: - create +- apiGroups: + - batch.volcano.sh + resources: + - jobs + - jobs/status + verbs: + - get + - list + - watch + verbs: + - get + - list + - watch +- apiGroups: + - nvidia.com + resources: + - nemoentityhandlers + - nemotrainingjobs + - nemotrainingjobs/status + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - scheduling.incubator.k8s.io + - scheduling.volcano.sh + resources: + - podgroups + - queues + - queues/status + verbs: + - get + - list + - watch +- apiGroups: + - nodeinfo.volcano.sh + resources: + - numatopologies + verbs: + - get + - list + - watch + --- apiVersion: rbac.authorization.k8s.io/v1 diff --git a/internal/conditions/conditions.go b/internal/conditions/conditions.go index d9d574d0..b717acc7 100644 --- a/internal/conditions/conditions.go +++ b/internal/conditions/conditions.go @@ -53,6 +53,8 @@ const ( ReasonSCCFailed = "SCCFailed" // ReasonServiceMonitorFailed indicates that the creation of Service Monitor has failed ReasonServiceMonitorFailed = "ServiceMonitorFailed" + // ReasonConfigMapFailed indicates that the creation of configmap has failed + ReasonConfigMapFailed = "ConfigMapFailed" // ReasonDeploymentFailed indicates that the creation of deployment has failed ReasonDeploymentFailed = "DeploymentFailed" // ReasonStatefulSetFailed indicates that the creation of statefulset has failed @@ -84,6 +86,8 @@ func (u *updater) SetConditionsReady(ctx context.Context, obj client.Object, rea return u.SetConditionsReadyNemoGuardrail(ctx, cr, reason, message) case *appsv1alpha1.NemoEntitystore: return u.SetConditionsReadyNemoEntitystore(ctx, cr, reason, message) + case *appsv1alpha1.NemoCustomizer: + return u.SetConditionsReadyNemoCustomizer(ctx, cr, reason, message) default: return fmt.Errorf("unknown CRD type for %v", obj) } @@ -140,6 +144,23 @@ func (u *updater) SetConditionsReadyNemoEntitystore(ctx context.Context, cr *app return u.updateNemoEntitystoreStatus(ctx, cr) } +func (u *updater) SetConditionsReadyNemoCustomizer(ctx context.Context, cr *appsv1alpha1.NemoCustomizer, reason, message string) error { + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: Ready, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + }) + + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: Failed, + Status: metav1.ConditionFalse, + Reason: Ready, + }) + cr.Status.State = v1alpha1.NemoCustomizerStatusReady + return u.updateNemoCustomizerStatus(ctx, cr) +} + func (u *updater) SetConditionsNotReady(ctx context.Context, obj client.Object, reason, message string) error { switch cr := obj.(type) { case *appsv1alpha1.NIMService: @@ -148,6 +169,8 @@ func (u *updater) SetConditionsNotReady(ctx context.Context, obj client.Object, return u.SetConditionsNotReadyNemoGuardrail(ctx, cr, reason, message) case *appsv1alpha1.NemoEntitystore: return u.SetConditionsNotReadyNemoEntitystore(ctx, cr, reason, message) + case *appsv1alpha1.NemoCustomizer: + return u.SetConditionsNotReadyNemoCustomizer(ctx, cr, reason, message) default: return fmt.Errorf("unknown CRD type for %v", obj) } @@ -207,6 +230,24 @@ func (u *updater) SetConditionsNotReadyNemoEntitystore(ctx context.Context, cr * return u.updateNemoEntitystoreStatus(ctx, cr) } +func (u *updater) SetConditionsNotReadyNemoCustomizer(ctx context.Context, cr *appsv1alpha1.NemoCustomizer, reason, message string) error { + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: Ready, + Status: metav1.ConditionFalse, + Reason: reason, + Message: message, + }) + + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: Failed, + Status: metav1.ConditionFalse, + Reason: Ready, + Message: message, + }) + cr.Status.State = v1alpha1.NemoCustomizerStatusNotReady + return u.updateNemoCustomizerStatus(ctx, cr) +} + func (u *updater) SetConditionsFailed(ctx context.Context, obj client.Object, reason, message string) error { switch cr := obj.(type) { case *appsv1alpha1.NIMService: @@ -215,6 +256,8 @@ func (u *updater) SetConditionsFailed(ctx context.Context, obj client.Object, re return u.SetConditionsFailedNemoGuardrail(ctx, cr, reason, message) case *appsv1alpha1.NemoEntitystore: return u.SetConditionsFailedNemoEntitystore(ctx, cr, reason, message) + case *appsv1alpha1.NemoCustomizer: + return u.SetConditionsFailedNemoCustomizer(ctx, cr, reason, message) default: return fmt.Errorf("unknown CRD type for %v", obj) } @@ -271,6 +314,23 @@ func (u *updater) SetConditionsFailedNemoEntitystore(ctx context.Context, cr *ap return u.updateNemoEntitystoreStatus(ctx, cr) } +func (u *updater) SetConditionsFailedNemoCustomizer(ctx context.Context, cr *appsv1alpha1.NemoCustomizer, reason, message string) error { + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: Ready, + Status: metav1.ConditionFalse, + Reason: Failed, + }) + + meta.SetStatusCondition(&cr.Status.Conditions, metav1.Condition{ + Type: Failed, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + }) + cr.Status.State = v1alpha1.NemoCustomizerStatusFailed + return u.updateNemoCustomizerStatus(ctx, cr) +} + func (u *updater) updateNIMServiceStatus(ctx context.Context, cr *appsv1alpha1.NIMService) error { obj := &appsv1alpha1.NIMService{} @@ -311,6 +371,19 @@ func (u *updater) updateNemoEntitystoreStatus(ctx context.Context, cr *appsv1alp return nil } +func (u *updater) updateNemoCustomizerStatus(ctx context.Context, cr *appsv1alpha1.NemoCustomizer) error { + obj := &appsv1alpha1.NemoCustomizer{} + errGet := u.client.Get(ctx, types.NamespacedName{Name: cr.Name, Namespace: cr.GetNamespace()}, obj) + if errGet != nil { + return errGet + } + obj.Status = cr.Status + if err := u.client.Status().Update(ctx, obj); err != nil { + return err + } + return nil +} + // UpdateCondition updates the given condition into the conditions list func UpdateCondition(conditions *[]metav1.Condition, conditionType string, status metav1.ConditionStatus, reason, message string) { for i := range *conditions { diff --git a/internal/controller/nemo_entitystore_controller_test.go b/internal/controller/nemo_entitystore_controller_test.go index 32000aff..19b90e16 100644 --- a/internal/controller/nemo_entitystore_controller_test.go +++ b/internal/controller/nemo_entitystore_controller_test.go @@ -63,14 +63,29 @@ var _ = Describe("NemoEntitystore Controller", func() { Message: "Required value", Field: "spec.databaseConfig", } - requiredDatabaseHostCause = metav1.StatusCause{ + requiredCredentialsCause = metav1.StatusCause{ Type: "FieldValueRequired", Message: "Required value", + Field: "spec.databaseConfig.credentials", + } + invalidDatabaseHostCause = metav1.StatusCause{ + Type: "FieldValueInvalid", + Message: "Invalid value: \"\": spec.databaseConfig.host in body should be at least 1 chars long", Field: "spec.databaseConfig.host", } - requiredDatabaseNameCause = metav1.StatusCause{ - Type: "FieldValueRequired", - Message: "Required value", + invalidUserCause = metav1.StatusCause{ + Type: "FieldValueInvalid", + Message: "Invalid value: \"\": spec.databaseConfig.credentials.user in body should be at least 1 chars long", + Field: "spec.databaseConfig.credentials.user", + } + invalidSecretNameCause = metav1.StatusCause{ + Type: "FieldValueInvalid", + Message: "Invalid value: \"\": spec.databaseConfig.credentials.secretName in body should be at least 1 chars long", + Field: "spec.databaseConfig.credentials.secretName", + } + invalidDatabaseNameCause = metav1.StatusCause{ + Type: "FieldValueInvalid", + Message: "Invalid value: \"\": spec.databaseConfig.databaseName in body should be at least 1 chars long", Field: "spec.databaseConfig.databaseName", } invalidDatabasePortCause = metav1.StatusCause{ @@ -78,21 +93,6 @@ var _ = Describe("NemoEntitystore Controller", func() { Message: "Invalid value: 65536: spec.databaseConfig.port in body should be less than or equal to 65535", Field: "spec.databaseConfig.port", } - requiredCredentialsCause = metav1.StatusCause{ - Type: "FieldValueRequired", - Message: "Required value", - Field: "spec.databaseConfig.credentials", - } - requiredUserCause = metav1.StatusCause{ - Type: "FieldValueRequired", - Message: "Required value", - Field: "spec.databaseConfig.credentials.user", - } - requiredSecretNameCause = metav1.StatusCause{ - Type: "FieldValueRequired", - Message: "Required value", - Field: "spec.databaseConfig.credentials.secretName", - } ) BeforeEach(func() { @@ -192,7 +192,7 @@ var _ = Describe("NemoEntitystore Controller", func() { Expect(err).To(HaveOccurred()) Expect(errors.IsInvalid(err)).To(BeTrue()) statusErr := err.(*errors.StatusError) - Expect(statusErr.ErrStatus.Details.Causes).To(ContainElements(requiredDatabaseHostCause, requiredDatabaseNameCause, requiredCredentialsCause)) + Expect(statusErr.ErrStatus.Details.Causes).To(ContainElements(invalidDatabaseHostCause, invalidDatabaseNameCause, requiredCredentialsCause)) }) It("should reject the resource if databasePort is invalid", func() { @@ -241,7 +241,7 @@ var _ = Describe("NemoEntitystore Controller", func() { Expect(err).To(HaveOccurred()) Expect(errors.IsInvalid(err)).To(BeTrue()) statusErr := err.(*errors.StatusError) - Expect(statusErr.ErrStatus.Details.Causes).To(ContainElements(requiredSecretNameCause, requiredUserCause)) + Expect(statusErr.ErrStatus.Details.Causes).To(ContainElements(invalidSecretNameCause, invalidUserCause)) }) It("should reject the resource if databaseConfig credentials is invalid", func() { @@ -266,7 +266,7 @@ var _ = Describe("NemoEntitystore Controller", func() { Expect(err).To(HaveOccurred()) Expect(errors.IsInvalid(err)).To(BeTrue()) statusErr := err.(*errors.StatusError) - Expect(statusErr.ErrStatus.Details.Causes).To(ContainElements(requiredSecretNameCause, requiredUserCause)) + Expect(statusErr.ErrStatus.Details.Causes).To(ContainElements(invalidSecretNameCause, invalidUserCause)) }) It("should successfully reconcile the NemoEntityStore resource", func() { diff --git a/internal/controller/nemocustomizer_controller.go b/internal/controller/nemocustomizer_controller.go new file mode 100644 index 00000000..7f162deb --- /dev/null +++ b/internal/controller/nemocustomizer_controller.go @@ -0,0 +1,595 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "reflect" + + appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" + "github.com/NVIDIA/k8s-nim-operator/internal/conditions" + "github.com/NVIDIA/k8s-nim-operator/internal/k8sutil" + "github.com/NVIDIA/k8s-nim-operator/internal/render" + "github.com/NVIDIA/k8s-nim-operator/internal/shared" + "github.com/NVIDIA/k8s-nim-operator/internal/utils" + "github.com/go-logr/logr" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + autoscalingv2 "k8s.io/api/autoscaling/v2" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +// NemoCustomizerFinalizer is the finalizer annotation +const NemoCustomizerFinalizer = "finalizer.nemocustomizer.apps.nvidia.com" + +// NemoCustomizerReconciler reconciles a NemoCustomizer object +type NemoCustomizerReconciler struct { + client.Client + scheme *runtime.Scheme + log logr.Logger + updater conditions.Updater + renderer render.Renderer + Config *rest.Config + recorder record.EventRecorder + orchestratorType k8sutil.OrchestratorType +} + +// Ensure NemoCustomizerReconciler implements the Reconciler interface +var _ shared.Reconciler = &NemoCustomizerReconciler{} + +// NewNemoCustomizerReconciler creates a new reconciler for NemoCustomizer with the given platform +func NewNemoCustomizerReconciler(client client.Client, scheme *runtime.Scheme, updater conditions.Updater, renderer render.Renderer, log logr.Logger) *NemoCustomizerReconciler { + return &NemoCustomizerReconciler{ + Client: client, + scheme: scheme, + updater: updater, + renderer: renderer, + log: log, + } +} + +// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nemocustomizers,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nemocustomizers/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=apps.nvidia.com,resources=nemocustomizers/finalizers,verbs=update +// +kubebuilder:rbac:groups=nvidia.com,resources=nemotrainingjobs;nemotrainingjobs/status;nemoentityhandlers,verbs=create;get;list;watch;update;delete;patch +// +kubebuilder:rbac:groups=batch.volcano.sh,resources=jobs;jobs/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=nodeinfo.volcano.sh,resources=numatopologies,verbs=get;list;watch +// +kubebuilder:rbac:groups=scheduling.incubator.k8s.io;scheduling.volcano.sh,resources=queues;queues/status;podgroups,verbs=get;list;watch +// +kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions;proxies,verbs=get;list;watch +// +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=use,resourceNames=nonroot +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=serviceaccounts;pods;pods/eviction;services;services/finalizers;endpoints,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims;configmaps;secrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=apps,resources=deployments;statefulsets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=scheduling.k8s.io,resources=priorityclasses,verbs=get;list;watch;create +// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalars,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups="",resources=events,verbs=create;update;patch +// +kubebuilder:rbac:groups="nodeinfo.volcano.sh",resources=numatopologies,verbs=get;list;watch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the NemoCustomizer object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.2/pkg/reconcile +func (r *NemoCustomizerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + defer r.refreshMetrics(ctx) + + // Fetch the NemoCustomizer instance + NemoCustomizer := &appsv1alpha1.NemoCustomizer{} + if err := r.Get(ctx, req.NamespacedName, NemoCustomizer); err != nil { + if client.IgnoreNotFound(err) != nil { + logger.Error(err, "unable to fetch NemoCustomizer", "name", req.NamespacedName) + } + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + logger.Info("Reconciling", "NemoCustomizer", NemoCustomizer.Name) + + // Check if the instance is marked for deletion + if NemoCustomizer.ObjectMeta.DeletionTimestamp.IsZero() { + // Add finalizer if not present + if !controllerutil.ContainsFinalizer(NemoCustomizer, NemoCustomizerFinalizer) { + controllerutil.AddFinalizer(NemoCustomizer, NemoCustomizerFinalizer) + if err := r.Update(ctx, NemoCustomizer); err != nil { + return ctrl.Result{}, err + } + } + } else { + // The instance is being deleted + if controllerutil.ContainsFinalizer(NemoCustomizer, NemoCustomizerFinalizer) { + // Perform platform specific cleanup of resources + if err := r.cleanupNemoCustomizer(ctx, NemoCustomizer); err != nil { + r.GetEventRecorder().Eventf(NemoCustomizer, corev1.EventTypeNormal, "Delete", + "NemoCustomizer %s in deleted", NemoCustomizer.Name) + return ctrl.Result{}, err + } + + // Remove finalizer to allow for deletion + controllerutil.RemoveFinalizer(NemoCustomizer, NemoCustomizerFinalizer) + if err := r.Update(ctx, NemoCustomizer); err != nil { + r.GetEventRecorder().Eventf(NemoCustomizer, corev1.EventTypeNormal, "Delete", + "NemoCustomizer %s finalizer removed", NemoCustomizer.Name) + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + } + + // Fetch container orchestrator type + _, err := r.GetOrchestratorType() + if err != nil { + return ctrl.Result{}, fmt.Errorf("Unable to get container orchestrator type, %v", err) + } + + // Handle platform-specific reconciliation + if result, err := r.reconcileNemoCustomizer(ctx, NemoCustomizer); err != nil { + logger.Error(err, "error reconciling NemoCustomizer", "name", NemoCustomizer.Name) + return result, err + } + + return ctrl.Result{}, nil +} + +func (r *NemoCustomizerReconciler) cleanupNemoCustomizer(ctx context.Context, nemoCustomizer *appsv1alpha1.NemoCustomizer) error { + // All dependent (owned) objects will be automatically garbage collected. + // TODO: Handle any custom cleanup logic for the NIM microservice + return nil +} + +// GetScheme returns the scheme of the reconciler +func (r *NemoCustomizerReconciler) GetScheme() *runtime.Scheme { + return r.scheme +} + +// GetLogger returns the logger of the reconciler +func (r *NemoCustomizerReconciler) GetLogger() logr.Logger { + return r.log +} + +// GetClient returns the client instance +func (r *NemoCustomizerReconciler) GetClient() client.Client { + return r.Client +} + +// GetConfig returns the rest config +func (r *NemoCustomizerReconciler) GetConfig() *rest.Config { + return r.Config +} + +// GetUpdater returns the conditions updater instance +func (r *NemoCustomizerReconciler) GetUpdater() conditions.Updater { + return r.updater +} + +// GetRenderer returns the renderer instance +func (r *NemoCustomizerReconciler) GetRenderer() render.Renderer { + return r.renderer +} + +// GetEventRecorder returns the event recorder +func (r *NemoCustomizerReconciler) GetEventRecorder() record.EventRecorder { + return r.recorder +} + +// GetOrchestratorType returns the container platform type +func (r *NemoCustomizerReconciler) GetOrchestratorType() (k8sutil.OrchestratorType, error) { + if r.orchestratorType == "" { + orchestratorType, err := k8sutil.GetOrchestratorType(r.GetClient()) + if err != nil { + return k8sutil.Unknown, fmt.Errorf("Unable to get container orchestrator type, %v", err) + } + r.orchestratorType = orchestratorType + r.GetLogger().Info("Container orchestrator is successfully set", "type", orchestratorType) + } + return r.orchestratorType, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *NemoCustomizerReconciler) SetupWithManager(mgr ctrl.Manager) error { + r.recorder = mgr.GetEventRecorderFor("nemo-customizer-service-controller") + return ctrl.NewControllerManagedBy(mgr). + For(&appsv1alpha1.NemoCustomizer{}). + Owns(&appsv1.Deployment{}). + Owns(&appsv1.StatefulSet{}). + Owns(&corev1.Service{}). + Owns(&corev1.ServiceAccount{}). + Owns(&rbacv1.Role{}). + Owns(&rbacv1.RoleBinding{}). + Owns(&networkingv1.Ingress{}). + Owns(&autoscalingv2.HorizontalPodAutoscaler{}). + Owns(&corev1.ConfigMap{}). + WithEventFilter(predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + // Type assert to NemoCustomizer + if oldNemoCustomizer, ok := e.ObjectOld.(*appsv1alpha1.NemoCustomizer); ok { + newNemoCustomizer := e.ObjectNew.(*appsv1alpha1.NemoCustomizer) + + // Handle case where object is marked for deletion + if !newNemoCustomizer.ObjectMeta.DeletionTimestamp.IsZero() { + return true + } + + // Handle only spec updates + return !reflect.DeepEqual(oldNemoCustomizer.Spec, newNemoCustomizer.Spec) + } + // For other types we watch, reconcile them + return true + }, + }). + Complete(r) +} + +func (r *NemoCustomizerReconciler) refreshMetrics(ctx context.Context) { + logger := log.FromContext(ctx) + // List all instances + NemoCustomizerList := &appsv1alpha1.NemoCustomizerList{} + err := r.Client.List(ctx, NemoCustomizerList, &client.ListOptions{}) + if err != nil { + logger.Error(err, "unable to list NemoCustomizer in the cluster") + return + } +} + +func (r *NemoCustomizerReconciler) reconcileNemoCustomizer(ctx context.Context, NemoCustomizer *appsv1alpha1.NemoCustomizer) (ctrl.Result, error) { + logger := log.FromContext(ctx) + var err error + defer func() { + if err != nil { + r.GetEventRecorder().Eventf(NemoCustomizer, corev1.EventTypeWarning, conditions.Failed, + "NemoCustomizer %s failed, msg: %s", NemoCustomizer.Name, err.Error()) + } + }() + // Generate annotation for the current operator-version and apply to all resources + // Get generic name for all resources + namespacedName := types.NamespacedName{Name: NemoCustomizer.GetName(), Namespace: NemoCustomizer.GetNamespace()} + renderer := r.GetRenderer() + + // Sync serviceaccount + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &corev1.ServiceAccount{}, func() (client.Object, error) { + return renderer.ServiceAccount(NemoCustomizer.GetServiceAccountParams()) + }, "serviceaccount", conditions.ReasonServiceAccountFailed) + if err != nil { + return ctrl.Result{}, err + } + + // Sync role + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &rbacv1.Role{}, func() (client.Object, error) { + return renderer.Role(NemoCustomizer.GetRoleParams()) + }, "role", conditions.ReasonRoleFailed) + if err != nil { + return ctrl.Result{}, err + } + + // Sync rolebinding + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &rbacv1.RoleBinding{}, func() (client.Object, error) { + return renderer.RoleBinding(NemoCustomizer.GetRoleBindingParams()) + }, "rolebinding", conditions.ReasonRoleBindingFailed) + if err != nil { + return ctrl.Result{}, err + } + + // Sync service + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &corev1.Service{}, func() (client.Object, error) { + return renderer.Service(NemoCustomizer.GetServiceParams()) + }, "service", conditions.ReasonServiceFailed) + if err != nil { + return ctrl.Result{}, err + } + + // Sync ingress + if NemoCustomizer.IsIngressEnabled() { + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &networkingv1.Ingress{}, func() (client.Object, error) { + return renderer.Ingress(NemoCustomizer.GetIngressParams()) + }, "ingress", conditions.ReasonIngressFailed) + if err != nil { + return ctrl.Result{}, err + } + } else { + err = r.cleanupResource(ctx, &networkingv1.Ingress{}, namespacedName) + if err != nil && !errors.IsNotFound(err) { + return ctrl.Result{}, err + } + } + + // Sync HPA + if NemoCustomizer.IsAutoScalingEnabled() { + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &autoscalingv2.HorizontalPodAutoscaler{}, func() (client.Object, error) { + return renderer.HPA(NemoCustomizer.GetHPAParams()) + }, "hpa", conditions.ReasonHPAFailed) + if err != nil { + return ctrl.Result{}, err + } + } else { + // If autoscaling is disabled, ensure the HPA is deleted + err = r.cleanupResource(ctx, &autoscalingv2.HorizontalPodAutoscaler{}, namespacedName) + if err != nil { + return ctrl.Result{}, err + } + } + + // Sync Service Monitor + if NemoCustomizer.IsServiceMonitorEnabled() { + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &monitoringv1.ServiceMonitor{}, func() (client.Object, error) { + return renderer.ServiceMonitor(NemoCustomizer.GetServiceMonitorParams()) + }, "servicemonitor", conditions.ReasonServiceMonitorFailed) + if err != nil { + return ctrl.Result{}, err + } + } + + // Sync Customizer ConfigMap + err = r.createOrUpdateConfig(ctx, NemoCustomizer) + if err != nil { + return ctrl.Result{}, err + } + + // Get params to render Deployment resource + deploymentParams := NemoCustomizer.GetDeploymentParams() + + // Calculate the hash of the config data + configHash := utils.CalculateHash(NemoCustomizer.Spec.CustomizerConfig) + annotations := deploymentParams.Annotations + if annotations == nil { + annotations = make(map[string]string) + } + + // Check if the hash has changed + if annotations["config-hash"] != configHash { + annotations["config-hash"] = configHash + deploymentParams.Annotations = annotations + } + + // Setup volume mounts with customizer config + deploymentParams.Volumes = NemoCustomizer.GetVolumes() + deploymentParams.VolumeMounts = NemoCustomizer.GetVolumeMounts() + + // Sync deployment + err = r.renderAndSyncResource(ctx, NemoCustomizer, &renderer, &appsv1.Deployment{}, func() (client.Object, error) { + return renderer.Deployment(deploymentParams) + }, "deployment", conditions.ReasonDeploymentFailed) + if err != nil { + return ctrl.Result{}, err + } + + // Wait for deployment + msg, ready, err := r.isDeploymentReady(ctx, &namespacedName) + if err != nil { + return ctrl.Result{}, err + } + + if !ready { + // Update status as NotReady + err = r.updater.SetConditionsNotReady(ctx, NemoCustomizer, conditions.NotReady, msg) + r.GetEventRecorder().Eventf(NemoCustomizer, corev1.EventTypeNormal, conditions.NotReady, + "NemoCustomizer %s not ready yet, msg: %s", NemoCustomizer.Name, msg) + } else { + // Update status as ready + err = r.updater.SetConditionsReady(ctx, NemoCustomizer, conditions.Ready, msg) + r.GetEventRecorder().Eventf(NemoCustomizer, corev1.EventTypeNormal, conditions.Ready, + "NemoCustomizer %s ready, msg: %s", NemoCustomizer.Name, msg) + } + + if err != nil { + logger.Error(err, "Unable to update status") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +func (r *NemoCustomizerReconciler) createOrUpdateConfig(ctx context.Context, nemoCustomizer *appsv1alpha1.NemoCustomizer) error { + logger := log.FromContext(ctx) + + namespacedName := types.NamespacedName{Name: nemoCustomizer.GetConfigName(), Namespace: nemoCustomizer.GetNamespace()} + // Define the ConfigMap object + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: nemoCustomizer.GetConfigName(), + Namespace: nemoCustomizer.GetNamespace(), + }, + } + + // Initialize the ConfigMap data + configMap.Data = map[string]string{ + "config.yaml": nemoCustomizer.Spec.CustomizerConfig, + } + + if err := controllerutil.SetControllerReference(nemoCustomizer, configMap, r.GetScheme()); err != nil { + logger.Error(err, "failed to set owner", namespacedName) + statusError := r.updater.SetConditionsFailed(ctx, nemoCustomizer, conditions.ReasonConfigMapFailed, err.Error()) + if statusError != nil { + logger.Error(statusError, "failed to update status", "NemoCustomizer", nemoCustomizer.GetName()) + } + return err + } + + err := r.syncResource(ctx, &corev1.ConfigMap{}, configMap, namespacedName) + if err != nil { + logger.Error(err, "failed to sync", namespacedName) + statusError := r.updater.SetConditionsFailed(ctx, nemoCustomizer, conditions.ReasonConfigMapFailed, err.Error()) + if statusError != nil { + logger.Error(statusError, "failed to update status", "NemoCustomizer", nemoCustomizer.GetName()) + } + return err + } + + return nil +} + +func (r *NemoCustomizerReconciler) renderAndSyncResource(ctx context.Context, NemoCustomizer client.Object, renderer *render.Renderer, obj client.Object, renderFunc func() (client.Object, error), conditionType string, reason string) error { + logger := log.FromContext(ctx) + + namespacedName := types.NamespacedName{Name: NemoCustomizer.GetName(), Namespace: NemoCustomizer.GetNamespace()} + + resource, err := renderFunc() + if err != nil { + logger.Error(err, "failed to render", conditionType, namespacedName) + statusError := r.updater.SetConditionsFailed(ctx, NemoCustomizer, reason, err.Error()) + if statusError != nil { + logger.Error(statusError, "failed to update status", "NemoCustomizer", NemoCustomizer.GetName()) + } + return err + } + + // Check if the resource is nil + if resource == nil { + logger.V(2).Info("rendered nil resource") + return nil + } + + metaAccessor, ok := resource.(metav1.Object) + if !ok { + logger.V(2).Info("rendered un-initialized resource") + return nil + } + + if metaAccessor == nil || metaAccessor.GetName() == "" || metaAccessor.GetNamespace() == "" { + logger.V(2).Info("rendered un-initialized resource") + return nil + } + + if err = controllerutil.SetControllerReference(NemoCustomizer, resource, r.GetScheme()); err != nil { + logger.Error(err, "failed to set owner", conditionType, namespacedName) + statusError := r.updater.SetConditionsFailed(ctx, NemoCustomizer, reason, err.Error()) + if statusError != nil { + logger.Error(statusError, "failed to update status", "NemoCustomizer", NemoCustomizer.GetName()) + } + return err + } + + err = r.syncResource(ctx, obj, resource, namespacedName) + if err != nil { + logger.Error(err, "failed to sync", conditionType, namespacedName) + statusError := r.updater.SetConditionsFailed(ctx, NemoCustomizer, reason, err.Error()) + if statusError != nil { + logger.Error(statusError, "failed to update status", "NemoCustomizer", NemoCustomizer.GetName()) + } + return err + } + return nil +} + +// CheckDeploymentReadiness checks if the Deployment is ready +func (r *NemoCustomizerReconciler) isDeploymentReady(ctx context.Context, namespacedName *types.NamespacedName) (string, bool, error) { + deployment := &appsv1.Deployment{} + err := r.Get(ctx, client.ObjectKey{Name: namespacedName.Name, Namespace: namespacedName.Namespace}, deployment) + if err != nil { + if errors.IsNotFound(err) { + return "", false, nil + } + return "", false, err + } + + cond := getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing) + if cond != nil && cond.Reason == "ProgressDeadlineExceeded" { + return fmt.Sprintf("deployment %q exceeded its progress deadline", deployment.Name), false, nil + } + if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas { + return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas), false, nil + } + if deployment.Status.Replicas > deployment.Status.UpdatedReplicas { + return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d old replicas are pending termination...\n", deployment.Name, deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil + } + if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas { + return fmt.Sprintf("Waiting for deployment %q rollout to finish: %d of %d updated replicas are available...\n", deployment.Name, deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil + } + return fmt.Sprintf("deployment %q successfully rolled out\n", deployment.Name), true, nil +} + +func (r *NemoCustomizerReconciler) syncResource(ctx context.Context, obj client.Object, desired client.Object, namespacedName types.NamespacedName) error { + logger := log.FromContext(ctx) + + err := r.Get(ctx, namespacedName, obj) + if err != nil && !errors.IsNotFound(err) { + return err + } + + if !utils.IsSpecChanged(obj, desired) { + logger.V(2).Info("Object spec has not changed, skipping update", "obj", obj) + return nil + } + logger.V(2).Info("Object spec has changed, updating") + + if errors.IsNotFound(err) { + err = r.Create(ctx, desired) + if err != nil { + return err + } + } else { + err = r.Update(ctx, desired) + if err != nil { + return err + } + } + return nil +} + +// cleanupResource deletes the given Kubernetes resource if it exists. +// If the resource does not exist or an error occurs during deletion, the function returns nil or the error. +// +// Parameters: +// ctx (context.Context): The context for the operation. +// obj (client.Object): The Kubernetes resource to delete. +// namespacedName (types.NamespacedName): The namespaced name of the resource. +// +// Returns: +// error: An error if the resource deletion fails, or nil if the resource is not found or deletion is successful. +func (r *NemoCustomizerReconciler) cleanupResource(ctx context.Context, obj client.Object, namespacedName types.NamespacedName) error { + + logger := log.FromContext(ctx) + + err := r.Get(ctx, namespacedName, obj) + if err != nil && !errors.IsNotFound(err) { + return err + } + + if errors.IsNotFound(err) { + return nil + } + + err = r.Delete(ctx, obj) + if err != nil { + return err + } + logger.V(2).Info("NIM Service object changed, deleting ", "obj", obj) + return nil +} diff --git a/internal/controller/nemocustomizer_controller_test.go b/internal/controller/nemocustomizer_controller_test.go new file mode 100644 index 00000000..fa9dcbb0 --- /dev/null +++ b/internal/controller/nemocustomizer_controller_test.go @@ -0,0 +1,504 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "path/filepath" + "sort" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + autoscalingv2 "k8s.io/api/autoscaling/v2" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + crClient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" + "github.com/NVIDIA/k8s-nim-operator/internal/conditions" + "github.com/NVIDIA/k8s-nim-operator/internal/render" +) + +func sortVolumeMounts(volumeMounts []corev1.VolumeMount) { + sort.SliceStable(volumeMounts, func(i, j int) bool { + return volumeMounts[i].Name < volumeMounts[j].Name + }) +} + +func sortVolumes(volumes []corev1.Volume) { + sort.SliceStable(volumes, func(i, j int) bool { + return volumes[i].Name < volumes[j].Name + }) +} + +var _ = Describe("NemoCustomizer Controller", func() { + var ( + client crClient.Client + reconciler *NemoCustomizerReconciler + scheme *runtime.Scheme + nemoCustomizer *appsv1alpha1.NemoCustomizer + volumeMounts []corev1.VolumeMount + volumes []corev1.Volume + ctx context.Context + ) + + BeforeEach(func() { + scheme = runtime.NewScheme() + Expect(appsv1alpha1.AddToScheme(scheme)).To(Succeed()) + Expect(appsv1.AddToScheme(scheme)).To(Succeed()) + Expect(rbacv1.AddToScheme(scheme)).To(Succeed()) + Expect(autoscalingv2.AddToScheme(scheme)).To(Succeed()) + Expect(networkingv1.AddToScheme(scheme)).To(Succeed()) + Expect(corev1.AddToScheme(scheme)).To(Succeed()) + Expect(monitoringv1.AddToScheme(scheme)).To(Succeed()) + Expect(batchv1.AddToScheme(scheme)).To(Succeed()) + + client = fake.NewClientBuilder().WithScheme(scheme). + WithStatusSubresource(&appsv1alpha1.NemoCustomizer{}). + WithStatusSubresource(&appsv1.Deployment{}). + Build() + + ctx = context.Background() + minReplicas := int32(1) + manifestsDir, err := filepath.Abs("../../manifests") + Expect(err).ToNot(HaveOccurred()) + + reconciler = &NemoCustomizerReconciler{ + Client: client, + scheme: scheme, + updater: conditions.NewUpdater(client), + renderer: render.NewRenderer(manifestsDir), + recorder: record.NewFakeRecorder(1000), + } + + nemoCustomizer = &appsv1alpha1.NemoCustomizer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nemocustomizer", + Namespace: "default", + }, + Spec: appsv1alpha1.NemoCustomizerSpec{ + Labels: map[string]string{"app": "nemo-customizer"}, + Annotations: map[string]string{"annotation-key": "annotation-value"}, + Image: appsv1alpha1.Image{Repository: "nvcr.io/nvidia/nemo-customizer", PullPolicy: "IfNotPresent", Tag: "v0.1.0", PullSecrets: []string{"ngc-secret"}}, + Env: []corev1.EnvVar{ + { + Name: "custom-env", + Value: "custom-value", + }, + }, + CustomizerConfig: "test-training-data", + WandBSecret: appsv1alpha1.WandBSecret{ + Name: "wandb-secret", + APIKeyKey: "api_key", + EncryptionKey: "encryption_key", + }, + OpenTelemetry: appsv1alpha1.OTelSpec{ + Enabled: ptr.To[bool](true), + DisableLogging: ptr.To[bool](false), + ExporterOtlpEndpoint: "http://opentelemetry-collector.default.svc.cluster.local:4317", + }, + DatabaseConfig: appsv1alpha1.DatabaseConfig{ + Credentials: &appsv1alpha1.DatabaseCredentials{ + User: "ncsuser", + SecretName: "ncs-pg-existing-secret", + PasswordKey: "password", + }, + Host: "ncs-pg.default.svc.cluster.local", + Port: 5432, + DatabaseName: "ncsdb", + }, + Replicas: 1, + Resources: &corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("512Mi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + ReadinessProbe: appsv1alpha1.Probe{ + Enabled: ptr.To[bool](true), + Probe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/v1/health/ready", + Port: intstr.IntOrString{IntVal: 8000}, + }, + }, + InitialDelaySeconds: 15, + PeriodSeconds: 10, + TimeoutSeconds: 10, + FailureThreshold: 5, + }, + }, + LivenessProbe: appsv1alpha1.Probe{ + Enabled: ptr.To[bool](true), + Probe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/v1/health/live", + Port: intstr.IntOrString{StrVal: "api"}, + }, + }, + InitialDelaySeconds: 15, + PeriodSeconds: 10, + TimeoutSeconds: 10, + FailureThreshold: 5, + }, + }, + NodeSelector: map[string]string{"gpu-type": "h100"}, + Tolerations: []corev1.Toleration{ + { + Key: "key1", + Operator: corev1.TolerationOpEqual, + Value: "value1", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + Expose: appsv1alpha1.Expose{ + Service: appsv1alpha1.Service{ + Type: corev1.ServiceTypeClusterIP, + Ports: []corev1.ServicePort{ + {Name: "api", Port: 8000, Protocol: corev1.ProtocolTCP}, + {Name: "internal", Port: 9009, Protocol: corev1.ProtocolTCP}, + }, + Annotations: map[string]string{ + "annotation-key-specific": "service", + }, + }, + Ingress: appsv1alpha1.Ingress{ + Enabled: ptr.To[bool](true), + Annotations: map[string]string{"annotation-key-specific": "ingress"}, + Spec: networkingv1.IngressSpec{ + Rules: []networkingv1.IngressRule{ + { + Host: "test-nemocustomizer.default.example.com", + IngressRuleValue: networkingv1.IngressRuleValue{ + HTTP: &networkingv1.HTTPIngressRuleValue{ + Paths: []networkingv1.HTTPIngressPath{ + { + Path: "/", + Backend: networkingv1.IngressBackend{ + Service: &networkingv1.IngressServiceBackend{ + Name: "test-nemocustomizer", + Port: networkingv1.ServiceBackendPort{ + Number: 8000, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + Scale: appsv1alpha1.Autoscaling{ + Enabled: ptr.To[bool](true), + Annotations: map[string]string{"annotation-key-specific": "HPA"}, + HPA: appsv1alpha1.HorizontalPodAutoscalerSpec{ + MinReplicas: &minReplicas, + MaxReplicas: 10, + Metrics: []autoscalingv2.MetricSpec{ + { + Type: autoscalingv2.ResourceMetricSourceType, + Resource: &autoscalingv2.ResourceMetricSource{ + Target: autoscalingv2.MetricTarget{ + Type: autoscalingv2.UtilizationMetricType, + }, + }, + }, + { + Type: autoscalingv2.PodsMetricSourceType, + Pods: &autoscalingv2.PodsMetricSource{ + Target: autoscalingv2.MetricTarget{ + Type: autoscalingv2.UtilizationMetricType, + }, + }, + }, + }, + }, + }, + Metrics: appsv1alpha1.Metrics{ + Enabled: ptr.To[bool](true), + ServiceMonitor: appsv1alpha1.ServiceMonitor{ + Annotations: map[string]string{"annotation-key-specific": "service-monitor"}, + Interval: "1m", + ScrapeTimeout: "30s", + }, + }, + }, + Status: appsv1alpha1.NemoCustomizerStatus{ + State: conditions.NotReady, + }, + } + + volumes = []corev1.Volume{ + { + Name: "config", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: nemoCustomizer.GetConfigName(), + }, + Items: []corev1.KeyToPath{ + { + Key: "config.yaml", + Path: "config.yaml", + }, + }, + }, + }, + }, + } + + volumeMounts = []corev1.VolumeMount{ + { + Name: "config", + MountPath: "/app/config", + ReadOnly: true, + }, + } + }) + + AfterEach(func() { + // "Cleanup the instance of NemoCustomizer" + namespacedName := types.NamespacedName{Name: nemoCustomizer.Name, Namespace: "default"} + + resource := &appsv1alpha1.NemoCustomizer{} + err := k8sClient.Get(ctx, namespacedName, resource) + if err == nil { + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + } + }) + + Describe("Reconcile", func() { + It("should create all resources for the NemoCustomizer", func() { + namespacedName := types.NamespacedName{Name: nemoCustomizer.Name, Namespace: "default"} + err := client.Create(context.TODO(), nemoCustomizer) + Expect(err).NotTo(HaveOccurred()) + + result, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: namespacedName}) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal(ctrl.Result{})) + + err = client.Get(ctx, namespacedName, nemoCustomizer) + Expect(err).ToNot(HaveOccurred()) + Expect(nemoCustomizer.Finalizers).To(ContainElement(NemoCustomizerFinalizer)) + + // Role should be created + role := &rbacv1.Role{} + err = client.Get(context.TODO(), namespacedName, role) + Expect(err).NotTo(HaveOccurred()) + Expect(role.Name).To(Equal(nemoCustomizer.GetName())) + Expect(role.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + + // RoleBinding should be created + roleBinding := &rbacv1.RoleBinding{} + err = client.Get(context.TODO(), namespacedName, roleBinding) + Expect(err).NotTo(HaveOccurred()) + Expect(roleBinding.Name).To(Equal(nemoCustomizer.GetName())) + Expect(roleBinding.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + + // Service Account should be created + serviceAccount := &corev1.ServiceAccount{} + err = client.Get(context.TODO(), namespacedName, serviceAccount) + Expect(err).NotTo(HaveOccurred()) + Expect(serviceAccount.Name).To(Equal(nemoCustomizer.GetName())) + Expect(serviceAccount.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + + // Service should be created + service := &corev1.Service{} + err = client.Get(context.TODO(), namespacedName, service) + Expect(err).NotTo(HaveOccurred()) + Expect(service.Name).To(Equal(nemoCustomizer.GetName())) + Expect(string(service.Spec.Type)).To(Equal(nemoCustomizer.GetServiceType())) + Expect(service.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + Expect(service.Annotations["annotation-key"]).To(Equal("annotation-value")) + Expect(service.Annotations["annotation-key-specific"]).To(Equal("service")) + + // Ingress should be created + ingress := &networkingv1.Ingress{} + err = client.Get(context.TODO(), namespacedName, ingress) + Expect(err).NotTo(HaveOccurred()) + Expect(ingress.Name).To(Equal(nemoCustomizer.GetName())) + Expect(ingress.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + Expect(ingress.Annotations["annotation-key"]).To(Equal("annotation-value")) + Expect(ingress.Annotations["annotation-key-specific"]).To(Equal("ingress")) + Expect(service.Spec.Ports[0].Name).To(Equal("api")) + Expect(service.Spec.Ports[0].Port).To(Equal(int32(8000))) + + // HPA should be deployed + hpa := &autoscalingv2.HorizontalPodAutoscaler{} + err = client.Get(context.TODO(), namespacedName, hpa) + Expect(err).NotTo(HaveOccurred()) + Expect(hpa.Name).To(Equal(nemoCustomizer.GetName())) + Expect(hpa.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + Expect(hpa.Annotations["annotation-key"]).To(Equal("annotation-value")) + Expect(hpa.Annotations["annotation-key-specific"]).To(Equal("HPA")) + Expect(*hpa.Spec.MinReplicas).To(Equal(int32(1))) + Expect(hpa.Spec.MaxReplicas).To(Equal(int32(10))) + + // Service Monitor should be created + sm := &monitoringv1.ServiceMonitor{} + err = client.Get(context.TODO(), namespacedName, sm) + Expect(err).NotTo(HaveOccurred()) + Expect(sm.Name).To(Equal(nemoCustomizer.GetName())) + Expect(sm.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + Expect(sm.Annotations["annotation-key"]).To(Equal("annotation-value")) + Expect(sm.Annotations["annotation-key-specific"]).To(Equal("service-monitor")) + Expect(sm.Spec.Endpoints[0].Port).To(Equal("service-port")) + Expect(sm.Spec.Endpoints[0].ScrapeTimeout).To(Equal(monitoringv1.Duration("30s"))) + Expect(sm.Spec.Endpoints[0].Interval).To(Equal(monitoringv1.Duration("1m"))) + + // Deployment should be created + deployment := &appsv1.Deployment{} + err = client.Get(context.TODO(), namespacedName, deployment) + Expect(err).NotTo(HaveOccurred()) + Expect(deployment.Name).To(Equal(nemoCustomizer.GetName())) + Expect(deployment.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + Expect(deployment.Annotations["annotation-key"]).To(Equal("annotation-value")) + Expect(deployment.Spec.Template.Spec.Containers[0].Name).To(Equal(nemoCustomizer.GetContainerName())) + Expect(deployment.Spec.Template.Spec.Containers[0].Image).To(Equal(nemoCustomizer.GetImage())) + Expect(deployment.Spec.Template.Spec.Containers[0].ReadinessProbe).To(Equal(nemoCustomizer.Spec.ReadinessProbe.Probe)) + Expect(deployment.Spec.Template.Spec.Containers[0].LivenessProbe).To(Equal(nemoCustomizer.Spec.LivenessProbe.Probe)) + Expect(deployment.Spec.Template.Spec.Containers[0].StartupProbe).To(Equal(nemoCustomizer.Spec.StartupProbe.Probe)) + + sortVolumes(deployment.Spec.Template.Spec.Volumes) + sortVolumes(volumes) + Expect(deployment.Spec.Template.Spec.Volumes).To(Equal(volumes)) + + sortVolumeMounts(deployment.Spec.Template.Spec.Containers[0].VolumeMounts) + sortVolumeMounts(volumeMounts) + Expect(deployment.Spec.Template.Spec.Containers[0].VolumeMounts).To(Equal(volumeMounts)) + + Expect(deployment.Spec.Template.Spec.NodeSelector).To(Equal(nemoCustomizer.Spec.NodeSelector)) + Expect(deployment.Spec.Template.Spec.Tolerations).To(Equal(nemoCustomizer.Spec.Tolerations)) + + envVars := deployment.Spec.Template.Spec.Containers[0].Env + + // Verify standard environment variables + Expect(envVars).To(ContainElements( + corev1.EnvVar{Name: "NAMESPACE", Value: nemoCustomizer.Namespace}, + corev1.EnvVar{Name: "CONFIG_PATH", Value: "/app/config/config.yaml"}, + corev1.EnvVar{Name: "CUSTOMIZATIONS_CALLBACK_URL", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", nemoCustomizer.Name, nemoCustomizer.Namespace, nemoCustomizer.GetInternalServicePort())}, + corev1.EnvVar{Name: "LOG_LEVEL", Value: "INFO"}, + )) + + // Verify WandB environment variables + Expect(envVars).To(ContainElements( + corev1.EnvVar{Name: "WANDB_SECRET_NAME", Value: nemoCustomizer.Spec.WandBSecret.Name}, + corev1.EnvVar{Name: "WANDB_SECRET_KEY", Value: nemoCustomizer.Spec.WandBSecret.APIKeyKey}, + )) + + Expect(envVars).To(ContainElement(corev1.EnvVar{ + Name: "WANDB_ENCRYPTION_KEY", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: nemoCustomizer.Spec.WandBSecret.APIKeyKey, + LocalObjectReference: corev1.LocalObjectReference{ + Name: nemoCustomizer.Spec.WandBSecret.Name, + }, + }, + }, + })) + + // Verify postgres environment variables + Expect(envVars).To(ContainElement(corev1.EnvVar{ + Name: "POSTGRES_DB_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: nemoCustomizer.Spec.DatabaseConfig.Credentials.PasswordKey, + LocalObjectReference: corev1.LocalObjectReference{ + Name: nemoCustomizer.Spec.DatabaseConfig.Credentials.SecretName, + }, + }, + }, + })) + + Expect(envVars).To(ContainElement(corev1.EnvVar{ + Name: "POSTGRES_DB_DSN", + Value: nemoCustomizer.GeneratePostgresConnString(), + })) + + // Verify OTEL environment variables + Expect(envVars).To(ContainElements( + corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: nemoCustomizer.Spec.OpenTelemetry.ExporterOtlpEndpoint}, + corev1.EnvVar{Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED", Value: "true"}, + )) + }) + + It("should delete HPA when NemoCustomizer is updated", func() { + namespacedName := types.NamespacedName{Name: nemoCustomizer.Name, Namespace: "default"} + err := client.Create(context.TODO(), nemoCustomizer) + Expect(err).NotTo(HaveOccurred()) + + result, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: namespacedName}) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal(ctrl.Result{})) + + // HPA should be deployed + hpa := &autoscalingv2.HorizontalPodAutoscaler{} + err = client.Get(context.TODO(), namespacedName, hpa) + Expect(err).NotTo(HaveOccurred()) + Expect(hpa.Name).To(Equal(nemoCustomizer.GetName())) + Expect(hpa.Namespace).To(Equal(nemoCustomizer.GetNamespace())) + Expect(*hpa.Spec.MinReplicas).To(Equal(int32(1))) + Expect(hpa.Spec.MaxReplicas).To(Equal(int32(10))) + + nemoCustomizer := &appsv1alpha1.NemoCustomizer{} + err = client.Get(context.TODO(), namespacedName, nemoCustomizer) + Expect(err).NotTo(HaveOccurred()) + nemoCustomizer.Spec.Scale.Enabled = ptr.To[bool](false) + nemoCustomizer.Spec.Expose.Ingress.Enabled = ptr.To[bool](false) + err = client.Update(context.TODO(), nemoCustomizer) + Expect(err).NotTo(HaveOccurred()) + + result, err = reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: namespacedName}) + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal(ctrl.Result{})) + hpa = &autoscalingv2.HorizontalPodAutoscaler{} + err = client.Get(context.TODO(), namespacedName, hpa) + Expect(err).To(HaveOccurred()) + Expect(errors.IsNotFound(err)).To(Equal(true)) + ingress := &networkingv1.Ingress{} + err = client.Get(context.TODO(), namespacedName, ingress) + Expect(err).To(HaveOccurred()) + Expect(errors.IsNotFound(err)).To(Equal(true)) + }) + }) +}) diff --git a/internal/render/render_test.go b/internal/render/render_test.go index c67bebcd..8566eb6a 100644 --- a/internal/render/render_test.go +++ b/internal/render/render_test.go @@ -163,8 +163,7 @@ var _ = Describe("K8s Resources Rendering", func() { Tolerations: []corev1.Toleration{ { Key: "key1", - Operator: corev1.TolerationOpEqual, - Value: "value1", + Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule, }, }, @@ -231,8 +230,7 @@ var _ = Describe("K8s Resources Rendering", func() { Tolerations: []corev1.Toleration{ { Key: "key1", - Operator: corev1.TolerationOpEqual, - Value: "value1", + Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule, }, }, diff --git a/internal/render/types/types.go b/internal/render/types/types.go index 26fcb463..6973ef6e 100644 --- a/internal/render/types/types.go +++ b/internal/render/types/types.go @@ -117,12 +117,14 @@ type StatefulSetParams struct { // ServiceParams holds the parameters for rendering a Service template type ServiceParams struct { - Name string - Namespace string - Port int32 - TargetPort int32 - PortName string - Protocol string + Name string + Namespace string + Port int32 + TargetPort int32 + PortName string + Protocol string + // Defines multiple ports for the service + Ports []corev1.ServicePort Type string Labels map[string]string Annotations map[string]string diff --git a/internal/utils/utils.go b/internal/utils/utils.go index c8dd1229..b9f58d6d 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -18,6 +18,7 @@ package utils import ( "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" "hash/fnv" @@ -279,3 +280,9 @@ func SortHPAMetricsSpec(metrics []autoscalingv2.MetricSpec) []autoscalingv2.Metr }) return metrics } + +// CalculateHash calculates a SHA256 hash for the given data +func CalculateHash(data string) string { + hash := sha256.Sum256([]byte(data)) + return hex.EncodeToString(hash[:]) +} diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml index da2c28d5..91afe4e0 100644 --- a/manifests/deployment.yaml +++ b/manifests/deployment.yaml @@ -50,6 +50,7 @@ spec: - name: {{ .Name }} mountPath: {{ .MountPath }} subPath: {{ .SubPath }} + readOnly: {{ .ReadOnly }} {{- end }} env: {{- if .Env }} @@ -77,6 +78,13 @@ spec: {{- if .ConfigMap }} configMap: name: {{ .ConfigMap.Name }} + {{- if .ConfigMap.Items }} + items: + {{- range .ConfigMap.Items }} + - key: {{ .Key }} + path: {{ .Path }} + {{- end }} + {{- end }} {{- end }} {{- if .Secret }} secret: diff --git a/manifests/service.yaml b/manifests/service.yaml index 36a5da7d..ddafc746 100644 --- a/manifests/service.yaml +++ b/manifests/service.yaml @@ -18,7 +18,14 @@ spec: {{- .SelectorLabels | yaml | nindent 4 }} {{- end }} ports: - {{- if .Port }} + {{- if .Ports }} + {{- range .Ports }} + - port: {{ .Port }} + targetPort: {{ .TargetPort }} + protocol: {{ .Protocol }} + name: {{ .Name }} + {{- end }} + {{- else }} - port: {{ .Port }} {{- if .TargetPort }} targetPort: {{ .TargetPort }}