Skip to content

Commit

Permalink
Update spec and remove redundant validation markers
Browse files Browse the repository at this point in the history
Signed-off-by: Shiva Krishna, Merla <smerla@nvidia.com>
  • Loading branch information
shivamerla committed Feb 1, 2025
1 parent ad941a9 commit 8af2740
Show file tree
Hide file tree
Showing 11 changed files with 344 additions and 528 deletions.
66 changes: 34 additions & 32 deletions api/apps/v1alpha1/nemo_common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ type DataStore struct {
Endpoint string `json:"endpoint"`
}

// DatabaseConfig is the external database configuration
type DatabaseConfig struct {
// Host is the hostname of the database.
// Required, must not be empty.
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
Host string `json:"host,omitempty"`
Host string `json:"host"`
// Port is the port where the database is reachable at.
// If specified, this must be a valid port number, 0 < databasePort < 65536.
// Defaults to 5432.
Expand All @@ -47,29 +47,26 @@ type DatabaseConfig struct {
// DatabaseName is the database name for a NEMO Service.
// Required, must not be empty.
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
DatabaseName string `json:"databaseName,omitempty"`
DatabaseName string `json:"databaseName"`
// DatabaseCredentials stores the configuration to retrieve the database credentials.
// Required, must not be nil.
//
// +kubebuilder:validation:Required
Credentials *DatabaseCredentials `json:"credentials,omitempty"`
Credentials *DatabaseCredentials `json:"credentials"`
}

// DatabaseCredentials are the external database credentials
type DatabaseCredentials struct {
// User is the non-root username for a NEMO Service in the database.
// Required, must not be empty.
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
User string `json:"user,omitempty"`
User string `json:"user"`
// SecretName is the name of the secret which has the database credentials for a NEMO service user.
// Required, must not be empty.
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
SecretName string `json:"secretName,omitempty"`
SecretName string `json:"secretName"`
// PasswordKey is the name of the key in the `CredentialsSecret` secret for the database credentials.
// Defaults to "password".
//
Expand All @@ -82,15 +79,14 @@ type WandBSecret struct {
// Name is the name of the Kubernetes Secret containing the WandB API key.
// Required, must not be empty.
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
Name string `json:"name,omitempty"`
Name string `json:"name"`

// APIKeyKey is the key in the Secret that holds the WandB API key.
// Defaults to "apiKey".
// +kubebuilder:validation:Required
// +kubebuilder:default="apiKey"
APIKeyKey string `json:"apiKeyKey,omitempty"`
// +kubebuilder:validation:MinLength=1
APIKeyKey string `json:"apiKeyKey"`

// EncryptionKey is an optional key in the secret used for encrypting WandB credentials.
// This can be used for additional security layers if required.
Expand All @@ -102,42 +98,48 @@ type WandBSecret struct {

// OTelSpec defines the settings for OpenTelemetry.
type OTelSpec struct {
	// Enabled indicates if opentelemetry collector and tracing are enabled.
	Enabled *bool `json:"enabled,omitempty"`

	// ExporterOtlpEndpoint is the OTLP collector endpoint.
	// Required, must not be empty.
	//
	// +kubebuilder:validation:MinLength=1
	ExporterOtlpEndpoint string `json:"exporterOtlpEndpoint"`

	// DisableLogging indicates whether Python logging auto-instrumentation should be disabled.
	// +kubebuilder:validation:Optional
	DisableLogging *bool `json:"disableLogging,omitempty"`

	// ExporterConfig defines configuration for different OTel exporters.
	// +kubebuilder:validation:Optional
	ExporterConfig ExporterConfig `json:"exporterConfig,omitempty"`

	// ExcludedUrls defines URLs to be excluded from tracing.
	// +kubebuilder:validation:Optional
	// +kubebuilder:default={"health"}
	ExcludedUrls []string `json:"excludedUrls,omitempty"`

	// LogLevel defines the log level (e.g., INFO, DEBUG).
	// +kubebuilder:validation:Optional
	// +kubebuilder:default="INFO"
	LogLevel string `json:"logLevel,omitempty"`
}

// ExporterConfig stores configuration for different OTel exporters.
type ExporterConfig struct {
	// TracesExporter sets the traces exporter: (otlp, console, none).
	// +kubebuilder:validation:Optional
	// +kubebuilder:validation:Enum=otlp;console;none
	// +kubebuilder:default="otlp"
	TracesExporter string `json:"tracesExporter,omitempty"`

	// MetricsExporter sets the metrics exporter: (otlp, console, none).
	// +kubebuilder:validation:Optional
	// +kubebuilder:validation:Enum=otlp;console;none
	// +kubebuilder:default="otlp"
	MetricsExporter string `json:"metricsExporter,omitempty"`

	// LogsExporter sets the logs exporter: (otlp, console, none).
	// +kubebuilder:validation:Optional
	// +kubebuilder:validation:Enum=otlp;console;none
	// +kubebuilder:default="otlp"
	LogsExporter string `json:"logsExporter,omitempty"`
}
120 changes: 42 additions & 78 deletions api/apps/v1alpha1/nemo_customizer_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"maps"
"os"
"strconv"
"strings"

rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types"
utils "github.com/NVIDIA/k8s-nim-operator/internal/utils"
Expand All @@ -47,6 +48,11 @@ const (
NemoCustomizerStatusReady = "Ready"
// NemoCustomizerStatusFailed indicates that NEMO CustomizerService has failed
NemoCustomizerStatusFailed = "Failed"

// SchedulerTypeVolcano indicates if the scheduler is volcano
SchedulerTypeVolcano = "volcano"
// SchedulerTypeRunAI indicates if the scheduler is run.ai
SchedulerTypeRunAI = "runai"
)

// NemoCustomizerSpec defines the desired state of NemoCustomizer
Expand Down Expand Up @@ -75,23 +81,21 @@ type NemoCustomizerSpec struct {
GroupID *int64 `json:"groupID,omitempty"`
RuntimeClass string `json:"runtimeClass,omitempty"`

// CustomizerConfig stores the customizer configuration
CustomizerConfig string `json:"customizerConfig,omitempty"`
// CustomizerConfig stores the customizer configuration for training and models
// +kubebuilder:validation:MinLength=1
CustomizerConfig string `json:"customizerConfig"`

// Scheduler Configuration
Scheduler Scheduler `json:"scheduler,omitempty"`

// OpenTelemetry Settings
OpenTelemetry OTelSpec `json:"otel,omitempty"`

// Persistent Volume Claims for storage
Storage StorageSpecs `json:"storage,omitempty"`
OpenTelemetry OTelSpec `json:"otel"`

// DatabaseConfig stores the database configuration
DatabaseConfig *DatabaseConfig `json:"databaseConfig,omitempty"`
DatabaseConfig DatabaseConfig `json:"databaseConfig"`

// WandBSecret stores the secret and encryption key for the Weights and Biases service.
WandBSecret *WandBSecret `json:"wandbSecret,omitempty"`
WandBSecret WandBSecret `json:"wandbSecret"`
}

// Scheduler defines the configuration for the scheduler
Expand All @@ -100,46 +104,6 @@ type Scheduler struct {
// +kubebuilder:validation:Enum=volcano;runai
// +kubebuilder:default:=volcano
Type string `json:"type,omitempty"`
// Volcano scheduler configuration
Volcano *VolcanoConfig `json:"volcano,omitempty"`
// RunAI scheduler configuration
RunAI *RunAIConfig `json:"runai,omitempty"`
}

// StorageSpecs defines workspace and model storage configurations
type StorageSpecs struct {
Workspace StorageSpec `json:"workspace,omitempty"`
Model StorageSpec `json:"model,omitempty"`
}

// StorageSpec defines the persistent volume claim configuration for storage
type StorageSpec struct {
Create bool `json:"create,omitempty"`
Name string `json:"name,omitempty"`
StorageClass string `json:"storageClass,omitempty"`
Size string `json:"size,omitempty"`
VolumeAccessMode string `json:"volumeAccessMode,omitempty"`
}

// RunAIConfig defines the configuration for Run.AI scheduler
type RunAIConfig struct {
// Enabled indicates whether Run.AI scheduler is enabled
Enabled bool `json:"enabled,omitempty"`
}

const (
// SchedulerTypeVolcano indicates if the scheduler is volcano
SchedulerTypeVolcano = "volcano"
// SchedulerTypeRunAI indicates if the scheduler is run.ai
SchedulerTypeRunAI = "runai"
)

// VolcanoConfig defines the configuration for Volcano scheduler
type VolcanoConfig struct {
// Enabled indicates whether Volcano scheduler is enabled
Enabled bool `json:"enabled,omitempty"`
// AdmissionURL for the Volcano admission controller
AdmissionURL string `json:"admissionURL,omitempty"`
}

// NemoCustomizerStatus defines the observed state of NemoCustomizer
Expand Down Expand Up @@ -251,43 +215,43 @@ func (n *NemoCustomizer) GetStandardEnv() []corev1.EnvVar {
return envVars
}

// IsOtelEnabled reports whether the Open Telemetry Collector is enabled.
// An unset (nil) Enabled flag is treated as disabled.
func (n *NemoCustomizer) IsOtelEnabled() bool {
	enabled := n.Spec.OpenTelemetry.Enabled
	if enabled == nil {
		return false
	}
	return *enabled
}

// GetOtelEnv generates the OpenTelemetry-related environment variables.
// GetOtelEnv generates OpenTelemetry-related environment variables.
func (n *NemoCustomizer) GetOtelEnv() []corev1.EnvVar {
return []corev1.EnvVar{
{
Name: "OTEL_EXPORTER_OTLP_ENDPOINT",
Value: n.Spec.OpenTelemetry.ExporterOtlpEndpoint,
},
{
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
Value: strconv.FormatBool(*n.Spec.OpenTelemetry.LoggingEnabled),
},
{
Name: "OTEL_TRACES_EXPORTER",
Value: n.Spec.OpenTelemetry.TracesExporter,
},
{
Name: "OTEL_METRICS_EXPORTER",
Value: n.Spec.OpenTelemetry.MetricsExporter,
},
{
Name: "OTEL_LOGS_EXPORTER",
Value: n.Spec.OpenTelemetry.LogsExporter,
},
{
var otelEnvVars []corev1.EnvVar

otelEnvVars = append(otelEnvVars,
corev1.EnvVar{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: n.Spec.OpenTelemetry.ExporterOtlpEndpoint},
corev1.EnvVar{Name: "OTEL_TRACES_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.TracesExporter},
corev1.EnvVar{Name: "OTEL_METRICS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.MetricsExporter},
corev1.EnvVar{Name: "OTEL_LOGS_EXPORTER", Value: n.Spec.OpenTelemetry.ExporterConfig.LogsExporter},
corev1.EnvVar{Name: "OTEL_LOG_LEVEL", Value: n.Spec.OpenTelemetry.LogLevel},
)

if len(n.Spec.OpenTelemetry.ExcludedUrls) > 0 {
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
Name: "OTEL_PYTHON_EXCLUDED_URLS",
Value: n.Spec.OpenTelemetry.ExcludedUrls,
},
{
Name: "OTEL_LOG_LEVEL",
Value: n.Spec.OpenTelemetry.LogLevel,
},
Value: strings.Join(n.Spec.OpenTelemetry.ExcludedUrls, ","),
})
}

if n.Spec.OpenTelemetry.DisableLogging != nil {
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
Value: strconv.FormatBool(!*n.Spec.OpenTelemetry.DisableLogging),
})
} else {
otelEnvVars = append(otelEnvVars, corev1.EnvVar{
Name: "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED",
Value: strconv.FormatBool(true),
})
}

return otelEnvVars
}

// GetPostgresEnv returns the PostgreSQL environment variables for a Kubernetes pod.
Expand Down
Loading

0 comments on commit 8af2740

Please sign in to comment.