Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add APP_INSIGHTS_ID to image build #1266

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ jobs:
IMAGE_NAMESPACE=${{ github.repository }} \
PLATFORM=${{ matrix.platform }}/${{ matrix.arch }} \
IMAGE_REGISTRY=${{ vars.ACR_NAME }} \
APP_INSIGHTS_ID=${{ secrets.AZURE_APP_INSIGHTS_ID }} \
BUILDX_ACTION=--push
else
make retina-image \
Expand Down Expand Up @@ -102,6 +103,7 @@ jobs:
IMAGE_NAMESPACE=${{ github.repository }} \
PLATFORM=${{ matrix.platform }}/${{ matrix.arch }} \
IMAGE_REGISTRY=${{ vars.ACR_NAME }} \
APP_INSIGHTS_ID=${{ secrets.AZURE_APP_INSIGHTS_ID }} \
WINDOWS_YEARS=${{ matrix.year }} \
BUILDX_ACTION=--push
else
Expand Down Expand Up @@ -153,6 +155,7 @@ jobs:
IMAGE_NAMESPACE=${{ github.repository }} \
PLATFORM=${{ matrix.platform }}/${{ matrix.arch }} \
IMAGE_REGISTRY=${{ vars.ACR_NAME }} \
APP_INSIGHTS_ID=${{ secrets.AZURE_APP_INSIGHTS_ID }} \
BUILDX_ACTION=--push
else
make retina-operator-image \
Expand Down
6 changes: 2 additions & 4 deletions cmd/standard/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"os"
"strings"
"time"

"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -48,8 +47,7 @@ import (
)

const (
logFileName = "retina.log"
heartbeatInterval = 15 * time.Minute
logFileName = "retina.log"

nodeNameEnvKey = "NODE_NAME"
nodeIPEnvKey = "NODE_IP"
Expand Down Expand Up @@ -309,7 +307,7 @@ func (d *Daemon) Start() error {
defer controllerMgr.Stop(ctx)

// start heartbeat goroutine for application insights
go tel.Heartbeat(ctx, heartbeatInterval)
go tel.Heartbeat(ctx, daemonConfig.TelemetryInterval)

// Start controller manager, which will start http server and plugin manager.
go controllerMgr.Start(ctx)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ data:
enableAnnotations: {{ .Values.enableAnnotations }}
bypassLookupIPOfInterest: {{ .Values.bypassLookupIPOfInterest }}
dataAggregationLevel: {{ .Values.dataAggregationLevel }}
telemetryInterval: {{ .Values.daemonset.telemetryInterval }}
{{- end}}
---
{{- if .Values.os.windows}}
Expand All @@ -48,6 +49,7 @@ data:
enableTelemetry: {{ .Values.enableTelemetry }}
enablePodLevel: {{ .Values.enablePodLevel }}
remoteContext: {{ .Values.remoteContext }}
telemetryInterval: {{ .Values.daemonset.telemetryInterval }}
{{- end}}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ data:
captureDebug: {{ .Values.capture.debug }}
captureJobNumLimit: {{ .Values.capture.jobNumLimit }}
enableManagedStorageAccount: {{ .Values.capture.enableManagedStorageAccount }}
telemetryInterval: {{ .Values.operator.telemetryInterval }}
{{- if .Values.capture.enableManagedStorageAccount }}
azureCredentialConfig: /etc/cloud-config/azure.json
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ operator:
args:
- "--config"
- "/retina/operator-config.yaml"
telemetryInterval: "5m"

image:
repository: ghcr.io/microsoft/retina/retina-agent
Expand Down Expand Up @@ -87,6 +88,7 @@ daemonset:
metricsBindAddress: ":18080"
ports:
containerPort: 10093
telemetryInterval: "15m"

# volume mounts with name and mountPath
volumeMounts:
Expand Down
5 changes: 1 addition & 4 deletions operator/cmd/standard/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"net/http"
"net/http/pprof"
"os"
"time"

"go.uber.org/zap/zapcore"

Expand Down Expand Up @@ -55,8 +54,6 @@ var (
MaxFileSizeMB = 100
MaxBackups = 3
MaxAgeDays = 30

HeartbeatFrequency = 5 * time.Minute
)

func init() {
Expand Down Expand Up @@ -255,7 +252,7 @@ func (o *Operator) Start() {
}

// start heartbeat goroutine for application insights
go tel.Heartbeat(ctx, HeartbeatFrequency)
go tel.Heartbeat(ctx, oconfig.TelemetryInterval)
}

func EnablePProf() {
Expand Down
11 changes: 9 additions & 2 deletions operator/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package config

import (
"fmt"
"time"

"github.com/microsoft/retina/pkg/config"
"github.com/spf13/viper"
Expand All @@ -14,8 +15,9 @@ type OperatorConfig struct {
EnableTelemetry bool `yaml:"enableTelemetry"`
LogLevel string `yaml:"logLevel"`
// EnableRetinaEndpoint indicates whether to enable RetinaEndpoint
EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"`
RemoteContext bool `yaml:"remoteContext"`
EnableRetinaEndpoint bool `yaml:"enableRetinaEndpoint"`
RemoteContext bool `yaml:"remoteContext"`
TelemetryInterval time.Duration `yaml:"telemetryInterval"`
}

func GetConfig(cfgFileName string) (*OperatorConfig, error) {
Expand All @@ -35,5 +37,10 @@ func GetConfig(cfgFileName string) (*OperatorConfig, error) {
return nil, fmt.Errorf("error unmarshalling config: %w", err)
}

// If unset, default telemetry interval to 5 minutes.
if cfg.TelemetryInterval == 0 {
cfg.TelemetryInterval = 5 * time.Minute
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's keep to 15min, and since this is a configmap change we'll need to notify a few people

https://github.com/microsoft/retina/pull/1266/files#diff-754f5ed2c6297aa0f46f6627521cda46a66d540bcdc9713857a6a84f7fc7358aL52

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

time to add CODEOWNERS?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better if a zero value TelemetryInterval means no telemetry/the previous behavior. That keeps any changed behavior intentionally opt-in, backcompat, and obvious

Copy link
Contributor Author

@alexcastilio alexcastilio Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@matmerr currently it is 15 min for the daemonset heartbeat. This config is for operator, which is 5 min. No change in default values:
https://github.com/microsoft/retina/pull/1266/files#diff-3707c2e5d521fb2df733befba56a5cf6044f7a0d08e8a2ffd1b8ce2a2351731dL59

@rbtr currently a different key in the yaml will enable telemetry:

Do you think enableTelemetry key should be removed and keep only the telemetryInterval? (disable telemetry if time is set to 0)

}

return &cfg, nil
}
24 changes: 24 additions & 0 deletions operator/config/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package config_test

import (
"testing"
"time"

"github.com/microsoft/retina/operator/config"
)

func TestGetConfig(t *testing.T) {
c, err := config.GetConfig("./testwith/config.yaml")
if err != nil {
t.Errorf("Expected no error, instead got %+v", err)
}

if !c.InstallCRDs ||
!c.EnableTelemetry ||
c.LogLevel != "info" ||
!c.EnableRetinaEndpoint ||
!c.RemoteContext ||
c.TelemetryInterval != 15*time.Minute {
t.Errorf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c)
}
}
9 changes: 9 additions & 0 deletions operator/config/testwith/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
apiServer:
host: "0.0.0.0"
port: 10093
installCRDs: true
enableTelemetry: true
logLevel: info
enableRetinaEndpoint: true
remoteContext: true
telemetryInterval: "15m"
6 changes: 6 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type Config struct {
BypassLookupIPOfInterest bool `yaml:"bypassLookupIPOfInterest"`
DataAggregationLevel Level `yaml:"dataAggregationLevel"`
MonitorSockPath string `yaml:"monitorSockPath"`
TelemetryInterval time.Duration `yaml:"telemetryInterval"`
}

func GetConfig(cfgFilename string) (*Config, error) {
Expand Down Expand Up @@ -107,6 +108,11 @@ func GetConfig(cfgFilename string) (*Config, error) {
log.Print("metricsInterval is deprecated, please use metricsIntervalDuration instead")
}

// If unset, default telemetry interval to 15 minutes.
if config.TelemetryInterval == 0 {
config.TelemetryInterval = 15 * time.Minute
}

return &config, nil
}

Expand Down
1 change: 1 addition & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func TestGetConfig(t *testing.T) {
!c.EnableRetinaEndpoint ||
c.RemoteContext ||
c.EnableAnnotations ||
c.TelemetryInterval != 15*time.Minute ||
c.DataAggregationLevel != Low {
t.Fatalf("Expeted config should be same as ./testwith/config.yaml; instead got %+v", c)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/config/testwith/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ metricsIntervalDuration: "10s"
# used to export telemetry to AppInsights
telemetryEnabled: true
dataAggregationLevel: "low"
telemetryInterval: "15m"
Loading