-
Notifications
You must be signed in to change notification settings - Fork 66
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OXY-1514: only track sampled spans and through a setting track all spans. #107
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -26,6 +26,13 @@ pub struct TracingSettings { | |||||
|
||||||
/// The strategy used to sample traces. | ||||||
pub sampling_strategy: SamplingStrategy, | ||||||
/// Enable liveness tracking of all generated spans. Even if the spans are | ||||||
/// unsampled. This can be useful for debugging potential hangs cause by | ||||||
/// some objects remaining in memory. The default value is false, meaning | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
/// only sampled spans are tracked. | ||||||
/// | ||||||
/// To get a json dump of the currently active spans, query: `/debug/traces` | ||||||
pub track_all_spans: bool, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to make the whole liveness tracking functionality, i.e. introduce |
||||||
} | ||||||
|
||||||
#[cfg(not(feature = "settings"))] | ||||||
|
@@ -35,6 +42,7 @@ impl Default for TracingSettings { | |||||
enabled: TracingSettings::default_enabled(), | ||||||
output: Default::default(), | ||||||
sampling_strategy: Default::default(), | ||||||
track_all_spans: false, | ||||||
} | ||||||
} | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,11 +1,13 @@ | ||||||
use crate::telemetry::tracing::live::LiveReferenceHandle; | ||||||
|
||||||
use super::init::TracingHarness; | ||||||
use super::live::SharedSpanHandle; | ||||||
use super::StartTraceOptions; | ||||||
use rand::{self, Rng}; | ||||||
|
||||||
use cf_rustracing::sampler::BoxSampler; | ||||||
use cf_rustracing::tag::Tag; | ||||||
use cf_rustracing_jaeger::span::{Span, SpanContext, SpanContextState}; | ||||||
use parking_lot::RwLock; | ||||||
use rand::{self, Rng}; | ||||||
use std::borrow::Cow; | ||||||
use std::error::Error; | ||||||
use std::sync::Arc; | ||||||
|
@@ -15,37 +17,49 @@ pub(crate) type Tracer = cf_rustracing::Tracer<BoxSampler<SpanContextState>, Spa | |||||
/// Shared span with mutability and additional reference tracking for | ||||||
/// ad-hoc inspection. | ||||||
#[derive(Clone, Debug)] | ||||||
pub(crate) struct SharedSpanInner(SharedSpanHandle); | ||||||
pub(crate) enum SharedSpanHandle { | ||||||
Tracked(Arc<LiveReferenceHandle<Arc<RwLock<Span>>>>), | ||||||
Unsampled(Arc<RwLock<Span>>), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. bikeshed?
Suggested change
|
||||||
} | ||||||
|
||||||
impl SharedSpanInner { | ||||||
impl SharedSpanHandle { | ||||||
pub(crate) fn new(span: Span) -> Self { | ||||||
Self( | ||||||
TracingHarness::get() | ||||||
.active_roots | ||||||
.track(Arc::new(parking_lot::RwLock::new(span))), | ||||||
) | ||||||
let is_sampled = span.is_sampled(); | ||||||
|
||||||
TracingHarness::get() | ||||||
.active_roots | ||||||
.track(Arc::new(RwLock::new(span)), is_sampled) | ||||||
} | ||||||
} | ||||||
|
||||||
impl From<SharedSpanInner> for Arc<parking_lot::RwLock<Span>> { | ||||||
fn from(value: SharedSpanInner) -> Self { | ||||||
Arc::clone(&value.0) | ||||||
pub(crate) fn read(&self) -> parking_lot::RwLockReadGuard<'_, Span> { | ||||||
match self { | ||||||
SharedSpanHandle::Tracked(handle) => handle.read(), | ||||||
SharedSpanHandle::Unsampled(rw_lock) => rw_lock.read(), | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
impl std::ops::Deref for SharedSpanInner { | ||||||
type Target = SharedSpanHandle; | ||||||
pub(crate) fn write(&self) -> parking_lot::RwLockWriteGuard<'_, Span> { | ||||||
match self { | ||||||
SharedSpanHandle::Tracked(handle) => handle.write(), | ||||||
SharedSpanHandle::Unsampled(rw_lock) => rw_lock.write(), | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
fn deref(&self) -> &Self::Target { | ||||||
&self.0 | ||||||
impl From<SharedSpanHandle> for Arc<RwLock<Span>> { | ||||||
fn from(value: SharedSpanHandle) -> Self { | ||||||
match value { | ||||||
SharedSpanHandle::Tracked(handle) => Arc::clone(&handle), | ||||||
SharedSpanHandle::Unsampled(rw_lock) => Arc::clone(&rw_lock), | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
#[derive(Debug, Clone)] | ||||||
pub(crate) struct SharedSpan { | ||||||
// NOTE: we intentionally use a lock without poisoning here to not | ||||||
// panic the threads if they just share telemetry with failed thread. | ||||||
pub(crate) inner: SharedSpanInner, | ||||||
pub(crate) inner: SharedSpanHandle, | ||||||
// NOTE: store sampling flag separately, so we don't need to acquire lock | ||||||
// every time we need to check the flag. | ||||||
is_sampled: bool, | ||||||
|
@@ -56,7 +70,7 @@ impl From<Span> for SharedSpan { | |||||
let is_sampled = inner.is_sampled(); | ||||||
|
||||||
Self { | ||||||
inner: SharedSpanInner::new(inner), | ||||||
inner: SharedSpanHandle::new(inner), | ||||||
is_sampled, | ||||||
} | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -1,16 +1,20 @@ | ||||
mod event_output; | ||||
mod live_reference_set; | ||||
|
||||
use cf_rustracing_jaeger::span::Span; | ||||
use live_reference_set::{LiveReferenceHandle, LiveReferenceSet}; | ||||
use cf_rustracing_jaeger::Span; | ||||
use live_reference_set::LiveReferenceSet; | ||||
use parking_lot::RwLock; | ||||
use std::sync::Arc; | ||||
use std::time::SystemTime; | ||||
|
||||
type SharedSpanInner = Arc<parking_lot::RwLock<Span>>; | ||||
pub(crate) type SharedSpanHandle = Arc<LiveReferenceHandle<SharedSpanInner>>; | ||||
pub(crate) use live_reference_set::LiveReferenceHandle; | ||||
|
||||
use crate::telemetry::tracing::internal::SharedSpanHandle; | ||||
// pub(crate) type SharedSpanHandle = Arc<SharedSpanHandle>; | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
|
||||
pub(crate) struct ActiveRoots { | ||||
roots: LiveReferenceSet<SharedSpanInner>, | ||||
roots: LiveReferenceSet<Arc<RwLock<Span>>>, | ||||
always_track: bool, | ||||
start: SystemTime, | ||||
} | ||||
|
||||
|
@@ -19,16 +23,86 @@ impl Default for ActiveRoots { | |||
Self { | ||||
roots: Default::default(), | ||||
start: SystemTime::now(), | ||||
always_track: false, | ||||
} | ||||
} | ||||
} | ||||
|
||||
impl ActiveRoots { | ||||
pub(crate) fn new(always_track: bool) -> Self { | ||||
Self { | ||||
always_track, | ||||
..Default::default() | ||||
} | ||||
} | ||||
|
||||
pub(crate) fn get_active_traces(&self) -> String { | ||||
event_output::spans_to_trace_events(self.start, &self.roots.get_live_references()) | ||||
} | ||||
|
||||
pub(crate) fn track(&self, value: SharedSpanInner) -> SharedSpanHandle { | ||||
self.roots.track(value) | ||||
pub(crate) fn track(&self, value: Arc<RwLock<Span>>, is_sampled: bool) -> SharedSpanHandle { | ||||
if is_sampled || self.always_track { | ||||
SharedSpanHandle::Tracked(self.roots.track(value)) | ||||
} else { | ||||
SharedSpanHandle::Unsampled(value) | ||||
} | ||||
} | ||||
} | ||||
|
||||
#[cfg(test)] | ||||
mod tests { | ||||
use crate::telemetry::tracing::{self, StartTraceOptions, TracingHarness}; | ||||
use crate::telemetry::TelemetryContext; | ||||
|
||||
#[test] | ||||
fn unsampled_spans_are_not_captured() { | ||||
let ctx = TelemetryContext::test(); | ||||
{ | ||||
let _scope = ctx.scope(); | ||||
let _root = tracing::start_trace( | ||||
"root", | ||||
StartTraceOptions { | ||||
stitch_with_trace: None, | ||||
override_sampling_ratio: Some(0.0), // never sample | ||||
}, | ||||
); | ||||
|
||||
{ | ||||
let _span1 = tracing::span("span1"); | ||||
} | ||||
let _span2 = tracing::span("span2"); | ||||
let _span2_1 = tracing::span("span2_1"); | ||||
|
||||
let harness = TracingHarness::get(); | ||||
let live_spans: Vec<_> = harness.active_roots.roots.get_live_references(); | ||||
|
||||
assert!(live_spans.is_empty()); | ||||
} | ||||
} | ||||
|
||||
#[test] | ||||
fn sampled_spans_are_captured() { | ||||
let ctx = TelemetryContext::test(); | ||||
{ | ||||
let _scope = ctx.scope(); | ||||
let _root = tracing::start_trace( | ||||
"root", | ||||
StartTraceOptions { | ||||
stitch_with_trace: None, | ||||
override_sampling_ratio: Some(1.0), // always sample | ||||
}, | ||||
); | ||||
|
||||
{ | ||||
let _span1 = tracing::span("span1"); | ||||
} | ||||
let _span2 = tracing::span("span2"); | ||||
let _span2_1 = tracing::span("span2_1"); | ||||
|
||||
let harness = TracingHarness::get(); | ||||
let live_spans: Vec<_> = harness.active_roots.roots.get_live_references(); | ||||
|
||||
assert_eq!(live_spans.len(), 3); // span1 was dropped so it's not "live" anymore. | ||||
} | ||||
} | ||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.