diff --git a/.circleci/config.yml b/.circleci/config.yml index ed9caf20e38..6aabbeeb7f8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,7 +65,7 @@ save_bundle_checksum: &save_bundle_checksum command: | if [ "$CI_BUNDLE_CACHE_HIT" != 1 ]; then # Recompute gemfiles/*.lock checksum, as those files might have changed - cat Gemfile Gemfile.lock Appraisals gemfiles/*.gemfile gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum + cat Gemfile Gemfile.lock ruby-*.gemfile gemfiles/*.gemfile gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum fi cp .circleci/bundle_checksum /usr/local/bundle/bundle_checksum step_bundle_install: &step_bundle_install @@ -96,7 +96,7 @@ step_compute_bundle_checksum: &step_compute_bundle_checksum # updating the gemset lock files produces extremely large commits. command: | bundle lock # Create Gemfile.lock - cat Gemfile Gemfile.lock Appraisals gemfiles/*.gemfile gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum + cat Gemfile Gemfile.lock ruby-*.gemfile gemfiles/*.gemfile gemfiles/*.gemfile.lock | md5sum > .circleci/bundle_checksum step_get_test_agent_trace_check_results: &step_get_test_agent_trace_check_results run: name: Get APM Test Agent Trace Check Results diff --git a/.github/workflows/test-memory-leaks.yaml b/.github/workflows/test-memory-leaks.yaml index e1842cb38b6..44d7cb094bf 100644 --- a/.github/workflows/test-memory-leaks.yaml +++ b/.github/workflows/test-memory-leaks.yaml @@ -7,10 +7,10 @@ jobs: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: - ruby-version: 3.4.0-preview2 # TODO: Use stable version once 3.4 is out + ruby-version: 3.4.1 bundler-cache: true # runs 'bundle install' and caches installed gems automatically bundler: latest - cache-version: v1 # bump this to invalidate cache + cache-version: v2 # bump this to invalidate cache - run: sudo apt-get update && (sudo apt-get install -y valgrind || sleep 5 && sudo apt-get install -y valgrind) && valgrind --version - run: gem update --system 3.5.23 # TODO: This is a workaround for a buggy rubygems in 3.4.0-preview2; remove once stable version 3.4 is out - run: bundle exec rake compile spec:profiling:memcheck diff --git a/Steepfile b/Steepfile index 24d49808692..d2cce9c3ef6 100644 --- a/Steepfile +++ b/Steepfile @@ -80,7 +80,6 @@ target :datadog do ignore 'lib/datadog/core/buffer/thread_safe.rb' ignore 'lib/datadog/core/chunker.rb' ignore 'lib/datadog/core/configuration.rb' - ignore 'lib/datadog/core/configuration/agent_settings_resolver.rb' ignore 'lib/datadog/core/configuration/base.rb' ignore 'lib/datadog/core/configuration/components.rb' ignore 'lib/datadog/core/configuration/dependency_resolver.rb' diff --git a/benchmarks/profiler_gc.rb b/benchmarks/profiler_gc.rb index e96198cdfba..401af152d84 100644 --- a/benchmarks/profiler_gc.rb +++ b/benchmarks/profiler_gc.rb @@ -14,7 +14,7 @@ def create_profiler # We take a dummy sample so that the context for the main thread is created, as otherwise the GC profiling methods do # not create it (because we don't want to do memory allocations in the middle of GC) - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, Thread.current) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, Thread.current, false) end def run_benchmark @@ -29,7 +29,7 @@ def run_benchmark x.report('profiler gc') do Datadog::Profiling::Collectors::ThreadContext::Testing._native_on_gc_start(@collector) Datadog::Profiling::Collectors::ThreadContext::Testing._native_on_gc_finish(@collector) - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample_after_gc(@collector, false) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample_after_gc(@collector, false, false) end x.save! "#{File.basename(__FILE__)}-results.json" unless VALIDATE_BENCHMARK_MODE @@ -52,7 +52,7 @@ def run_benchmark estimated_gc_per_minute.times do Datadog::Profiling::Collectors::ThreadContext::Testing._native_on_gc_start(@collector) Datadog::Profiling::Collectors::ThreadContext::Testing._native_on_gc_finish(@collector) - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample_after_gc(@collector, false) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample_after_gc(@collector, false, false) end @recorder.serialize diff --git a/benchmarks/profiler_sample_gvl.rb b/benchmarks/profiler_sample_gvl.rb index 76e8528f417..7688730b714 100644 --- a/benchmarks/profiler_sample_gvl.rb +++ b/benchmarks/profiler_sample_gvl.rb @@ -27,7 +27,7 @@ def initialize @target_thread = thread_with_very_deep_stack # Sample once to trigger thread context creation for all threads (including @target_thread) - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, PROFILER_OVERHEAD_STACK_THREAD) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, PROFILER_OVERHEAD_STACK_THREAD, false) end def create_profiler diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index e007375cf6c..624f29bad25 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -37,7 +37,7 @@ def run_benchmark ) x.report("stack collector #{ENV['CONFIG']}") do - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, PROFILER_OVERHEAD_STACK_THREAD) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, PROFILER_OVERHEAD_STACK_THREAD, false) end x.save! "#{File.basename(__FILE__)}-results.json" unless VALIDATE_BENCHMARK_MODE diff --git a/benchmarks/profiler_sample_serialize.rb b/benchmarks/profiler_sample_serialize.rb index 47ae1b6854a..b90997c42d5 100644 --- a/benchmarks/profiler_sample_serialize.rb +++ b/benchmarks/profiler_sample_serialize.rb @@ -35,7 +35,7 @@ def run_benchmark simulate_seconds = 60 (samples_per_second * simulate_seconds).times do - Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, PROFILER_OVERHEAD_STACK_THREAD) + Datadog::Profiling::Collectors::ThreadContext::Testing._native_sample(@collector, PROFILER_OVERHEAD_STACK_THREAD, false) end @recorder.serialize diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index e4e9b69473e..723e2d93b33 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -1171,6 +1171,16 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *u return; } + // If Ruby is in the middle of raising an exception, we don't want to try to sample. This is because if we accidentally + // trigger an exception inside the profiler code, bad things will happen (specifically, Ruby will try to kill off the + // thread even though we may try to catch the exception). + // + // Note that "in the middle of raising an exception" means the exception itself has already been allocated. + // What's getting allocated now is probably the backtrace objects (@ivoanjo or at least that's what I've observed) + if (is_raised_flag_set(rb_thread_current())) { + return; + } + // Hot path: Dynamic sampling rate is usually enabled and the sampling decision is usually false if (RB_LIKELY(state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler))) { state->stats.allocation_skipped++; diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index 0b56268bed5..4afb23c5a9e 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -9,6 +9,7 @@ #include "private_vm_api_access.h" #include "stack_recorder.h" #include "time_helpers.h" +#include "unsafe_api_calls_check.h" // Used to trigger sampling of threads, based on external "events", such as: // * periodic timer for cpu-time and wall-time @@ -203,10 +204,10 @@ static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _val static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument); static VALUE _native_new(VALUE klass); static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self); -static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread); +static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread, VALUE allow_exception); static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance); static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance); -static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state); +static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state, VALUE allow_exception); static void update_metrics_and_sample( struct thread_context_collector_state *state, VALUE thread_being_sampled, @@ -290,6 +291,7 @@ static void otel_without_ddtrace_trace_identifiers_for( ); static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key); static uint64_t otel_span_id_to_uint(VALUE otel_span_id); +static VALUE safely_lookup_hash_without_going_into_ruby_code(VALUE hash, VALUE key); void collectors_thread_context_init(VALUE profiling_module) { VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors"); @@ -310,11 +312,11 @@ void collectors_thread_context_init(VALUE profiling_module) { rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, -1); rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1); rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1); - rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2); + rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 3); rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3); rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1); rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1); - rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 2); + rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 3); rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0); rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1); rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1); @@ -504,31 +506,49 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. -static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE profiler_overhead_stack_thread) { +static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE profiler_overhead_stack_thread, VALUE allow_exception) { + ENFORCE_BOOLEAN(allow_exception); + if (!is_thread_alive(profiler_overhead_stack_thread)) rb_raise(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive"); + if (allow_exception == Qfalse) debug_enter_unsafe_context(); + thread_context_collector_sample(collector_instance, monotonic_wall_time_now_ns(RAISE_ON_FAILURE), profiler_overhead_stack_thread); + + if (allow_exception == Qfalse) debug_leave_unsafe_context(); + return Qtrue; } // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_instance) { + debug_enter_unsafe_context(); + thread_context_collector_on_gc_start(collector_instance); + + debug_leave_unsafe_context(); + return Qtrue; } // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) { + debug_enter_unsafe_context(); + (void) !thread_context_collector_on_gc_finish(collector_instance); + + debug_leave_unsafe_context(); + return Qtrue; } // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. -static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state) { +static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state, VALUE allow_exception) { ENFORCE_BOOLEAN(reset_monotonic_to_system_state); + ENFORCE_BOOLEAN(allow_exception); struct thread_context_collector_state *state; TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); @@ -537,7 +557,12 @@ static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_ state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER; } + if (allow_exception == Qfalse) debug_enter_unsafe_context(); + thread_context_collector_sample_after_gc(collector_instance); + + if (allow_exception == Qfalse) debug_leave_unsafe_context(); + return Qtrue; } @@ -982,7 +1007,13 @@ static void trigger_sample_for_thread( // It SHOULD NOT be used for other purposes. static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) { VALUE result = rb_ary_new(); + + debug_enter_unsafe_context(); + ddtrace_thread_list(result); + + debug_leave_unsafe_context(); + return result; } @@ -1501,7 +1532,12 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) { + debug_enter_unsafe_context(); + thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight), new_object); + + debug_leave_unsafe_context(); + return Qtrue; } @@ -1597,7 +1633,7 @@ static void ddtrace_otel_trace_identifiers_for( // trace and span representing it. Each ddtrace trace is then connected to the previous otel span, forming a linked // list. The local root span is going to be the trace/span we find at the end of this linked list. while (otel_values != Qnil) { - VALUE otel_span = rb_hash_lookup(otel_values, otel_current_span_key); + VALUE otel_span = safely_lookup_hash_without_going_into_ruby_code(otel_values, otel_current_span_key); if (otel_span == Qnil) break; VALUE next_trace = rb_ivar_get(otel_span, at_datadog_trace_id); if (next_trace == Qnil) break; @@ -1640,7 +1676,12 @@ void thread_context_collector_sample_skipped_allocation_samples(VALUE self_insta } static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples) { + debug_enter_unsafe_context(); + thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples)); + + debug_leave_unsafe_context(); + return Qtrue; } @@ -1709,7 +1750,7 @@ static void otel_without_ddtrace_trace_identifiers_for( VALUE root_span_type = rb_ivar_get(local_root_span.span, at_kind_id /* @kind */); // We filter out spans that don't have `kind: :server` - if (root_span_type == Qnil || !RB_TYPE_P(root_span_type, T_SYMBOL) || SYM2ID(root_span_type) != server_id) return; + if (root_span_type == Qnil || !RB_TYPE_P(root_span_type, T_SYMBOL) || !RB_STATIC_SYM_P(root_span_type) || SYM2ID(root_span_type) != server_id) return; VALUE trace_resource = rb_ivar_get(local_root_span.span, at_name_id /* @name */); if (!RB_TYPE_P(trace_resource, T_STRING)) return; @@ -1726,7 +1767,7 @@ static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_sp if (context_entries == Qnil || !RB_TYPE_P(context_entries, T_HASH)) return failed; // If it exists, context_entries is expected to be a Hash[OpenTelemetry::Context::Key, OpenTelemetry::Trace::Span] - VALUE span = rb_hash_lookup(context_entries, otel_current_span_key); + VALUE span = safely_lookup_hash_without_going_into_ruby_code(context_entries, otel_current_span_key); if (span == Qnil) return failed; // If it exists, span_context is expected to be a OpenTelemetry::Trace::SpanContext (don't confuse it with OpenTelemetry::Context) @@ -1979,31 +2020,53 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread) { ENFORCE_THREAD(thread); + debug_enter_unsafe_context(); + thread_context_collector_on_gvl_waiting(thread_from_thread_object(thread)); + + debug_leave_unsafe_context(); + return Qnil; } static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread) { ENFORCE_THREAD(thread); + debug_enter_unsafe_context(); + intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(thread); + + debug_leave_unsafe_context(); + return LONG2NUM(gvl_waiting_at); } static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread) { ENFORCE_THREAD(thread); - return thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse; + debug_enter_unsafe_context(); + + VALUE result = thread_context_collector_on_gvl_running(thread_from_thread_object(thread)) == ON_GVL_RUNNING_SAMPLE ? Qtrue : Qfalse; + + debug_leave_unsafe_context(); + + return result; } static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread) { ENFORCE_THREAD(thread); - return thread_context_collector_sample_after_gvl_running( + debug_enter_unsafe_context(); + + VALUE result = thread_context_collector_sample_after_gvl_running( collector_instance, thread, monotonic_wall_time_now_ns(RAISE_ON_FAILURE) ); + + debug_leave_unsafe_context(); + + return result; } static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns) { @@ -2030,3 +2093,37 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { DDTRACE_UNUSED long current_cpu_time_ns ) { return false; } #endif // NO_GVL_INSTRUMENTATION + +#define MAX_SAFE_LOOKUP_SIZE 16 + +typedef struct { VALUE lookup_key; VALUE result; } safe_lookup_hash_state; + +static int safe_lookup_hash_iterate(VALUE key, VALUE value, VALUE state_ptr) { + safe_lookup_hash_state *state = (safe_lookup_hash_state *) state_ptr; + + if (key == state->lookup_key) { + state->result = value; + return ST_STOP; + } + + return ST_CONTINUE; +} + +// This method exists because we need to look up a hash during sampling, but we don't want to invoke any +// Ruby code as a side effect. To be able to look up by hash, `rb_hash_lookup` calls `#hash` on the key, +// which we want to avoid. +// Thus, instead, we opt to just iterate through the hash and check if we can find what we're looking for. +// +// To avoid having too much overhead here we only iterate in hashes up to MAX_SAFE_LOOKUP_SIZE. +// Note that we don't even try to iterate if the hash is bigger -- this is to avoid flaky behavior where +// depending on the internal storage order of the hash we may or not find the key, and instead we always +// enforce the size. +static VALUE safely_lookup_hash_without_going_into_ruby_code(VALUE hash, VALUE key) { + if (!RB_TYPE_P(hash, T_HASH) || RHASH_SIZE(hash) > MAX_SAFE_LOOKUP_SIZE) return Qnil; + + safe_lookup_hash_state state = {.lookup_key = key, .result = Qnil}; + + rb_hash_foreach(hash, safe_lookup_hash_iterate, (VALUE) &state); + + return state.result; +} diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index 59e5c644882..f7c188861d7 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -800,3 +800,6 @@ static inline int ddtrace_imemo_type(VALUE imemo) { return current_thread; } #endif + +// Is the VM smack in the middle of raising an exception? +bool is_raised_flag_set(VALUE thread) { return thread_struct_from_object(thread)->ec->raised_flag > 0; } diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.h b/ext/datadog_profiling_native_extension/private_vm_api_access.h index c40992274cb..3e412f51ea5 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.h +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.h @@ -68,3 +68,5 @@ const char *imemo_kind(VALUE imemo); #define ENFORCE_THREAD(value) \ { if (RB_UNLIKELY(!rb_typeddata_is_kind_of(value, RTYPEDDATA_TYPE(rb_thread_current())))) raise_unexpected_type(value, ADD_QUOTES(value), "Thread", __FILE__, __LINE__, __func__); } + +bool is_raised_flag_set(VALUE thread); diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index 315ca0d2954..a7bfe0d466b 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -11,6 +11,7 @@ #include "ruby_helpers.h" #include "setup_signal_handler.h" #include "time_helpers.h" +#include "unsafe_api_calls_check.h" // Each class/module here is implemented in their separate file void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module); @@ -56,6 +57,7 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { collectors_thread_context_init(profiling_module); http_transport_init(profiling_module); stack_recorder_init(profiling_module); + unsafe_api_calls_check_init(); // Hosts methods used for testing the native code using RSpec VALUE testing_module = rb_define_module_under(native_extension_module, "Testing"); diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c new file mode 100644 index 00000000000..c3c23b7f1c7 --- /dev/null +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c @@ -0,0 +1,47 @@ +#include +#include +#include + +#include "datadog_ruby_common.h" +#include "unsafe_api_calls_check.h" +#include "extconf.h" + +static bool inside_unsafe_context = false; + +#ifndef NO_POSTPONED_TRIGGER + static rb_postponed_job_handle_t check_for_unsafe_api_calls_handle; +#endif + +static void check_for_unsafe_api_calls(DDTRACE_UNUSED void *_unused); + +void unsafe_api_calls_check_init(void) { + #ifndef NO_POSTPONED_TRIGGER + int unused_flags = 0; + + check_for_unsafe_api_calls_handle = rb_postponed_job_preregister(unused_flags, check_for_unsafe_api_calls, NULL); + + if (check_for_unsafe_api_calls_handle == POSTPONED_JOB_HANDLE_INVALID) { + rb_raise(rb_eRuntimeError, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); + } + #endif +} + +void debug_enter_unsafe_context(void) { + inside_unsafe_context = true; + + #ifndef NO_POSTPONED_TRIGGER + rb_postponed_job_trigger(check_for_unsafe_api_calls_handle); + #else + rb_postponed_job_register(0, check_for_unsafe_api_calls, NULL); + #endif +} + +void debug_leave_unsafe_context(void) { + inside_unsafe_context = false; +} + +static void check_for_unsafe_api_calls(DDTRACE_UNUSED void *_unused) { + if (inside_unsafe_context) rb_bug( + "Datadog Ruby profiler detected callback nested inside sample. Please report this at https://github.com/datadog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug" + ); +} diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h new file mode 100644 index 00000000000..c6c8dc56cf5 --- /dev/null +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.h @@ -0,0 +1,31 @@ +#pragma once + +// This checker is used to detect accidental thread scheduling switching points happening during profiling sampling. +// +// Specifically, when the profiler is sampling, we're never supposed to call into Ruby code (e.g. methods +// implemented using Ruby code) or allocate Ruby objects. +// That's because those events introduce thread switch points, and really we don't the VM switching between threads +// in the middle of the profiler sampling. +// This includes raising exceptions, unless we're trying to stop the profiler, and even then we must be careful. +// +// The above is especially true in situations such as GC profiling or allocation/heap profiling, as in those situations +// we can even crash the Ruby VM if we switch away at the wrong time. +// +// The below APIs can be used to detect these situations. They work by relying on the following observation: +// in most (all?) thread switch points, Ruby will check for interrupts and run the postponed jobs. +// +// Thus, if we set a flag while we're sampling (inside_unsafe_context), trigger the postponed job, and then only unset +// the flag after sampling, he correct thing to happen is that the postponed job should never see the flag. +// +// If, however, we have a bug and there's a thread switch point, our postponed job will see the flag and immediately +// stop the Ruby VM before further damage happens (and hopefully giving us a stack trace clearly pointing to the culprit). + +void unsafe_api_calls_check_init(void); + +// IMPORTANT: This method **MUST** only be called from test code, as it causes an immediate hard-crash on the Ruby VM +// when it detects a potential issue, and that's not something we want for production apps. +// +// In the future we may introduce some kind of setting (off by default) to also allow this to be safely be used +// in production code if needed. +void debug_enter_unsafe_context(void); +void debug_leave_unsafe_context(void); diff --git a/integration/images/ruby/3.4/Dockerfile b/integration/images/ruby/3.4/Dockerfile index 3cc6648d99f..96500f1c668 100644 --- a/integration/images/ruby/3.4/Dockerfile +++ b/integration/images/ruby/3.4/Dockerfile @@ -1,4 +1,4 @@ -FROM ruby:3.4.0-preview2 +FROM ruby:3.4 ENV DEBIAN_FRONTEND=noninteractive diff --git a/lib/datadog/core/configuration.rb b/lib/datadog/core/configuration.rb index 826a3bb414a..4b2b21d6ee7 100644 --- a/lib/datadog/core/configuration.rb +++ b/lib/datadog/core/configuration.rb @@ -236,7 +236,7 @@ def safely_synchronize rescue ThreadError => e logger_without_components.error( 'Detected deadlock during datadog initialization. ' \ - 'Please report this at https://github.com/DataDog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug' \ + 'Please report this at https://github.com/datadog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug' \ "\n\tSource:\n\t#{Array(e.backtrace).join("\n\t")}" ) nil diff --git a/lib/datadog/core/configuration/agent_settings_resolver.rb b/lib/datadog/core/configuration/agent_settings_resolver.rb index 7d7c048f553..5ced057274a 100644 --- a/lib/datadog/core/configuration/agent_settings_resolver.rb +++ b/lib/datadog/core/configuration/agent_settings_resolver.rb @@ -19,21 +19,49 @@ module Configuration # Whenever there is a conflict (different configurations are provided in different orders), it MUST warn the users # about it and pick a value based on the following priority: code > environment variable > defaults. class AgentSettingsResolver - AgentSettings = Struct.new( - :adapter, - :ssl, - :hostname, - :port, - :uds_path, - :timeout_seconds, - keyword_init: true - ) do - def initialize(*) - super + # Immutable container for the resulting settings + class AgentSettings + attr_reader :adapter, :ssl, :hostname, :port, :uds_path, :timeout_seconds + + def initialize(adapter: nil, ssl: nil, hostname: nil, port: nil, uds_path: nil, timeout_seconds: nil) + @adapter = adapter + @ssl = ssl + @hostname = hostname + @port = port + @uds_path = uds_path + @timeout_seconds = timeout_seconds freeze end + + def url + case adapter + when Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER + hostname = self.hostname + hostname = "[#{hostname}]" if hostname =~ IPV6_REGEXP + "#{ssl ? 'https' : 'http'}://#{hostname}:#{port}/" + when Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER + "unix://#{uds_path}" + else + raise ArgumentError, "Unexpected adapter: #{adapter}" + end + end + + def ==(other) + self.class == other.class && + adapter == other.adapter && + ssl == other.ssl && + hostname == other.hostname && + port == other.port && + uds_path == other.uds_path && + timeout_seconds == other.timeout_seconds + end end + # IPv6 regular expression from + # https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses + # Does not match IPv4 addresses. + IPV6_REGEXP = /\A(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\z)/.freeze # rubocop:disable Layout/LineLength + def self.call(settings, logger: Datadog.logger) new(settings, logger: logger).send(:call) end diff --git a/lib/datadog/core/crashtracking/agent_base_url.rb b/lib/datadog/core/crashtracking/agent_base_url.rb deleted file mode 100644 index 74b59a1cda5..00000000000 --- a/lib/datadog/core/crashtracking/agent_base_url.rb +++ /dev/null @@ -1,28 +0,0 @@ -# frozen_string_literal: true - -require_relative '../configuration/ext' - -module Datadog - module Core - module Crashtracking - # This module provides a method to resolve the base URL of the agent - module AgentBaseUrl - # IPv6 regular expression from - # https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses - # Does not match IPv4 addresses. - IPV6_REGEXP = /\A(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\z)/.freeze # rubocop:disable Layout/LineLength - - def self.resolve(agent_settings) - case agent_settings.adapter - when Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER - hostname = agent_settings.hostname - hostname = "[#{hostname}]" if hostname =~ IPV6_REGEXP - "#{agent_settings.ssl ? 'https' : 'http'}://#{hostname}:#{agent_settings.port}/" - when Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER - "unix://#{agent_settings.uds_path}" - end - end - end - end - end -end diff --git a/lib/datadog/core/crashtracking/component.rb b/lib/datadog/core/crashtracking/component.rb index ee04e1c5cc8..460a7974bc5 100644 --- a/lib/datadog/core/crashtracking/component.rb +++ b/lib/datadog/core/crashtracking/component.rb @@ -3,7 +3,6 @@ require 'libdatadog' require_relative 'tag_builder' -require_relative 'agent_base_url' require_relative '../utils/only_once' require_relative '../utils/at_fork_monkey_patch' @@ -31,8 +30,7 @@ class Component def self.build(settings, agent_settings, logger:) tags = TagBuilder.call(settings) - agent_base_url = AgentBaseUrl.resolve(agent_settings) - logger.warn('Missing agent base URL; cannot enable crash tracking') unless agent_base_url + agent_base_url = agent_settings.url ld_library_path = ::Libdatadog.ld_library_path logger.warn('Missing ld_library_path; cannot enable crash tracking') unless ld_library_path diff --git a/lib/datadog/profiling/http_transport.rb b/lib/datadog/profiling/http_transport.rb index 2c89c6548b7..9de76899494 100644 --- a/lib/datadog/profiling/http_transport.rb +++ b/lib/datadog/profiling/http_transport.rb @@ -13,13 +13,11 @@ class HttpTransport def initialize(agent_settings:, site:, api_key:, upload_timeout_seconds:) @upload_timeout_milliseconds = (upload_timeout_seconds * 1_000).to_i - validate_agent_settings(agent_settings) - @exporter_configuration = if agentless?(site, api_key) [:agentless, site, api_key].freeze else - [:agent, base_url_from(agent_settings)].freeze + [:agent, agent_settings.url].freeze end status, result = validate_exporter(exporter_configuration) @@ -75,29 +73,6 @@ def export(flush) private - def base_url_from(agent_settings) - case agent_settings.adapter - when Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER - "#{agent_settings.ssl ? "https" : "http"}://#{agent_settings.hostname}:#{agent_settings.port}/" - when Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER - "unix://#{agent_settings.uds_path}" - else - raise ArgumentError, "Unexpected adapter: #{agent_settings.adapter}" - end - end - - def validate_agent_settings(agent_settings) - supported_adapters = [ - Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER, - Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER - ] - unless supported_adapters.include?(agent_settings.adapter) - raise ArgumentError, - "Unsupported transport configuration for profiling: Adapter #{agent_settings.adapter} " \ - " is not supported" - end - end - def agentless?(site, api_key) site && api_key && Core::Environment::VariableHelpers.env_to_bool(Profiling::Ext::ENV_AGENTLESS, false) end diff --git a/ruby-3.5.gemfile b/ruby-3.5.gemfile new file mode 100644 index 00000000000..ce83d653fc6 --- /dev/null +++ b/ruby-3.5.gemfile @@ -0,0 +1,67 @@ +source 'https://rubygems.org' + +gemspec + +gem 'base64' +gem 'benchmark-ips', '~> 2.8' +gem 'benchmark-memory', '< 0.2' # V0.2 only works with 2.5+ +gem 'bigdecimal' +gem 'climate_control', '~> 0.2.0' +gem 'concurrent-ruby' + +# Optional extensions +# TODO: Move this to Appraisals? +# dogstatsd v5, but lower than 5.2, has possible memory leak with datadog. +# @see https://github.com/DataDog/dogstatsd-ruby/issues/182 +gem 'dogstatsd-ruby', '>= 3.3.0', '!= 5.0.0', '!= 5.0.1', '!= 5.1.0' + +gem 'extlz4', '~> 0.3', '>= 0.3.3' + +# Profiler testing dependencies +# NOTE: We're excluding versions 3.7.0 and 3.7.1 for the reasons documented in #1424. +# Since most of our customers won't have BUNDLE_FORCE_RUBY_PLATFORM=true, it's not something we want to add +# to our CI, so we just shortcut and exclude specific versions that were affecting our CI. +gem 'google-protobuf', ['~> 3.0', '!= 3.7.0', '!= 3.7.1'] + +gem 'json-schema', '< 3' # V3 only works with 2.5+ +gem 'memory_profiler', '~> 0.9' +gem 'mutex_m' +gem 'os', '~> 1.1' +gem 'pimpmychangelog', '>= 0.1.2' +gem 'pry' +gem 'pry-stack_explorer' +gem 'rake', '>= 10.5' +gem 'rake-compiler', '~> 1.1', '>= 1.1.1' # To compile native extensions +gem 'rspec', '~> 3.13' +gem 'rspec-collection_matchers', '~> 1.1' +gem 'rspec-wait', '~> 0' +gem 'rspec_junit_formatter', '>= 0.5.1' + +# 1.50 is the last version to support Ruby 2.6 +gem 'rubocop', '~> 1.50.0', require: false +gem 'rubocop-packaging', '~> 0.5.2', require: false +gem 'rubocop-performance', '~> 1.9', require: false +# 2.20 is the last version to support Ruby 2.6 +gem 'rubocop-rspec', ['~> 2.20', '< 2.21'], require: false + +# Merging branch coverage results does not work for old, unsupported rubies and JRuby +# We have a fix up for review, https://github.com/simplecov-ruby/simplecov/pull/972, +# but given it only affects unsupported version of Ruby, it might not get merged. +gem 'simplecov', git: 'https://github.com/DataDog/simplecov', ref: '3bb6b7ee58bf4b1954ca205f50dd44d6f41c57db' +gem 'simplecov-cobertura', '~> 2.1.0' # Used by codecov + +gem 'warning', '~> 1' # NOTE: Used in spec_helper.rb +gem 'webmock', '>= 3.10.0' +gem 'webrick', '>= 1.8.2' + +group :check do + gem 'rbs', '~> 3.7', require: false + gem 'steep', '~> 1', '>= 1.9.1', require: false + gem 'ruby_memcheck', '>= 3' + gem 'standard', require: false +end + +group :dev do + gem 'ruby-lsp', require: false + gem 'appraisal', '~> 2.4.0', require: false +end diff --git a/sig/datadog/core/configuration/agent_settings_resolver.rbs b/sig/datadog/core/configuration/agent_settings_resolver.rbs index 040a9aa960a..e3a2472abf4 100644 --- a/sig/datadog/core/configuration/agent_settings_resolver.rbs +++ b/sig/datadog/core/configuration/agent_settings_resolver.rbs @@ -2,9 +2,8 @@ module Datadog module Core module Configuration class AgentSettingsResolver - class AgentSettings < ::Struct[untyped] + class AgentSettings def initialize: (adapter: untyped, ssl: untyped, hostname: untyped, port: untyped, uds_path: untyped, timeout_seconds: untyped) -> void - def merge: (**::Hash[untyped, untyped] member_values) -> AgentSettingsResolver attr_reader adapter: untyped attr_reader ssl: untyped @@ -12,8 +11,26 @@ module Datadog attr_reader port: untyped attr_reader uds_path: untyped attr_reader timeout_seconds: untyped + + def url: () -> ::String end + @settings: untyped + @logger: untyped + @configured_hostname: untyped + @configured_port: untyped + @configured_ssl: untyped + @configured_timeout_seconds: untyped + @configured_uds_path: untyped + @uds_fallback: untyped + @mixed_http_and_uds: untyped + @parsed_url: untyped + + # IPv6 regular expression from + # https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses + # Does not match IPv4 addresses. + IPV6_REGEXP: ::Regexp + def self.call: (untyped settings, ?logger: untyped) -> untyped private @@ -38,33 +55,31 @@ module Datadog def configured_uds_path: () -> untyped - def try_parsing_as_integer: (value: untyped, friendly_name: untyped) -> untyped + def parsed_url_ssl?: () -> (nil | untyped) - def try_parsing_as_boolean: (value: untyped, friendly_name: untyped) -> untyped + def try_parsing_as_integer: (value: untyped, friendly_name: untyped) -> untyped - def ssl?: () -> bool + def ssl?: () -> (false | untyped) def hostname: () -> untyped def port: () -> untyped - def uds_path: () -> untyped - def timeout_seconds: () -> untyped - def uds_fallback: () -> untyped + def parsed_url_uds_path: () -> (nil | untyped) - def should_use_uds_fallback?: () -> untyped + def uds_path: () -> (nil | untyped) - def should_use_uds?: () -> bool + def uds_fallback: () -> untyped - def can_use_uds?: () -> bool + def should_use_uds?: () -> untyped - def parsed_url: () -> untyped + def mixed_http_and_uds: () -> untyped - def parsed_url_ssl?: () -> untyped + def can_use_uds?: () -> untyped - def parsed_url_uds_path: () -> untyped + def parsed_url: () -> untyped def pick_from: (*untyped configurations_in_priority_order) -> untyped @@ -72,7 +87,17 @@ module Datadog def log_warning: (untyped message) -> (untyped | nil) + def http_scheme?: (untyped uri) -> untyped + + def parsed_http_url: () -> (untyped | nil) + + def unix_scheme?: (untyped uri) -> untyped + class DetectedConfiguration + @friendly_name: untyped + + @value: untyped + attr_reader friendly_name: untyped attr_reader value: untyped diff --git a/sig/datadog/core/configuration/ext.rbs b/sig/datadog/core/configuration/ext.rbs index 71b7acc0951..77f9eae8265 100644 --- a/sig/datadog/core/configuration/ext.rbs +++ b/sig/datadog/core/configuration/ext.rbs @@ -15,6 +15,7 @@ module Datadog end module Agent + ENV_DEFAULT_HOST: 'DD_AGENT_HOST' ENV_DEFAULT_PORT: 'DD_TRACE_AGENT_PORT' ENV_DEFAULT_URL: 'DD_TRACE_AGENT_URL' ENV_DEFAULT_TIMEOUT_SECONDS: 'DD_TRACE_AGENT_TIMEOUT_SECONDS' diff --git a/sig/datadog/core/crashtracking/agent_base_url.rbs b/sig/datadog/core/crashtracking/agent_base_url.rbs deleted file mode 100644 index 3a2c77f8e85..00000000000 --- a/sig/datadog/core/crashtracking/agent_base_url.rbs +++ /dev/null @@ -1,10 +0,0 @@ -module Datadog - module Core - module Crashtracking - module AgentBaseUrl - IPV6_REGEXP: Regexp - def self.resolve: (Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings) -> ::String? - end - end - end -end diff --git a/sig/datadog/profiling/http_transport.rbs b/sig/datadog/profiling/http_transport.rbs index 8c58ea8180e..af663a225b7 100644 --- a/sig/datadog/profiling/http_transport.rbs +++ b/sig/datadog/profiling/http_transport.rbs @@ -19,10 +19,6 @@ module Datadog private - def base_url_from: (Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings agent_settings) -> ::String - - def validate_agent_settings: (Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings agent_settings) -> void - def agentless?: (::String? site, ::String? api_key) -> bool def validate_exporter: (exporter_configuration_array exporter_configuration) -> [:ok | :error, ::String?] diff --git a/spec/datadog/core/configuration/agent_settings_resolver_spec.rb b/spec/datadog/core/configuration/agent_settings_resolver_spec.rb index ef2a6766ea1..c057e520739 100644 --- a/spec/datadog/core/configuration/agent_settings_resolver_spec.rb +++ b/spec/datadog/core/configuration/agent_settings_resolver_spec.rb @@ -803,4 +803,61 @@ end end end + + describe 'url' do + context 'when using HTTP adapter' do + before do + datadog_settings.agent.host = 'example.com' + datadog_settings.agent.port = 8080 + end + + context 'when SSL is enabled' do + before { datadog_settings.agent.use_ssl = true } + + it 'returns the correct base URL' do + expect(resolver.url).to eq('https://example.com:8080/') + end + end + + context 'when SSL is disabled' do + before { datadog_settings.agent.use_ssl = false } + + it 'returns the correct base URL' do + expect(resolver.url).to eq('http://example.com:8080/') + end + end + + context 'when hostname is an IPv4 address' do + before { datadog_settings.agent.host = '1.2.3.4' } + + it 'returns the correct base URL' do + expect(resolver.url).to eq('http://1.2.3.4:8080/') + end + end + + context 'when hostname is an IPv6 address' do + before { datadog_settings.agent.host = '1234:1234::1' } + + it 'returns the correct base URL' do + expect(resolver.url).to eq('http://[1234:1234::1]:8080/') + end + end + end + + context 'when using UnixSocket adapter' do + before { datadog_settings.agent.uds_path = '/var/run/datadog.sock' } + + it 'returns the correct base URL' do + expect(resolver.url).to eq('unix:///var/run/datadog.sock') + end + end + + context 'when using an unknown adapter' do + it 'raises an exception' do + agent_settings = Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings.new(adapter: :unknown) + + expect { agent_settings.url }.to raise_error(ArgumentError, /Unexpected adapter/) + end + end + end end diff --git a/spec/datadog/core/crashtracking/agent_base_url_spec.rb b/spec/datadog/core/crashtracking/agent_base_url_spec.rb deleted file mode 100644 index 407b74daf26..00000000000 --- a/spec/datadog/core/crashtracking/agent_base_url_spec.rb +++ /dev/null @@ -1,98 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' -require 'datadog/core/crashtracking/agent_base_url' - -RSpec.describe Datadog::Core::Crashtracking::AgentBaseUrl do - describe '.resolve' do - context 'when using HTTP adapter' do - context 'when SSL is enabled' do - let(:agent_settings) do - instance_double( - Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings, - adapter: Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER, - ssl: true, - hostname: 'example.com', - port: 8080 - ) - end - - it 'returns the correct base URL' do - expect(described_class.resolve(agent_settings)).to eq('https://example.com:8080/') - end - end - - context 'when SSL is disabled' do - let(:agent_settings) do - instance_double( - Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings, - adapter: Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER, - ssl: false, - hostname: 'example.com', - port: 8080 - ) - end - - it 'returns the correct base URL' do - expect(described_class.resolve(agent_settings)).to eq('http://example.com:8080/') - end - end - - context 'when hostname is an IPv4 address' do - let(:agent_settings) do - instance_double( - Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings, - adapter: Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER, - ssl: false, - hostname: '1.2.3.4', - port: 8080 - ) - end - - it 'returns the correct base URL' do - expect(described_class.resolve(agent_settings)).to eq('http://1.2.3.4:8080/') - end - end - - context 'when hostname is an IPv6 address' do - let(:agent_settings) do - instance_double( - Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings, - adapter: Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER, - ssl: false, - hostname: '1234:1234::1', - port: 8080 - ) - end - - it 'returns the correct base URL' do - expect(described_class.resolve(agent_settings)).to eq('http://[1234:1234::1]:8080/') - end - end - end - - context 'when using UnixSocket adapter' do - let(:agent_settings) do - instance_double( - Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings, - adapter: Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER, - uds_path: '/var/run/datadog.sock' - ) - end - - it 'returns the correct base URL' do - expect(described_class.resolve(agent_settings)).to eq('unix:///var/run/datadog.sock') - end - end - - context 'when using unknownm adapter' do - let(:agent_settings) do - instance_double(Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings, adapter: 'unknown') - end - - it 'returns nil' do - expect(described_class.resolve(agent_settings)).to be_nil - end - end - end -end diff --git a/spec/datadog/core/crashtracking/component_spec.rb b/spec/datadog/core/crashtracking/component_spec.rb index 034bd5f441e..fcbef942d43 100644 --- a/spec/datadog/core/crashtracking/component_spec.rb +++ b/spec/datadog/core/crashtracking/component_spec.rb @@ -9,7 +9,9 @@ describe '.build' do let(:settings) { Datadog::Core::Configuration::Settings.new } - let(:agent_settings) { double('agent_settings') } + let(:agent_settings) do + instance_double(Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings) + end let(:tags) { { 'tag1' => 'value1' } } let(:agent_base_url) { 'agent_base_url' } let(:ld_library_path) { 'ld_library_path' } @@ -19,8 +21,7 @@ it 'creates a new instance of Component and starts it' do expect(Datadog::Core::Crashtracking::TagBuilder).to receive(:call).with(settings) .and_return(tags) - expect(Datadog::Core::Crashtracking::AgentBaseUrl).to receive(:resolve).with(agent_settings) - .and_return(agent_base_url) + expect(agent_settings).to receive(:url).and_return(agent_base_url) expect(::Libdatadog).to receive(:ld_library_path) .and_return(ld_library_path) expect(::Libdatadog).to receive(:path_to_crashtracking_receiver_binary) @@ -42,32 +43,13 @@ end end - context 'when missing `agent_base_url`' do - let(:agent_base_url) { nil } - - it 'returns nil' do - expect(Datadog::Core::Crashtracking::TagBuilder).to receive(:call).with(settings) - .and_return(tags) - expect(Datadog::Core::Crashtracking::AgentBaseUrl).to receive(:resolve).with(agent_settings) - .and_return(agent_base_url) - expect(::Libdatadog).to receive(:ld_library_path) - .and_return(ld_library_path) - expect(::Libdatadog).to receive(:path_to_crashtracking_receiver_binary) - .and_return(path_to_crashtracking_receiver_binary) - expect(logger).to receive(:warn).with(/cannot enable crash tracking/) - - expect(described_class.build(settings, agent_settings, logger: logger)).to be_nil - end - end - context 'when missing `ld_library_path`' do let(:ld_library_path) { nil } it 'returns nil' do expect(Datadog::Core::Crashtracking::TagBuilder).to receive(:call).with(settings) .and_return(tags) - expect(Datadog::Core::Crashtracking::AgentBaseUrl).to receive(:resolve).with(agent_settings) - .and_return(agent_base_url) + expect(agent_settings).to receive(:url).and_return(agent_base_url) expect(::Libdatadog).to receive(:ld_library_path) .and_return(ld_library_path) expect(::Libdatadog).to receive(:path_to_crashtracking_receiver_binary) @@ -84,8 +66,7 @@ it 'returns nil' do expect(Datadog::Core::Crashtracking::TagBuilder).to receive(:call).with(settings) .and_return(tags) - expect(Datadog::Core::Crashtracking::AgentBaseUrl).to receive(:resolve).with(agent_settings) - .and_return(agent_base_url) + expect(agent_settings).to receive(:url).and_return(agent_base_url) expect(::Libdatadog).to receive(:ld_library_path) .and_return(ld_library_path) expect(::Libdatadog).to receive(:path_to_crashtracking_receiver_binary) @@ -102,8 +83,7 @@ it 'returns an instance of Component that failed to start' do expect(Datadog::Core::Crashtracking::TagBuilder).to receive(:call).with(settings) .and_return(tags) - expect(Datadog::Core::Crashtracking::AgentBaseUrl).to receive(:resolve).with(agent_settings) - .and_return(agent_base_url) + expect(agent_settings).to receive(:url).and_return(agent_base_url) expect(::Libdatadog).to receive(:ld_library_path) .and_return(ld_library_path) expect(::Libdatadog).to receive(:path_to_crashtracking_receiver_binary) diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index e7b1bd28ba0..0296be0ff72 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -66,8 +66,8 @@ end end - def sample(profiler_overhead_stack_thread: Thread.current) - described_class::Testing._native_sample(cpu_and_wall_time_collector, profiler_overhead_stack_thread) + def sample(profiler_overhead_stack_thread: Thread.current, allow_exception: false) + described_class::Testing._native_sample(cpu_and_wall_time_collector, profiler_overhead_stack_thread, allow_exception) end def on_gc_start @@ -78,8 +78,12 @@ def on_gc_finish described_class::Testing._native_on_gc_finish(cpu_and_wall_time_collector) end - def sample_after_gc(reset_monotonic_to_system_state: false) - described_class::Testing._native_sample_after_gc(cpu_and_wall_time_collector, reset_monotonic_to_system_state) + def sample_after_gc(reset_monotonic_to_system_state: false, allow_exception: false) + described_class::Testing._native_sample_after_gc( + cpu_and_wall_time_collector, + reset_monotonic_to_system_state, + allow_exception, + ) end def sample_allocation(weight:, new_object: Object.new) @@ -584,6 +588,7 @@ def self.otel_otlp_exporter_available? false end + # When opentelemetry-sdk is on the Gemfile, but not opentelemetry-exporter-otlp context "when trace comes from otel sdk", if: otel_sdk_available? && !otel_otlp_exporter_available? do let(:otel_tracer) do require "datadog/opentelemetry" @@ -618,6 +623,31 @@ def self.otel_otlp_exporter_available? expect(t1_sample.labels).to_not include("trace endpoint": anything) end + describe 'accessing the current span' do + before do + allow(Datadog.logger).to receive(:error) + + # initialize otel context reading + sample + # clear samples + recorder.serialize! + end + + it 'does not try to hash the CURRENT_SPAN_KEY' do + inner_check_ran = false + + otel_tracer.in_span("profiler.test") do |_span| + expect(OpenTelemetry::Trace.const_get(:CURRENT_SPAN_KEY)).to_not receive(:hash) + + sample_allocation(weight: 1) + + inner_check_ran = true + end + + expect(inner_check_ran).to be true + end + end + context "when there are multiple otel spans nested" do let(:t1) do Thread.new(ready_queue, otel_tracer) do |ready_queue, otel_tracer| @@ -717,6 +747,7 @@ def self.otel_otlp_exporter_available? end end + # When opentelemetry-sdk AND opentelemetry-exporter-otlp are on the Gemfile context( "when trace comes from otel sdk and the ddtrace otel support is not loaded", if: otel_sdk_available? && otel_otlp_exporter_available? @@ -765,7 +796,7 @@ def otel_span_id_to_i(span_id) expect(t1_sample.labels).to_not include("trace endpoint": anything) end - context 'reading CURRENT_SPAN_KEY' do + describe 'reading CURRENT_SPAN_KEY into otel_current_span_key' do let!(:ran_log) { [] } let(:setup_failure) do @@ -782,18 +813,18 @@ def otel_span_id_to_i(span_id) ) end - context 'raises an exception' do + context 'when an exception is raised' do before { setup_failure } after { expect(ran_log).to eq [:ran_code] } it 'does not leave the exception pending' do - sample + sample(allow_exception: true) expect($!).to be nil end it 'omits the "local root span id" and "span id" labels in the sample' do - sample + sample(allow_exception: true) expect(t1_sample.labels.keys).to_not include(:"local root span id", :"span id") end @@ -814,6 +845,61 @@ def otel_span_id_to_i(span_id) end end + describe 'accessing the current span' do + before do + allow(OpenTelemetry.logger).to receive(:error) + + # initialize otel context reading + sample + # clear samples + recorder.serialize! + end + + it 'does not try to hash the CURRENT_SPAN_KEY' do + inner_check_ran = false + + otel_tracer.in_span("profiler.test") do |_span| + expect(OpenTelemetry::Trace.const_get(:CURRENT_SPAN_KEY)).to_not receive(:hash) + + sample_allocation(weight: 1) + + inner_check_ran = true + end + + expect(inner_check_ran).to be true + end + + context 'when there are more than MAX_SAFE_LOOKUP_SIZE entries in the otel context' do + let(:max_safe_lookup_size) { 16 } # Value of MAX_SAFE_LOOKUP_SIZE in C code + + it 'does not try to look up the context' do + otel_tracer.in_span("profiler.test") do |_span| + current_size = OpenTelemetry::Context.current.instance_variable_get(:@entries).size + + OpenTelemetry::Context.with_values( + Array.new((max_safe_lookup_size + 1 - current_size)) { |it| ["key_#{it}", it] }.to_h + ) do + sample_allocation(weight: 12) + end + + OpenTelemetry::Context.with_values( + Array.new((max_safe_lookup_size - current_size)) { |it| ["key_#{it}", it] }.to_h + ) do + sample_allocation(weight: 34) + end + end + + result = samples_for_thread(samples, Thread.current) + + expect(result.size).to be 2 + expect(result.find { |it| it.values.fetch(:"alloc-samples") == 12 }.labels.keys) + .to_not include(:"local root span id", :"span id") + expect(result.find { |it| it.values.fetch(:"alloc-samples") == 34 }.labels.keys) + .to include(:"local root span id", :"span id") + end + end + end + context 'when otel_context_enabled is false' do let(:otel_context_enabled) { false } @@ -1433,7 +1519,7 @@ def sample_and_check(expected_state:) context "when called before on_gc_start/on_gc_finish" do it do - expect { sample_after_gc }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + expect { sample_after_gc(allow_exception: true) }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) end end @@ -1451,7 +1537,8 @@ def sample_and_check(expected_state:) it do sample_after_gc - expect { sample_after_gc }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + expect { sample_after_gc(allow_exception: true) } + .to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) end end diff --git a/spec/datadog/profiling/http_transport_spec.rb b/spec/datadog/profiling/http_transport_spec.rb index 44423984fab..0cd98ba09e3 100644 --- a/spec/datadog/profiling/http_transport_spec.rb +++ b/spec/datadog/profiling/http_transport_spec.rb @@ -127,6 +127,19 @@ http_transport end end + + context "when hostname is an ipv6 address" do + let(:hostname) { "1234:1234::1" } + + it "provides the correct ipv6 address-safe url to the exporter" do + expect(described_class) + .to receive(:_native_validate_exporter) + .with([:agent, "http://[1234:1234::1]:12345/"]) + .and_return([:ok, nil]) + + http_transport + end + end end context "when additionally site and api_key are provided" do diff --git a/suppressions/ruby-3.4.supp b/suppressions/ruby-3.4.supp index 646151fa9fb..71af0c40baa 100644 --- a/suppressions/ruby-3.4.supp +++ b/suppressions/ruby-3.4.supp @@ -77,3 +77,19 @@ obj:/usr/bin/tr ... } + +# When a Ruby process forks, it looks like Ruby doesn't clean up the memory of old threads? +{ + ruby-native-thread-memory-4 + Memcheck:Leak + fun:calloc + fun:calloc1 + fun:rb_gc_impl_calloc + fun:ruby_xcalloc_body + fun:ruby_xcalloc + fun:native_thread_alloc + fun:native_thread_create_dedicated + fun:native_thread_create + fun:thread_create_core + ... +}