From 58949b2b566c0b301da1513a1a2970857082f4f4 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 12:08:49 -0500 Subject: [PATCH 01/11] i#7157 dyn inject: Improve ord tracking for synthetic records Fixes handling of some cases related to ordinal tracking when there are queued synthetic records. get_input_record_ordinal, which has existing logic to discount records that are read-ahead by the scheduler, was missing logic to correctly account for synthetic records in the queue. This caused a higher count to be subtracted from the record ordinal returned by the input stream, particularly when a new input is switched to (in which case the guarding condition for the read-ahead adjustment was satisfied). Added tracking for whether each queued record is real, that is, read from the input reader or synthetic. Fixes an issue with instrs_pre_read tracking which should be decremented only when a real record is passed on to the caller. Adds a test that verifies the bookkeeping for use_input_ordinals case, which was affected by the above issue. Issue: #7157 --- .../scheduler/scheduler_dynamic.cpp | 9 +- .../drcachesim/scheduler/scheduler_impl.cpp | 143 ++++++++++-------- clients/drcachesim/scheduler/scheduler_impl.h | 20 ++- .../drcachesim/scheduler/scheduler_replay.cpp | 3 +- .../drcachesim/tests/scheduler_unit_tests.cpp | 35 +++-- 5 files changed, 132 insertions(+), 78 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_dynamic.cpp b/clients/drcachesim/scheduler/scheduler_dynamic.cpp index 8f8dad07eb8..979c2c66910 100644 --- a/clients/drcachesim/scheduler/scheduler_dynamic.cpp +++ b/clients/drcachesim/scheduler/scheduler_dynamic.cpp @@ -458,7 +458,8 @@ scheduler_dynamic_tmpl_t::check_for_input_switch( // boundaries so we live with those being before the switch. // XXX: Once we insert kernel traces, we may have to try harder // to stop before the post-syscall records. - if (this->record_type_is_instr_boundary(record, outputs_[output].last_record) && + if (this->record_type_is_instr_boundary(record, + outputs_[output].last_record.record) && // We want to delay the context switch until after the injected syscall trace. !outputs_[output].in_syscall_code) { if (input->switch_to_input != sched_type_t::INVALID_INPUT_ORDINAL) { @@ -507,7 +508,8 @@ scheduler_dynamic_tmpl_t::check_for_input_switch( this->process_marker(*input, output, marker_type, marker_value); } if (options_.quantum_unit == sched_type_t::QUANTUM_INSTRUCTIONS && - this->record_type_is_instr_boundary(record, outputs_[output].last_record) && + this->record_type_is_instr_boundary(record, + outputs_[output].last_record.record) && !outputs_[output].in_context_switch_code) { ++input->instrs_in_quantum; if (input->instrs_in_quantum > options_.quantum_duration_instrs) { @@ -546,7 +548,8 @@ scheduler_dynamic_tmpl_t::check_for_input_switch( // We only switch on instruction boundaries. We could possibly switch // in between (e.g., scatter/gather long sequence of reads/writes) by // setting input->switching_pre_instruction. - this->record_type_is_instr_boundary(record, outputs_[output].last_record)) { + this->record_type_is_instr_boundary(record, + outputs_[output].last_record.record)) { if (outputs_[output].in_syscall_code) { // XXX: Maybe this should be printed only once per-syscall-instance to // reduce log spam. diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index 3ff55edf368..bcc3476d401 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -564,8 +564,8 @@ scheduler_impl_tmpl_t::insert_switch_tid_pid( tid.size = 0; tid.addr = static_cast(input.tid); - input.queue.push_front(pid); - input.queue.push_front(tid); + input.queue.emplace_front(pid, false); + input.queue.emplace_front(tid, false); } /*************************************************************************** @@ -850,7 +850,8 @@ scheduler_impl_tmpl_t::init( ? spec_type_t::USE_NOPS // TODO i#5843: Add more flags for other options. : spec_type_t::LAST_FROM_TRACE, - static_cast(get_time_micros()), create_invalid_record(), verbosity_); + static_cast(get_time_micros()), + next_record_t(create_invalid_record(), false), verbosity_); if (options_.single_lockstep_output) outputs_.back().stream = global_stream_.get(); if (options_.schedule_record_ostream != nullptr) { @@ -1523,8 +1524,9 @@ scheduler_impl_tmpl_t::get_initial_input_content( // Maybe we should disallow it so we don't need checks like this? options_.mapping != sched_type_t::MAP_AS_PREVIOUSLY) { RecordType record = create_invalid_record(); + bool real_record = false; stream_status_t res = - advance_region_of_interest(/*output=*/-1, record, input); + advance_region_of_interest(/*output=*/-1, record, input, real_record); if (res == sched_type_t::STATUS_SKIPPED) { input.next_timestamp = static_cast(input.reader->get_last_timestamp()); @@ -1548,7 +1550,7 @@ scheduler_impl_tmpl_t::get_initial_input_content( // the non-consuming queue loop vs the consuming and queue-pushback // reader loop. for (const auto &record : input.queue) { - if (!process_next_initial_record(input, record, found_filetype, + if (!process_next_initial_record(input, record.record, found_filetype, found_timestamp)) break; } @@ -1592,7 +1594,8 @@ scheduler_impl_tmpl_t::get_initial_input_content( // we skip (see skip_instructions()). Thus, we abort with an error. if (record_type_is_instr(record)) break; - input.queue.push_back(record); + input.queue.emplace_back(record, true); + ++input.real_records_in_queue; ++(*input.reader); } } @@ -1632,7 +1635,8 @@ scheduler_impl_tmpl_t::open_reader( RecordType record = **reader; if (record_type_has_tid(record, tid)) break; - input.queue.push_back(record); + input.queue.emplace_back(record, true); + ++input.real_records_in_queue; ++(*reader); } if (tid == INVALID_THREAD_ID) { @@ -1821,10 +1825,14 @@ scheduler_impl_tmpl_t::get_input_record_ordinal( return 0; uint64_t ord = inputs_[index].reader->get_record_ordinal(); if (get_instr_ordinal(inputs_[index]) == 0) { + uint64_t adjust = + inputs_[index].cur_from_queue && inputs_[index].cur_real_record ? 1 : 0; + adjust += inputs_[index].real_records_in_queue; + assert(ord >= adjust); // Account for get_initial_input_content() readahead for filetype/timestamp. // If this gets any more complex, the scheduler stream should track its // own counts for every input and just ignore the input stream's tracking. - ord -= inputs_[index].queue.size() + (inputs_[index].cur_from_queue ? 1 : 0); + ord -= adjust; } return ord; } @@ -1852,7 +1860,8 @@ scheduler_impl_tmpl_t::get_input_first_timestamp( return 0; uint64_t res = inputs_[index].reader->get_first_timestamp(); if (get_instr_ordinal(inputs_[index]) == 0 && - (!inputs_[index].queue.empty() || inputs_[index].cur_from_queue)) { + (inputs_[index].real_records_in_queue > 0 || + (inputs_[index].cur_from_queue && inputs_[index].cur_real_record))) { // Account for get_initial_input_content() readahead for filetype/timestamp. res = 0; } @@ -1871,7 +1880,8 @@ scheduler_impl_tmpl_t::get_input_last_timestamp( return 0; uint64_t res = inputs_[index].reader->get_last_timestamp(); if (get_instr_ordinal(inputs_[index]) == 0 && - (!inputs_[index].queue.empty() || inputs_[index].cur_from_queue)) { + (inputs_[index].real_records_in_queue > 0 || + (inputs_[index].cur_from_queue && inputs_[index].cur_real_record))) { // Account for get_initial_input_content() readahead for filetype/timestamp. res = 0; } @@ -1881,7 +1891,7 @@ scheduler_impl_tmpl_t::get_input_last_timestamp( template typename scheduler_tmpl_t::stream_status_t scheduler_impl_tmpl_t::advance_region_of_interest( - output_ordinal_t output, RecordType &record, input_info_t &input) + output_ordinal_t output, RecordType &record, input_info_t &input, bool &real_record) { assert(input.lock->owned_by_cur_thread()); uint64_t cur_instr = get_instr_ordinal(input); @@ -1913,7 +1923,7 @@ scheduler_impl_tmpl_t::advance_region_of_interest( if (status != sched_type_t::STATUS_OK) return status; } - input.queue.push_back(create_thread_exit(input.tid)); + input.queue.push_back({ create_thread_exit(input.tid), false }); stream_status_t status = mark_input_eof(input); // For early EOF we still need our synthetic exit so do not return it yet. if (status != sched_type_t::STATUS_OK && @@ -1932,8 +1942,9 @@ scheduler_impl_tmpl_t::advance_region_of_interest( if (input.cur_region > 0) { VPRINT(this, 3, "skip_instructions input=%d: inserting separator marker\n", input.index); - input.queue.push_back(record); + input.queue.emplace_back(record, real_record); record = create_region_separator_marker(input.tid, input.cur_region); + real_record = false; } return sched_type_t::STATUS_OK; } @@ -2011,11 +2022,12 @@ scheduler_impl_tmpl_t::clear_input_queue(input_info_t &i int i = 0; while (!input.queue.empty()) { assert(i == 0 || - (!record_type_is_instr(input.queue.front()) && - !record_type_is_encoding(input.queue.front()))); + (!record_type_is_instr(input.queue.front().record) && + !record_type_is_encoding(input.queue.front().record))); ++i; input.queue.pop_front(); } + input.real_records_in_queue = 0; } template @@ -2032,8 +2044,8 @@ scheduler_impl_tmpl_t::skip_instructions(input_info_t &i // For a skip of 0 we still need to clear non-instrs from the queue, but // should not have an instr in there. assert(skip_amount > 0 || input.queue.empty() || - (!record_type_is_instr(input.queue.front()) && - !record_type_is_encoding(input.queue.front()))); + (!record_type_is_instr(input.queue.front().record) && + !record_type_is_encoding(input.queue.front().record))); clear_input_queue(input); input.reader->skip_instructions(skip_amount); VPRINT(this, 3, "skip_instructions: input=%d amount=%" PRIu64 "\n", input.index, @@ -2072,7 +2084,7 @@ scheduler_impl_tmpl_t::skip_instructions(input_info_t &i VPRINT(this, 3, "skip_instructions input=%d: inserting separator marker\n", input.index); input.queue.push_back( - create_region_separator_marker(input.tid, input.cur_region)); + { create_region_separator_marker(input.tid, input.cur_region), false }); } return sched_type_t::STATUS_SKIPPED; } @@ -2288,36 +2300,38 @@ scheduler_impl_tmpl_t::set_cur_input( stream->page_size_ = inputs_[input].reader->get_page_size(); } - if (inputs_[input].pid != INVALID_PID) { - insert_switch_tid_pid(inputs_[input]); - } - - if (!switch_sequence_.empty() && - outputs_[output].stream->get_instruction_ordinal() > 0) { - switch_type_t switch_type = sched_type_t::SWITCH_INVALID; - if (prev_workload != inputs_[input].workload) - switch_type = sched_type_t::SWITCH_PROCESS; - else - switch_type = sched_type_t::SWITCH_THREAD; - // Inject kernel context switch code. Since the injected records belong to this - // input (the kernel is acting on behalf of this input) we insert them into the - // input's queue, but ahead of any prior queued items. This is why we walk in - // reverse, for the push_front calls to the deque. We update the tid of the - // records here to match. They are considered as is_record_synthetic() and do - // not affect input stream ordinals. - // XXX: These will appear before the top headers of a new thread which is slightly - // odd to have regular records with the new tid before the top headers. - if (!switch_sequence_[switch_type].empty()) { - for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; - i >= 0; --i) { - RecordType record = switch_sequence_[switch_type][i]; - record_type_set_tid(record, inputs_[input].tid); - inputs_[input].queue.push_front(record); + if (prev_workload >= 0) { + if (inputs_[input].pid != INVALID_PID) { + insert_switch_tid_pid(inputs_[input]); + } + if (!switch_sequence_.empty() && + outputs_[output].stream->get_instruction_ordinal() > 0) { + switch_type_t switch_type = sched_type_t::SWITCH_INVALID; + if (prev_workload != inputs_[input].workload) + switch_type = sched_type_t::SWITCH_PROCESS; + else + switch_type = sched_type_t::SWITCH_THREAD; + // Inject kernel context switch code. Since the injected records belong to + // this input (the kernel is acting on behalf of this input) we insert them + // into the input's queue, but ahead of any prior queued items. This is why + // we walk in reverse, for the push_front calls to the deque. We update the + // tid of the records here to match. They are considered as + // is_record_synthetic() and do not affect input stream ordinals. + // XXX: These will appear before the top headers of a new thread which is + // slightly odd to have regular records with the new tid before the top + // headers. + if (!switch_sequence_[switch_type].empty()) { + for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; + i >= 0; --i) { + RecordType record = switch_sequence_[switch_type][i]; + record_type_set_tid(record, inputs_[input].tid); + inputs_[input].queue.emplace_front(record, false); + } + VPRINT(this, 3, + "Inserted %zu switch records for type %d from %d.%d to %d.%d\n", + switch_sequence_[switch_type].size(), switch_type, prev_workload, + outputs_[output].prev_input, inputs_[input].workload, input); } - VPRINT(this, 3, - "Inserted %zu switch records for type %d from %d.%d to %d.%d\n", - switch_sequence_[switch_type].size(), switch_type, prev_workload, - outputs_[output].prev_input, inputs_[input].workload, input); } } @@ -2517,6 +2531,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp } while (true) { input->cur_from_queue = false; + input->cur_real_record = false; if (input->needs_init) { // We pay the cost of this conditional to support ipc_reader_t::init() which // blocks and must be called right before reading its first record. @@ -2528,9 +2543,13 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp input->needs_init = false; } if (!input->queue.empty()) { - record = input->queue.front(); - input->queue.pop_front(); + record = input->queue.front().record; input->cur_from_queue = true; + input->cur_real_record = input->queue.front().is_real; + input->queue.pop_front(); + if (input->cur_real_record) { + --input->real_records_in_queue; + } } else { // We again have a flag check because reader_t::init() does an initial ++ // and so we want to skip that on the first record but perform a ++ prior @@ -2563,12 +2582,14 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp continue; } else { record = **input->reader; + input->cur_real_record = true; } } VPRINT(this, 5, "next_record[%d]: candidate record from %d (@%" PRId64 "): ", output, input->index, get_instr_ordinal(*input)); - if (input->instrs_pre_read > 0 && record_type_is_instr(record)) + if (input->instrs_pre_read > 0 && input->cur_real_record && + record_type_is_instr(record)) --input->instrs_pre_read; VDO(this, 5, print_record(record);); bool need_new_input = false; @@ -2586,7 +2607,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp // We have to put the candidate record in the queue before we release // the lock since another output may grab this input. VPRINT(this, 5, "next_record[%d]: queuing candidate record\n", output); - input->queue.push_back(record); + input->queue.emplace_back(record, input->cur_real_record); lock.unlock(); res = pick_next_input(output, blocked_time); if (res != sched_type_t::STATUS_OK && res != sched_type_t::STATUS_WAIT && @@ -2610,8 +2631,8 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp // we've already reset to 0. if (!preempt && options_.mapping == sched_type_t::MAP_TO_ANY_OUTPUT) { if (options_.quantum_unit == sched_type_t::QUANTUM_INSTRUCTIONS && - record_type_is_instr_boundary(record, - outputs_[output].last_record)) { + record_type_is_instr_boundary( + record, outputs_[output].last_record.record)) { assert(inputs_[prev_input].instrs_in_quantum > 0); --inputs_[prev_input].instrs_in_quantum; } else if (options_.quantum_unit == sched_type_t::QUANTUM_TIME) { @@ -2630,7 +2651,8 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp lock.lock(); if (res != sched_type_t::STATUS_SKIPPED) { // Get our candidate record back. - record = input->queue.back(); + record = input->queue.back().record; + input->cur_real_record = input->queue.back().is_real; input->queue.pop_back(); } } @@ -2643,7 +2665,8 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp if (input->needs_roi && options_.mapping != sched_type_t::MAP_AS_PREVIOUSLY && !input->regions_of_interest.empty()) { input_ordinal_t prev_input = input->index; - res = advance_region_of_interest(output, record, *input); + res = advance_region_of_interest(output, record, *input, + input->cur_real_record); if (res == sched_type_t::STATUS_SKIPPED) { // We need either the queue or to re-de-ref the reader so we loop, // but we do not want to come back here. @@ -2670,7 +2693,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp update_next_record(output, record); VDO(this, 4, print_record(record);); - outputs_[output].last_record = record; + outputs_[output].last_record = { record, input->cur_real_record }; record_type_has_tid(record, input->last_record_tid); record_type_has_pid(record, input->pid); return sched_type_t::STATUS_OK; @@ -2737,11 +2760,11 @@ scheduler_impl_tmpl_t::unread_last_record(output_ordinal input_info_t *&input) { auto &outinfo = outputs_[output]; - if (record_type_is_invalid(outinfo.last_record)) + if (record_type_is_invalid(outinfo.last_record.record)) return sched_type_t::STATUS_INVALID; if (!outinfo.speculation_stack.empty()) return sched_type_t::STATUS_INVALID; - record = outinfo.last_record; + record = outinfo.last_record.record; input = &inputs_[outinfo.cur_input]; std::lock_guard lock(*input->lock); VPRINT(this, 4, "next_record[%d]: unreading last record, from %d\n", output, @@ -2753,7 +2776,7 @@ scheduler_impl_tmpl_t::unread_last_record(output_ordinal if (options_.quantum_unit == sched_type_t::QUANTUM_INSTRUCTIONS && record_type_is_instr(record)) --input->instrs_in_quantum; - outinfo.last_record = create_invalid_record(); + outinfo.last_record = { create_invalid_record(), false }; return sched_type_t::STATUS_OK; } @@ -2765,7 +2788,7 @@ scheduler_impl_tmpl_t::start_speculation( auto &outinfo = outputs_[output]; if (outinfo.speculation_stack.empty()) { if (queue_current_record) { - if (record_type_is_invalid(outinfo.last_record)) + if (record_type_is_invalid(outinfo.last_record.record)) return sched_type_t::STATUS_INVALID; inputs_[outinfo.cur_input].queue.push_back(outinfo.last_record); } diff --git a/clients/drcachesim/scheduler/scheduler_impl.h b/clients/drcachesim/scheduler/scheduler_impl.h index 29a429be858..cb83d4a0a59 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.h +++ b/clients/drcachesim/scheduler/scheduler_impl.h @@ -159,6 +159,15 @@ template class scheduler_impl_tmpl_t protected: typedef speculator_tmpl_t spec_type_t; + struct next_record_t { + next_record_t(RecordType record, bool is_real) + : record(record) + , is_real(is_real) + { + } + RecordType record; + bool is_real = false; + }; struct input_info_t { input_info_t() : lock(new mutex_dbg_owned) @@ -195,8 +204,11 @@ template class scheduler_impl_tmpl_t // If non-empty these records should be returned before incrementing the reader. // This is used for read-ahead and inserting synthetic records. // We use a deque so we can iterate over it. - std::deque queue; + + std::deque queue; + uint64_t real_records_in_queue = 0; bool cur_from_queue; + bool cur_real_record; std::set binding; int priority = 0; std::vector regions_of_interest; @@ -423,7 +435,7 @@ template class scheduler_impl_tmpl_t output_info_t(scheduler_impl_tmpl_t *scheduler_impl, output_ordinal_t ordinal, typename spec_type_t::speculator_flags_t speculator_flags, - int rand_seed, RecordType last_record_init, int verbosity = 0) + int rand_seed, next_record_t last_record_init, int verbosity = 0) : self_stream(scheduler_impl, ordinal, verbosity) , stream(&self_stream) , ready_queue(rand_seed) @@ -467,7 +479,7 @@ template class scheduler_impl_tmpl_t // while this field holds the instruction's start PC. The use case is for // queueing a read-ahead instruction record for start_speculation(). addr_t prev_speculate_pc = 0; - RecordType last_record; // Set to TRACE_TYPE_INVALID in constructor. + next_record_t last_record; // Set to TRACE_TYPE_INVALID in constructor. // A list of schedule segments. During replay, this is read by other threads, // but it is only written at init time. std::vector record; @@ -687,7 +699,7 @@ template class scheduler_impl_tmpl_t // If STATUS_SKIPPED or STATUS_STOLE is returned, a new next record needs to be read. stream_status_t advance_region_of_interest(output_ordinal_t output, RecordType &record, - input_info_t &input); + input_info_t &input, bool &real_record); // Discards the contents of the input queue. Meant to be used when skipping // input records. diff --git a/clients/drcachesim/scheduler/scheduler_replay.cpp b/clients/drcachesim/scheduler/scheduler_replay.cpp index a79f037cfef..a582a838e22 100644 --- a/clients/drcachesim/scheduler/scheduler_replay.cpp +++ b/clients/drcachesim/scheduler/scheduler_replay.cpp @@ -425,7 +425,8 @@ scheduler_replay_tmpl_t::pick_next_input_for_mode( // a synthetic thread exit record. We need to first throw out the // queued candidate record, if any. this->clear_input_queue(inputs_[index]); - inputs_[index].queue.push_back(this->create_thread_exit(inputs_[index].tid)); + inputs_[index].queue.emplace_back(this->create_thread_exit(inputs_[index].tid), + false); VPRINT(this, 2, "early end for input %d\n", index); // We're done with this entry but we need the queued record to be read, // so we do not move past the entry. diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 0b134488f81..2c9baf75de0 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -5770,7 +5770,7 @@ test_unscheduled() } static void -test_kernel_switch_sequences() +test_kernel_switch_sequences(bool use_input_ordinals) { std::cerr << "\n----------------\nTesting kernel switch sequences\n"; static constexpr memref_tid_t TID_IN_SWITCHES = 1; @@ -5838,9 +5838,18 @@ test_kernel_switch_sequences() } sched_inputs.emplace_back(std::move(readers)); } + dynamorio::drmemtrace::scheduler_tmpl_t< + dynamorio::drmemtrace::_memref_t, + dynamorio::drmemtrace::reader_t>::scheduler_flags_t flags = + scheduler_t::SCHEDULER_DEFAULTS; + if (use_input_ordinals) { + flags = static_cast::scheduler_flags_t>( + flags | scheduler_t::SCHEDULER_USE_INPUT_ORDINALS); + } scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, - scheduler_t::DEPENDENCY_TIMESTAMPS, - scheduler_t::SCHEDULER_DEFAULTS, + scheduler_t::DEPENDENCY_TIMESTAMPS, flags, /*verbosity=*/3); sched_ops.quantum_duration_instrs = INSTR_QUANTUM; sched_ops.kernel_switch_reader = std::move(switch_reader); @@ -5888,9 +5897,12 @@ test_kernel_switch_sequences() sched_as_string[i] += 'A' + static_cast(memref.instr.tid - TID_BASE); } + bool now_switch = false; if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) + memref.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) { + now_switch = true; in_switch[i] = true; + } if (in_switch[i]) { // Test that switch code is marked synthetic. assert(outputs[i]->is_record_synthetic()); @@ -5899,10 +5911,13 @@ test_kernel_switch_sequences() assert(outputs[i]->get_input_interface()->get_record_ordinal() == prev_in_ord[i] || // Won't match if we just switched inputs. - (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == - TRACE_MARKER_TYPE_CONTEXT_SWITCH_START)); - assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]); + now_switch); + if (use_input_ordinals) { + assert(outputs[i]->get_record_ordinal() == prev_out_ord[i] || + now_switch); + } else { + assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]); + } } else assert(!outputs[i]->is_record_synthetic()); if (type_is_instr(memref.instr.type)) @@ -6698,7 +6713,6 @@ test_main(int argc, const char *argv[]) assert(argc == 2); // Avoid races with lazy drdecode init (b/279350357). dr_standalone_init(); - test_serial(); test_parallel(); test_param_checks(); @@ -6730,7 +6744,8 @@ test_main(int argc, const char *argv[]) test_inactive(); test_direct_switch(); test_unscheduled(); - test_kernel_switch_sequences(); + test_kernel_switch_sequences(/*use_input_ordinals=*/true); + test_kernel_switch_sequences(/*use_input_ordinals=*/false); test_random_schedule(); test_record_scheduler(); test_rebalancing(); From c4f2595f6fe19b5306f991b66ee4b9890e8be468 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 13:38:01 -0500 Subject: [PATCH 02/11] Add note about get_instruction_ordinal not being WAI for USE_INPUT_ORDINALS --- clients/drcachesim/scheduler/scheduler_impl.cpp | 6 ++++-- clients/drcachesim/tests/scheduler_unit_tests.cpp | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index bcc3476d401..b02425f58b3 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -2304,8 +2304,7 @@ scheduler_impl_tmpl_t::set_cur_input( if (inputs_[input].pid != INVALID_PID) { insert_switch_tid_pid(inputs_[input]); } - if (!switch_sequence_.empty() && - outputs_[output].stream->get_instruction_ordinal() > 0) { + if (!switch_sequence_.empty()) { switch_type_t switch_type = sched_type_t::SWITCH_INVALID; if (prev_workload != inputs_[input].workload) switch_type = sched_type_t::SWITCH_PROCESS; @@ -2731,6 +2730,9 @@ scheduler_impl_tmpl_t::update_next_record(output_ordinal VPRINT(this, 2, "output %d base timestamp = %zu\n", output, outputs_[output].base_timestamp); } + // FIXME: When USE_INPUT_ORDINALS is enabled, this returns the input-local + // instruction ordinal (which not only is not global, but it may also include + // the read-ahead instructions). uint64_t instr_ord = outputs_[output].stream->get_instruction_ordinal(); uint64_t idle_count = outputs_[output].idle_count; uintptr_t new_time = static_cast( diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 2c9baf75de0..a7d5c5ee085 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -5772,7 +5772,9 @@ test_unscheduled() static void test_kernel_switch_sequences(bool use_input_ordinals) { - std::cerr << "\n----------------\nTesting kernel switch sequences\n"; + std::cerr + << "\n----------------\nTesting kernel switch sequences for use_input_ordinals: " + << use_input_ordinals << "\n"; static constexpr memref_tid_t TID_IN_SWITCHES = 1; static constexpr addr_t PROCESS_SWITCH_PC_START = 0xfeed101; static constexpr addr_t THREAD_SWITCH_PC_START = 0xcafe101; From f53f9b0cea49ea3c51caf102cada76b0cf9a6fd0 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 13:58:33 -0500 Subject: [PATCH 03/11] Cleanup and renaming --- .../drcachesim/scheduler/scheduler_impl.cpp | 50 ++++++++++--------- clients/drcachesim/scheduler/scheduler_impl.h | 14 +++--- .../drcachesim/tests/scheduler_unit_tests.cpp | 2 + 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index b02425f58b3..a462567f92e 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -95,6 +95,9 @@ typedef dynamorio::drmemtrace::record_file_reader_t default_record_file_reader_t; #endif +static constexpr bool IS_REAL = true; +static constexpr bool IS_SYNTHETIC = false; + std::string replay_file_checker_t::check(archive_istream_t *infile) { @@ -564,8 +567,8 @@ scheduler_impl_tmpl_t::insert_switch_tid_pid( tid.size = 0; tid.addr = static_cast(input.tid); - input.queue.emplace_front(pid, false); - input.queue.emplace_front(tid, false); + input.queue.emplace_front(pid, IS_SYNTHETIC); + input.queue.emplace_front(tid, IS_SYNTHETIC); } /*************************************************************************** @@ -851,7 +854,7 @@ scheduler_impl_tmpl_t::init( // TODO i#5843: Add more flags for other options. : spec_type_t::LAST_FROM_TRACE, static_cast(get_time_micros()), - next_record_t(create_invalid_record(), false), verbosity_); + cached_record_t(create_invalid_record(), false), verbosity_); if (options_.single_lockstep_output) outputs_.back().stream = global_stream_.get(); if (options_.schedule_record_ostream != nullptr) { @@ -1524,9 +1527,9 @@ scheduler_impl_tmpl_t::get_initial_input_content( // Maybe we should disallow it so we don't need checks like this? options_.mapping != sched_type_t::MAP_AS_PREVIOUSLY) { RecordType record = create_invalid_record(); - bool real_record = false; + bool is_record_real = false; stream_status_t res = - advance_region_of_interest(/*output=*/-1, record, input, real_record); + advance_region_of_interest(/*output=*/-1, record, input, is_record_real); if (res == sched_type_t::STATUS_SKIPPED) { input.next_timestamp = static_cast(input.reader->get_last_timestamp()); @@ -1594,7 +1597,7 @@ scheduler_impl_tmpl_t::get_initial_input_content( // we skip (see skip_instructions()). Thus, we abort with an error. if (record_type_is_instr(record)) break; - input.queue.emplace_back(record, true); + input.queue.emplace_back(record, IS_REAL); ++input.real_records_in_queue; ++(*input.reader); } @@ -1635,7 +1638,7 @@ scheduler_impl_tmpl_t::open_reader( RecordType record = **reader; if (record_type_has_tid(record, tid)) break; - input.queue.emplace_back(record, true); + input.queue.emplace_back(record, IS_REAL); ++input.real_records_in_queue; ++(*reader); } @@ -1826,7 +1829,7 @@ scheduler_impl_tmpl_t::get_input_record_ordinal( uint64_t ord = inputs_[index].reader->get_record_ordinal(); if (get_instr_ordinal(inputs_[index]) == 0) { uint64_t adjust = - inputs_[index].cur_from_queue && inputs_[index].cur_real_record ? 1 : 0; + inputs_[index].cur_from_queue && inputs_[index].is_cur_record_real ? 1 : 0; adjust += inputs_[index].real_records_in_queue; assert(ord >= adjust); // Account for get_initial_input_content() readahead for filetype/timestamp. @@ -1861,7 +1864,7 @@ scheduler_impl_tmpl_t::get_input_first_timestamp( uint64_t res = inputs_[index].reader->get_first_timestamp(); if (get_instr_ordinal(inputs_[index]) == 0 && (inputs_[index].real_records_in_queue > 0 || - (inputs_[index].cur_from_queue && inputs_[index].cur_real_record))) { + (inputs_[index].cur_from_queue && inputs_[index].is_cur_record_real))) { // Account for get_initial_input_content() readahead for filetype/timestamp. res = 0; } @@ -1881,7 +1884,7 @@ scheduler_impl_tmpl_t::get_input_last_timestamp( uint64_t res = inputs_[index].reader->get_last_timestamp(); if (get_instr_ordinal(inputs_[index]) == 0 && (inputs_[index].real_records_in_queue > 0 || - (inputs_[index].cur_from_queue && inputs_[index].cur_real_record))) { + (inputs_[index].cur_from_queue && inputs_[index].is_cur_record_real))) { // Account for get_initial_input_content() readahead for filetype/timestamp. res = 0; } @@ -1891,7 +1894,8 @@ scheduler_impl_tmpl_t::get_input_last_timestamp( template typename scheduler_tmpl_t::stream_status_t scheduler_impl_tmpl_t::advance_region_of_interest( - output_ordinal_t output, RecordType &record, input_info_t &input, bool &real_record) + output_ordinal_t output, RecordType &record, input_info_t &input, + bool &is_record_real) { assert(input.lock->owned_by_cur_thread()); uint64_t cur_instr = get_instr_ordinal(input); @@ -1942,9 +1946,9 @@ scheduler_impl_tmpl_t::advance_region_of_interest( if (input.cur_region > 0) { VPRINT(this, 3, "skip_instructions input=%d: inserting separator marker\n", input.index); - input.queue.emplace_back(record, real_record); + input.queue.emplace_back(record, is_record_real); record = create_region_separator_marker(input.tid, input.cur_region); - real_record = false; + is_record_real = false; } return sched_type_t::STATUS_OK; } @@ -2324,7 +2328,7 @@ scheduler_impl_tmpl_t::set_cur_input( i >= 0; --i) { RecordType record = switch_sequence_[switch_type][i]; record_type_set_tid(record, inputs_[input].tid); - inputs_[input].queue.emplace_front(record, false); + inputs_[input].queue.emplace_front(record, IS_SYNTHETIC); } VPRINT(this, 3, "Inserted %zu switch records for type %d from %d.%d to %d.%d\n", @@ -2530,7 +2534,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp } while (true) { input->cur_from_queue = false; - input->cur_real_record = false; + input->is_cur_record_real = false; if (input->needs_init) { // We pay the cost of this conditional to support ipc_reader_t::init() which // blocks and must be called right before reading its first record. @@ -2544,9 +2548,9 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp if (!input->queue.empty()) { record = input->queue.front().record; input->cur_from_queue = true; - input->cur_real_record = input->queue.front().is_real; + input->is_cur_record_real = input->queue.front().is_real; input->queue.pop_front(); - if (input->cur_real_record) { + if (input->is_cur_record_real) { --input->real_records_in_queue; } } else { @@ -2581,13 +2585,13 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp continue; } else { record = **input->reader; - input->cur_real_record = true; + input->is_cur_record_real = true; } } VPRINT(this, 5, "next_record[%d]: candidate record from %d (@%" PRId64 "): ", output, input->index, get_instr_ordinal(*input)); - if (input->instrs_pre_read > 0 && input->cur_real_record && + if (input->instrs_pre_read > 0 && input->is_cur_record_real && record_type_is_instr(record)) --input->instrs_pre_read; VDO(this, 5, print_record(record);); @@ -2606,7 +2610,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp // We have to put the candidate record in the queue before we release // the lock since another output may grab this input. VPRINT(this, 5, "next_record[%d]: queuing candidate record\n", output); - input->queue.emplace_back(record, input->cur_real_record); + input->queue.emplace_back(record, input->is_cur_record_real); lock.unlock(); res = pick_next_input(output, blocked_time); if (res != sched_type_t::STATUS_OK && res != sched_type_t::STATUS_WAIT && @@ -2651,7 +2655,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp if (res != sched_type_t::STATUS_SKIPPED) { // Get our candidate record back. record = input->queue.back().record; - input->cur_real_record = input->queue.back().is_real; + input->is_cur_record_real = input->queue.back().is_real; input->queue.pop_back(); } } @@ -2665,7 +2669,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp !input->regions_of_interest.empty()) { input_ordinal_t prev_input = input->index; res = advance_region_of_interest(output, record, *input, - input->cur_real_record); + input->is_cur_record_real); if (res == sched_type_t::STATUS_SKIPPED) { // We need either the queue or to re-de-ref the reader so we loop, // but we do not want to come back here. @@ -2692,7 +2696,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp update_next_record(output, record); VDO(this, 4, print_record(record);); - outputs_[output].last_record = { record, input->cur_real_record }; + outputs_[output].last_record = { record, input->is_cur_record_real }; record_type_has_tid(record, input->last_record_tid); record_type_has_pid(record, input->pid); return sched_type_t::STATUS_OK; diff --git a/clients/drcachesim/scheduler/scheduler_impl.h b/clients/drcachesim/scheduler/scheduler_impl.h index cb83d4a0a59..4f8bfbe93b2 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.h +++ b/clients/drcachesim/scheduler/scheduler_impl.h @@ -159,8 +159,8 @@ template class scheduler_impl_tmpl_t protected: typedef speculator_tmpl_t spec_type_t; - struct next_record_t { - next_record_t(RecordType record, bool is_real) + struct cached_record_t { + cached_record_t(RecordType record, bool is_real) : record(record) , is_real(is_real) { @@ -205,10 +205,10 @@ template class scheduler_impl_tmpl_t // This is used for read-ahead and inserting synthetic records. // We use a deque so we can iterate over it. - std::deque queue; + std::deque queue; uint64_t real_records_in_queue = 0; bool cur_from_queue; - bool cur_real_record; + bool is_cur_record_real; std::set binding; int priority = 0; std::vector regions_of_interest; @@ -435,7 +435,7 @@ template class scheduler_impl_tmpl_t output_info_t(scheduler_impl_tmpl_t *scheduler_impl, output_ordinal_t ordinal, typename spec_type_t::speculator_flags_t speculator_flags, - int rand_seed, next_record_t last_record_init, int verbosity = 0) + int rand_seed, cached_record_t last_record_init, int verbosity = 0) : self_stream(scheduler_impl, ordinal, verbosity) , stream(&self_stream) , ready_queue(rand_seed) @@ -479,7 +479,7 @@ template class scheduler_impl_tmpl_t // while this field holds the instruction's start PC. The use case is for // queueing a read-ahead instruction record for start_speculation(). addr_t prev_speculate_pc = 0; - next_record_t last_record; // Set to TRACE_TYPE_INVALID in constructor. + cached_record_t last_record; // Set to TRACE_TYPE_INVALID in constructor. // A list of schedule segments. During replay, this is read by other threads, // but it is only written at init time. std::vector record; @@ -699,7 +699,7 @@ template class scheduler_impl_tmpl_t // If STATUS_SKIPPED or STATUS_STOLE is returned, a new next record needs to be read. stream_status_t advance_region_of_interest(output_ordinal_t output, RecordType &record, - input_info_t &input, bool &real_record); + input_info_t &input, bool &is_record_real); // Discards the contents of the input queue. Meant to be used when skipping // input records. diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index a7d5c5ee085..27d807642b9 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -5916,6 +5916,7 @@ test_kernel_switch_sequences(bool use_input_ordinals) now_switch); if (use_input_ordinals) { assert(outputs[i]->get_record_ordinal() == prev_out_ord[i] || + // Won't match if we just switched inputs. now_switch); } else { assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]); @@ -6715,6 +6716,7 @@ test_main(int argc, const char *argv[]) assert(argc == 2); // Avoid races with lazy drdecode init (b/279350357). dr_standalone_init(); + test_serial(); test_parallel(); test_param_checks(); From 1fbc2753bd34418a5da3ca7049eb318cc629ca45 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 14:14:27 -0500 Subject: [PATCH 04/11] Add FIXME for instrs_pre_read tracking issue --- clients/drcachesim/scheduler/scheduler_impl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index a462567f92e..adbb259f654 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -2591,6 +2591,9 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp VPRINT(this, 5, "next_record[%d]: candidate record from %d (@%" PRId64 "): ", output, input->index, get_instr_ordinal(*input)); + // FIXME: This is likely too premature; we should either move it to later, + // or undo the decrement for cases we don't actually end up returning the + // pre-read instruction to the caller. if (input->instrs_pre_read > 0 && input->is_cur_record_real && record_type_is_instr(record)) --input->instrs_pre_read; From 5f682f06319f0d7867ebe55b77a94d28ac171e31 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 14:39:03 -0500 Subject: [PATCH 05/11] Some more cleanup --- clients/drcachesim/scheduler/scheduler_impl.cpp | 9 +++++---- clients/drcachesim/scheduler/scheduler_replay.cpp | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index adbb259f654..805751a6c55 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -1927,7 +1927,7 @@ scheduler_impl_tmpl_t::advance_region_of_interest( if (status != sched_type_t::STATUS_OK) return status; } - input.queue.push_back({ create_thread_exit(input.tid), false }); + input.queue.push_back({ create_thread_exit(input.tid), IS_SYNTHETIC }); stream_status_t status = mark_input_eof(input); // For early EOF we still need our synthetic exit so do not return it yet. if (status != sched_type_t::STATUS_OK && @@ -2088,7 +2088,8 @@ scheduler_impl_tmpl_t::skip_instructions(input_info_t &i VPRINT(this, 3, "skip_instructions input=%d: inserting separator marker\n", input.index); input.queue.push_back( - { create_region_separator_marker(input.tid, input.cur_region), false }); + { create_region_separator_marker(input.tid, input.cur_region), + IS_SYNTHETIC }); } return sched_type_t::STATUS_SKIPPED; } @@ -2738,7 +2739,7 @@ scheduler_impl_tmpl_t::update_next_record(output_ordinal outputs_[output].base_timestamp); } // FIXME: When USE_INPUT_ORDINALS is enabled, this returns the input-local - // instruction ordinal (which not only is not global, but it may also include + // instruction ordinal (which not only is not global, but it would also count // the read-ahead instructions). uint64_t instr_ord = outputs_[output].stream->get_instruction_ordinal(); uint64_t idle_count = outputs_[output].idle_count; @@ -2785,7 +2786,7 @@ scheduler_impl_tmpl_t::unread_last_record(output_ordinal if (options_.quantum_unit == sched_type_t::QUANTUM_INSTRUCTIONS && record_type_is_instr(record)) --input->instrs_in_quantum; - outinfo.last_record = { create_invalid_record(), false }; + outinfo.last_record = { create_invalid_record(), IS_SYNTHETIC }; return sched_type_t::STATUS_OK; } diff --git a/clients/drcachesim/scheduler/scheduler_replay.cpp b/clients/drcachesim/scheduler/scheduler_replay.cpp index a582a838e22..86bcf1994cd 100644 --- a/clients/drcachesim/scheduler/scheduler_replay.cpp +++ b/clients/drcachesim/scheduler/scheduler_replay.cpp @@ -426,7 +426,7 @@ scheduler_replay_tmpl_t::pick_next_input_for_mode( // queued candidate record, if any. this->clear_input_queue(inputs_[index]); inputs_[index].queue.emplace_back(this->create_thread_exit(inputs_[index].tid), - false); + /*is_real=*/false); VPRINT(this, 2, "early end for input %d\n", index); // We're done with this entry but we need the queued record to be read, // so we do not move past the entry. From 5efb673cf8d2f71af448bdd6858a65cbf0c8cd71 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 15:31:28 -0500 Subject: [PATCH 06/11] Include switch sequence injection count in scheduler stats --- clients/drcachesim/common/memtrace_stream.h | 6 +++++- clients/drcachesim/scheduler/scheduler_impl.cpp | 7 ++++++- clients/drcachesim/tests/core_serial.templatex | 1 + .../drcachesim/tests/schedule_stats_nopreempt.templatex | 5 +++++ clients/drcachesim/tests/scheduler_unit_tests.cpp | 9 +++++++++ clients/drcachesim/tests/switch_insertion.templatex | 2 +- clients/drcachesim/tools/schedule_stats.cpp | 7 ++++++- clients/drcachesim/tools/schedule_stats.h | 4 +++- 8 files changed, 36 insertions(+), 5 deletions(-) diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h index b479ff5785c..44e48891e3a 100644 --- a/clients/drcachesim/common/memtrace_stream.h +++ b/clients/drcachesim/common/memtrace_stream.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2022-2024 Google, Inc. All rights reserved. + * Copyright (c) 2022-2025 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -112,6 +112,10 @@ class memtrace_stream_t { * inputs from being scheduled onto an output. */ SCHED_STAT_HIT_OUTPUT_LIMIT, + /** + * Counts the instances when the context switch sequence was injected. + */ + SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS, /** Count of statistic types. */ SCHED_STAT_TYPE_COUNT, }; diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index 805751a6c55..8d7d605b191 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -659,7 +659,7 @@ scheduler_impl_tmpl_t::~scheduler_impl_tmpl_t() outputs_[i].stats[memtrace_stream_t::SCHED_STAT_RUNQUEUE_STEALS]); VPRINT(this, 1, " %-35s: %9" PRId64 "\n", "Runqueue rebalances", outputs_[i].stats[memtrace_stream_t::SCHED_STAT_RUNQUEUE_REBALANCES]); - VPRINT(this, 1, " %-35s: %9" PRId64 "\n", "Ouput limits hit", + VPRINT(this, 1, " %-35s: %9" PRId64 "\n", "Output limits hit", outputs_[i].stats[memtrace_stream_t::SCHED_STAT_HIT_OUTPUT_LIMIT]); #ifndef NDEBUG VPRINT(this, 1, " %-35s: %9" PRId64 "\n", "Runqueue lock acquired", @@ -667,6 +667,9 @@ scheduler_impl_tmpl_t::~scheduler_impl_tmpl_t() VPRINT(this, 1, " %-35s: %9" PRId64 "\n", "Runqueue lock contended", outputs_[i].ready_queue.lock->get_count_contended()); #endif + VPRINT( + this, 1, " %-35s: %9" PRId64 "\n", "Switch sequence injections", + outputs_[i].stats[memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS]); } } @@ -2325,6 +2328,8 @@ scheduler_impl_tmpl_t::set_cur_input( // slightly odd to have regular records with the new tid before the top // headers. if (!switch_sequence_[switch_type].empty()) { + ++outputs_[output] + .stats[memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS]; for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; i >= 0; --i) { RecordType record = switch_sequence_[switch_type][i]; diff --git a/clients/drcachesim/tests/core_serial.templatex b/clients/drcachesim/tests/core_serial.templatex index 1fdc9b85311..4ad9fe5dfe0 100644 --- a/clients/drcachesim/tests/core_serial.templatex +++ b/clients/drcachesim/tests/core_serial.templatex @@ -8,6 +8,7 @@ Total counts: 106490 instructions per context switch 6 voluntary context switches 0 direct context switches + 0 context switch sequence injections 100.00% voluntary switches 0.00% direct switches 4 switches input-to-input diff --git a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex index b04b4d291de..e511c57a31a 100644 --- a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex +++ b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex @@ -8,6 +8,7 @@ Total counts: 106490 instructions per context switch 6 voluntary context switches 0 direct context switches + 0 context switch sequence injections 100\.00% voluntary switches 0\.00% direct switches 5 switches input-to-input @@ -45,6 +46,7 @@ Core #0 counts: *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches + 0 context switch sequence injections 100\.00% voluntary switches 0\.00% direct switches .* @@ -56,6 +58,7 @@ Core #1 counts: *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches + 0 context switch sequence injections 100\.00% voluntary switches 0\.00% direct switches .* @@ -67,6 +70,7 @@ Core #2 counts: *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches + 0 context switch sequence injections 100\.00% voluntary switches 0\.00% direct switches .* @@ -78,6 +82,7 @@ Core #3 counts: *[0-9]* instructions per context switch . voluntary context switches 0 direct context switches + 0 context switch sequence injections 100\.00% voluntary switches 0\.00% direct switches .* diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 27d807642b9..bbb9b47ca6a 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -5876,6 +5876,7 @@ test_kernel_switch_sequences(bool use_input_ordinals) std::vector in_switch(NUM_OUTPUTS, false); std::vector prev_in_ord(NUM_OUTPUTS, 0); std::vector prev_out_ord(NUM_OUTPUTS, 0); + std::vector switch_seq_count(NUM_OUTPUTS, 0); while (num_eof < NUM_OUTPUTS) { for (int i = 0; i < NUM_OUTPUTS; i++) { if (eof[i]) @@ -5930,7 +5931,9 @@ test_kernel_switch_sequences(bool use_input_ordinals) case TRACE_MARKER_TYPE_VERSION: sched_as_string[i] += 'v'; break; case TRACE_MARKER_TYPE_TIMESTAMP: sched_as_string[i] += '0'; break; case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END: + assert(in_switch[i]); in_switch[i] = false; + ++switch_seq_count[i]; ANNOTATE_FALLTHROUGH; case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START: if (memref.marker.marker_value == scheduler_t::SWITCH_PROCESS) @@ -5948,6 +5951,12 @@ test_kernel_switch_sequences(bool use_input_ordinals) prev_out_ord[i] = outputs[i]->get_record_ordinal(); } } + for (int i = 0; i < NUM_OUTPUTS; i++) { + assert(switch_seq_count[i] > 0); + assert(switch_seq_count[i] == + static_cast(outputs[i]->get_schedule_statistic( + memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS))); + } // Check the high-level strings. for (int i = 0; i < NUM_OUTPUTS; i++) { std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; diff --git a/clients/drcachesim/tests/switch_insertion.templatex b/clients/drcachesim/tests/switch_insertion.templatex index 91f6dc04667..b8668f659dd 100644 --- a/clients/drcachesim/tests/switch_insertion.templatex +++ b/clients/drcachesim/tests/switch_insertion.templatex @@ -3,6 +3,6 @@ Total counts: [1-9][0-9][0-9][0-9][0-9][0-9] total \(fetched\) instructions 5971 total unique \(fetched\) instructions [1-9][0-9][0-9][0-9][0-9][0-9] total userspace instructions - [1-9][0-9][0-9] total kernel instructions + [1-9][0-9] total kernel instructions [1-9][0-9][0-9][0-9][0-9][0-9] total non-fetched instructions .* diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index 5d4b26b6b81..abc1ae6e044 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2024 Google, Inc. All rights reserved. + * Copyright (c) 2017-2025 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -166,6 +166,9 @@ schedule_stats_t::get_scheduler_stats(memtrace_stream_t *stream, counters_t &cou memtrace_stream_t::SCHED_STAT_RUNQUEUE_REBALANCES)); counters.at_output_limit = static_cast( stream->get_schedule_statistic(memtrace_stream_t::SCHED_STAT_HIT_OUTPUT_LIMIT)); + counters.switch_sequence_injections = + static_cast(stream->get_schedule_statistic( + memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS)); // XXX: Currently, schedule_stats is measuring swap-ins to a real input. If we // want to match what "perf" targeting this app would record, which is swap-outs, @@ -420,6 +423,8 @@ schedule_stats_t::print_counters(const counters_t &counters) << counters.voluntary_switches << " voluntary context switches\n"; std::cerr << std::setw(12) << counters.direct_switches << " direct context switches\n"; + std::cerr << std::setw(12) << counters.switch_sequence_injections + << " context switch sequence injections\n"; print_percentage(static_cast(counters.voluntary_switches), static_cast(counters.total_switches), "% voluntary switches\n"); diff --git a/clients/drcachesim/tools/schedule_stats.h b/clients/drcachesim/tools/schedule_stats.h index cc082a84232..95133455d4e 100644 --- a/clients/drcachesim/tools/schedule_stats.h +++ b/clients/drcachesim/tools/schedule_stats.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023-2024 Google, Inc. All rights reserved. + * Copyright (c) 2023-2025 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -216,6 +216,7 @@ class schedule_stats_t : public analysis_tool_t { syscalls += rhs.syscalls; maybe_blocking_syscalls += rhs.maybe_blocking_syscalls; direct_switch_requests += rhs.direct_switch_requests; + switch_sequence_injections += rhs.switch_sequence_injections; observed_migrations += rhs.observed_migrations; waits += rhs.waits; idles += rhs.idles; @@ -252,6 +253,7 @@ class schedule_stats_t : public analysis_tool_t { int64_t syscalls = 0; int64_t maybe_blocking_syscalls = 0; int64_t direct_switch_requests = 0; + int64_t switch_sequence_injections = 0; // Our observed migrations will be <= the scheduler's reported migrations // for a dynamic schedule as we don't know the initial runqueue allocation // and so can't see the migration of an input that didn't execute in the From e790c3da39e6c4925c4023ea45b0439e0849bed0 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 17:18:46 -0500 Subject: [PATCH 07/11] Fix injection count --- .../drcachesim/scheduler/scheduler_impl.cpp | 96 +++++++++++-------- clients/drcachesim/scheduler/scheduler_impl.h | 4 +- 2 files changed, 57 insertions(+), 43 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index 8d7d605b191..9d8c1f0c358 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -2308,42 +2308,6 @@ scheduler_impl_tmpl_t::set_cur_input( stream->page_size_ = inputs_[input].reader->get_page_size(); } - if (prev_workload >= 0) { - if (inputs_[input].pid != INVALID_PID) { - insert_switch_tid_pid(inputs_[input]); - } - if (!switch_sequence_.empty()) { - switch_type_t switch_type = sched_type_t::SWITCH_INVALID; - if (prev_workload != inputs_[input].workload) - switch_type = sched_type_t::SWITCH_PROCESS; - else - switch_type = sched_type_t::SWITCH_THREAD; - // Inject kernel context switch code. Since the injected records belong to - // this input (the kernel is acting on behalf of this input) we insert them - // into the input's queue, but ahead of any prior queued items. This is why - // we walk in reverse, for the push_front calls to the deque. We update the - // tid of the records here to match. They are considered as - // is_record_synthetic() and do not affect input stream ordinals. - // XXX: These will appear before the top headers of a new thread which is - // slightly odd to have regular records with the new tid before the top - // headers. - if (!switch_sequence_[switch_type].empty()) { - ++outputs_[output] - .stats[memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS]; - for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; - i >= 0; --i) { - RecordType record = switch_sequence_[switch_type][i]; - record_type_set_tid(record, inputs_[input].tid); - inputs_[input].queue.emplace_front(record, IS_SYNTHETIC); - } - VPRINT(this, 3, - "Inserted %zu switch records for type %d from %d.%d to %d.%d\n", - switch_sequence_[switch_type].size(), switch_type, prev_workload, - outputs_[output].prev_input, inputs_[input].workload, input); - } - } - } - inputs_[input].prev_time_in_quantum = outputs_[output].cur_time->load(std::memory_order_acquire); @@ -2436,27 +2400,77 @@ scheduler_impl_tmpl_t::pick_next_input(output_ordinal_t } // We can't easily place these stats inside set_cur_input() as we call that to // temporarily give up our input. - update_switch_stats(output, prev_index, index); + on_context_switch(output, prev_index, index); set_cur_input(output, index); return res; } template void -scheduler_impl_tmpl_t::update_switch_stats( +scheduler_impl_tmpl_t::on_context_switch( output_ordinal_t output, input_ordinal_t prev_input, input_ordinal_t new_input) { + bool do_inject_switch_seq = false; if (prev_input == new_input) ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_NOP]; else if (prev_input != sched_type_t::INVALID_INPUT_ORDINAL && - new_input != sched_type_t::INVALID_INPUT_ORDINAL) + new_input != sched_type_t::INVALID_INPUT_ORDINAL) { ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT]; - else if (new_input == sched_type_t::INVALID_INPUT_ORDINAL) + do_inject_switch_seq = true; + } else if (new_input == sched_type_t::INVALID_INPUT_ORDINAL) ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE]; else { ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT]; // Reset the flag so we'll try to steal if we go idle again. outputs_[output].tried_to_steal_on_idle = false; + do_inject_switch_seq = true; + } + + // We want to insert the context switch sequence on input-to-input and + // idle-to-input cases. This is a better control point to do that than + // set_cur_input. Here we get the stolen input events too, and we don't have + // to filter out the init-time set_cur_input cases. + // RFC: Was there any other benefit to doing this in set_cur_input before? + // It is harder to differentiate idle-to-input events from start of output when + // USE_INPUT_ORDINALS is set, since get_instruction_ordinal() does not return the + // intended global value in that case. + if (do_inject_switch_seq) { + if (inputs_[new_input].pid != INVALID_PID) { + insert_switch_tid_pid(inputs_[new_input]); + } + if (!switch_sequence_.empty()) { + switch_type_t switch_type = sched_type_t::SWITCH_INVALID; + if (prev_input == sched_type_t::INVALID_INPUT_ORDINAL || + inputs_[prev_input].workload != inputs_[new_input].workload) + switch_type = sched_type_t::SWITCH_PROCESS; + else + switch_type = sched_type_t::SWITCH_THREAD; + // Inject kernel context switch code. Since the injected records belong to + // this input (the kernel is acting on behalf of this input) we insert them + // into the input's queue, but ahead of any prior queued items. This is why + // we walk in reverse, for the push_front calls to the deque. We update the + // tid of the records here to match. They are considered as + // is_record_synthetic() and do not affect input stream ordinals. + // XXX: These will appear before the top headers of a new thread which is + // slightly odd to have regular records with the new tid before the top + // headers. + if (!switch_sequence_[switch_type].empty()) { + ++outputs_[output] + .stats[memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS]; + for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; + i >= 0; --i) { + RecordType record = switch_sequence_[switch_type][i]; + record_type_set_tid(record, inputs_[new_input].tid); + inputs_[new_input].queue.emplace_front(record, IS_SYNTHETIC); + } + VPRINT(this, 3, "Inserted %zu switch for type %d from %d.%d to %d.%d\n", + switch_sequence_[switch_type].size(), switch_type, + prev_input != sched_type_t::INVALID_INPUT_ORDINAL + ? inputs_[prev_input].workload + : -1, + prev_input, inputs_[new_input].workload, new_input); + } + } } } @@ -2688,7 +2702,7 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp } else if (res == sched_type_t::STATUS_STOLE) { // We need to loop to get the new record. input = &inputs_[outputs_[output].cur_input]; - update_switch_stats(output, prev_input, input->index); + on_context_switch(output, prev_input, input->index); lock.unlock(); lock = std::unique_lock(*input->lock); lock.lock(); diff --git a/clients/drcachesim/scheduler/scheduler_impl.h b/clients/drcachesim/scheduler/scheduler_impl.h index 4f8bfbe93b2..3b4c7fe07e2 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.h +++ b/clients/drcachesim/scheduler/scheduler_impl.h @@ -418,8 +418,8 @@ template class scheduler_impl_tmpl_t scale_blocked_time(uint64_t initial_time) const; void - update_switch_stats(output_ordinal_t output, input_ordinal_t prev_input, - input_ordinal_t new_input); + on_context_switch(output_ordinal_t output, input_ordinal_t prev_input, + input_ordinal_t new_input); /// /////////////////////////////////////////////////////////////////////////// From 3502e28d5cbfc3f8e5e6ea4ba534334c3a7818ed Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 24 Feb 2025 19:20:02 -0500 Subject: [PATCH 08/11] Change switch_insertion to use schedule_stats to verify switch injection count --- .../drcachesim/scheduler/scheduler_impl.cpp | 12 ++++--- .../tests/switch_insertion.templatex | 32 +++++++++++++++---- suite/tests/CMakeLists.txt | 2 +- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index 9d8c1f0c358..e7712d2eec5 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -2431,9 +2431,11 @@ scheduler_impl_tmpl_t::on_context_switch( // set_cur_input. Here we get the stolen input events too, and we don't have // to filter out the init-time set_cur_input cases. // RFC: Was there any other benefit to doing this in set_cur_input before? - // It is harder to differentiate idle-to-input events from start of output when - // USE_INPUT_ORDINALS is set, since get_instruction_ordinal() does not return the - // intended global value in that case. + // If this logic is kept a part of set_cur_input, when USE_INPUT_ORDINALS is + // set, it is harder to differentiate between idle-to-input events and start + // of output; this is because get_instruction_ordinal() does not return the + // intended global value in that case (it returns the input-local value + // which is also not adjusted for the scheduler read-ahead, so is non-zero). if (do_inject_switch_seq) { if (inputs_[new_input].pid != INVALID_PID) { insert_switch_tid_pid(inputs_[new_input]); @@ -2758,8 +2760,8 @@ scheduler_impl_tmpl_t::update_next_record(output_ordinal outputs_[output].base_timestamp); } // FIXME: When USE_INPUT_ORDINALS is enabled, this returns the input-local - // instruction ordinal (which not only is not global, but it would also count - // the read-ahead instructions). + // instruction ordinal (which not only is not global, but also counts the + // read-ahead instructions). uint64_t instr_ord = outputs_[output].stream->get_instruction_ordinal(); uint64_t idle_count = outputs_[output].idle_count; uintptr_t new_time = static_cast( diff --git a/clients/drcachesim/tests/switch_insertion.templatex b/clients/drcachesim/tests/switch_insertion.templatex index b8668f659dd..096cf5355da 100644 --- a/clients/drcachesim/tests/switch_insertion.templatex +++ b/clients/drcachesim/tests/switch_insertion.templatex @@ -1,8 +1,28 @@ -Basic counts tool results: +Schedule stats tool results: Total counts: - [1-9][0-9][0-9][0-9][0-9][0-9] total \(fetched\) instructions - 5971 total unique \(fetched\) instructions - [1-9][0-9][0-9][0-9][0-9][0-9] total userspace instructions - [1-9][0-9] total kernel instructions - [1-9][0-9][0-9][0-9][0-9][0-9] total non-fetched instructions + 4 cores +.* + 638960 instructions + 11 total context switches +.* + 6 voluntary context switches + 0 direct context switches + 11 context switch sequence injections +.* + 9 switches input-to-input + 5 switches input-to-idle + 2 switches idle-to-input +.* +Core #0 counts: +.* + 117031 instructions + 8 total context switches +.* + 3 voluntary context switches + 0 direct context switches + 8 context switch sequence injections +.* + 6 switches input-to-input + 3 switches input-to-idle + 2 switches idle-to-input .* diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index aa1c39b6d21..9ec5d3d6f47 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4067,7 +4067,7 @@ if (BUILD_CLIENTS) set(switch_file "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/mock_switch_sequences.x64.zip") torunonly_simtool(switch_insertion ${ci_shared_app} - "-indir ${thread_trace_dir} -tool basic_counts -core_sharded -sched_quantum 1000 -sched_switch_file ${switch_file}" + "-indir ${thread_trace_dir} -tool schedule_stats -core_sharded -sched_quantum 1000 -sched_switch_file ${switch_file}" "") set(tool.switch_insertion_rawtemp ON) # no preprocessor From 052145309d5287faf3a4ad8980da3faa1d067f9c Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 25 Feb 2025 16:18:36 -0500 Subject: [PATCH 09/11] Reviewer suggested edits from diffbase PRs --- clients/drcachesim/common/memtrace_stream.h | 4 +- .../drcachesim/scheduler/scheduler_impl.cpp | 93 ++++++++++--------- .../drcachesim/tests/scheduler_unit_tests.cpp | 2 +- clients/drcachesim/tools/schedule_stats.cpp | 2 +- 4 files changed, 51 insertions(+), 50 deletions(-) diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h index 44e48891e3a..b02b4dfddf7 100644 --- a/clients/drcachesim/common/memtrace_stream.h +++ b/clients/drcachesim/common/memtrace_stream.h @@ -113,9 +113,9 @@ class memtrace_stream_t { */ SCHED_STAT_HIT_OUTPUT_LIMIT, /** - * Counts the instances when the context switch sequence was injected. + * Counts the instances when the kernel context switch sequence was injected. */ - SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS, + SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS, /** Count of statistic types. */ SCHED_STAT_TYPE_COUNT, }; diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index e7712d2eec5..9febd602219 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -669,7 +669,8 @@ scheduler_impl_tmpl_t::~scheduler_impl_tmpl_t() #endif VPRINT( this, 1, " %-35s: %9" PRId64 "\n", "Switch sequence injections", - outputs_[i].stats[memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS]); + outputs_[i] + .stats[memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS]); } } @@ -2417,9 +2418,13 @@ scheduler_impl_tmpl_t::on_context_switch( new_input != sched_type_t::INVALID_INPUT_ORDINAL) { ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT]; do_inject_switch_seq = true; - } else if (new_input == sched_type_t::INVALID_INPUT_ORDINAL) + } else if (new_input == sched_type_t::INVALID_INPUT_ORDINAL) { + // XXX: For now, we do not inject a kernel context switch sequence on + // input-to-idle transitions (note that we do so on idle-to-input though). + // However, we may want to inject some other suitable sequence, but we're not + // sure yet. ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE]; - else { + } else { ++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT]; // Reset the flag so we'll try to steal if we go idle again. outputs_[output].tried_to_steal_on_idle = false; @@ -2430,50 +2435,46 @@ scheduler_impl_tmpl_t::on_context_switch( // idle-to-input cases. This is a better control point to do that than // set_cur_input. Here we get the stolen input events too, and we don't have // to filter out the init-time set_cur_input cases. - // RFC: Was there any other benefit to doing this in set_cur_input before? - // If this logic is kept a part of set_cur_input, when USE_INPUT_ORDINALS is - // set, it is harder to differentiate between idle-to-input events and start - // of output; this is because get_instruction_ordinal() does not return the - // intended global value in that case (it returns the input-local value - // which is also not adjusted for the scheduler read-ahead, so is non-zero). - if (do_inject_switch_seq) { - if (inputs_[new_input].pid != INVALID_PID) { - insert_switch_tid_pid(inputs_[new_input]); - } - if (!switch_sequence_.empty()) { - switch_type_t switch_type = sched_type_t::SWITCH_INVALID; - if (prev_input == sched_type_t::INVALID_INPUT_ORDINAL || - inputs_[prev_input].workload != inputs_[new_input].workload) - switch_type = sched_type_t::SWITCH_PROCESS; - else - switch_type = sched_type_t::SWITCH_THREAD; - // Inject kernel context switch code. Since the injected records belong to - // this input (the kernel is acting on behalf of this input) we insert them - // into the input's queue, but ahead of any prior queued items. This is why - // we walk in reverse, for the push_front calls to the deque. We update the - // tid of the records here to match. They are considered as - // is_record_synthetic() and do not affect input stream ordinals. - // XXX: These will appear before the top headers of a new thread which is - // slightly odd to have regular records with the new tid before the top - // headers. - if (!switch_sequence_[switch_type].empty()) { - ++outputs_[output] - .stats[memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS]; - for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; - i >= 0; --i) { - RecordType record = switch_sequence_[switch_type][i]; - record_type_set_tid(record, inputs_[new_input].tid); - inputs_[new_input].queue.emplace_front(record, IS_SYNTHETIC); - } - VPRINT(this, 3, "Inserted %zu switch for type %d from %d.%d to %d.%d\n", - switch_sequence_[switch_type].size(), switch_type, - prev_input != sched_type_t::INVALID_INPUT_ORDINAL - ? inputs_[prev_input].workload - : -1, - prev_input, inputs_[new_input].workload, new_input); - } - } + if (!do_inject_switch_seq) + return; + if (inputs_[new_input].pid != INVALID_PID) { + insert_switch_tid_pid(inputs_[new_input]); } + if (switch_sequence_.empty()) + return; + switch_type_t switch_type = sched_type_t::SWITCH_INVALID; + if ( // XXX: idle-to-input transitions are assumed to be process switches + // for now. But we may want to improve this heuristic. + prev_input == sched_type_t::INVALID_INPUT_ORDINAL || + inputs_[prev_input].workload != inputs_[new_input].workload) + switch_type = sched_type_t::SWITCH_PROCESS; + else + switch_type = sched_type_t::SWITCH_THREAD; + if (switch_sequence_[switch_type].empty()) + return; + // Inject kernel context switch code. Since the injected records belong to + // this input (the kernel is acting on behalf of this input) we insert them + // into the input's queue, but ahead of any prior queued items. This is why + // we walk in reverse, for the push_front calls to the deque. We update the + // tid of the records here to match. They are considered as + // is_record_synthetic() and do not affect input stream ordinals. + // XXX: These will appear before the top headers of a new thread which is + // slightly odd to have regular records with the new tid before the top + // headers. + ++outputs_[output] + .stats[memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS]; + for (int i = static_cast(switch_sequence_[switch_type].size()) - 1; i >= 0; + --i) { + RecordType record = switch_sequence_[switch_type][i]; + record_type_set_tid(record, inputs_[new_input].tid); + inputs_[new_input].queue.emplace_front(record, IS_SYNTHETIC); + } + VPRINT(this, 3, "Inserted %zu switch for type %d from %d.%d to %d.%d\n", + switch_sequence_[switch_type].size(), switch_type, + prev_input != sched_type_t::INVALID_INPUT_ORDINAL + ? inputs_[prev_input].workload + : -1, + prev_input, inputs_[new_input].workload, new_input); } template diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index bbb9b47ca6a..ee2721d43ab 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -5955,7 +5955,7 @@ test_kernel_switch_sequences(bool use_input_ordinals) assert(switch_seq_count[i] > 0); assert(switch_seq_count[i] == static_cast(outputs[i]->get_schedule_statistic( - memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS))); + memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS))); } // Check the high-level strings. for (int i = 0; i < NUM_OUTPUTS; i++) { diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index abc1ae6e044..fd7b4849bb4 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -168,7 +168,7 @@ schedule_stats_t::get_scheduler_stats(memtrace_stream_t *stream, counters_t &cou stream->get_schedule_statistic(memtrace_stream_t::SCHED_STAT_HIT_OUTPUT_LIMIT)); counters.switch_sequence_injections = static_cast(stream->get_schedule_statistic( - memtrace_stream_t::SCHED_STAT_SWITCH_SEQUENCE_INJECTIONS)); + memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS)); // XXX: Currently, schedule_stats is measuring swap-ins to a real input. If we // want to match what "perf" targeting this app would record, which is swap-outs, From 6bc5d2ca494d659d746a1393e45797101163fcd0 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 25 Feb 2025 16:32:41 -0500 Subject: [PATCH 10/11] Add real_records_in_queue tracking --- clients/drcachesim/scheduler/scheduler_impl.cpp | 9 ++++++--- clients/drcachesim/scheduler/scheduler_impl.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index 1f25bae38a0..0d0ad3d2fcc 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -858,7 +858,7 @@ scheduler_impl_tmpl_t::init( // TODO i#5843: Add more flags for other options. : spec_type_t::LAST_FROM_TRACE, static_cast(get_time_micros()), - cached_record_t(create_invalid_record(), false), verbosity_); + cached_record_t(create_invalid_record(), IS_SYNTHETIC), verbosity_); if (options_.single_lockstep_output) outputs_.back().stream = global_stream_.get(); if (options_.schedule_record_ostream != nullptr) { @@ -2573,9 +2573,8 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp input->cur_from_queue = true; input->is_cur_record_real = input->queue.front().is_real; input->queue.pop_front(); - if (input->is_cur_record_real) { + if (input->is_cur_record_real) --input->real_records_in_queue; - } } else { // We again have a flag check because reader_t::init() does an initial ++ // and so we want to skip that on the first record but perform a ++ prior @@ -2637,6 +2636,8 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp // the lock since another output may grab this input. VPRINT(this, 5, "next_record[%d]: queuing candidate record\n", output); input->queue.emplace_back(record, input->is_cur_record_real); + if (input->is_cur_record_real) + ++input->real_records_in_queue; lock.unlock(); res = pick_next_input(output, blocked_time); if (res != sched_type_t::STATUS_OK && res != sched_type_t::STATUS_WAIT && @@ -2682,6 +2683,8 @@ scheduler_impl_tmpl_t::next_record(output_ordinal_t outp // Get our candidate record back. record = input->queue.back().record; input->is_cur_record_real = input->queue.back().is_real; + if (input->is_cur_record_real) + --input->real_records_in_queue; input->queue.pop_back(); } } diff --git a/clients/drcachesim/scheduler/scheduler_impl.h b/clients/drcachesim/scheduler/scheduler_impl.h index 3b4c7fe07e2..6e886f456fe 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.h +++ b/clients/drcachesim/scheduler/scheduler_impl.h @@ -204,7 +204,7 @@ template class scheduler_impl_tmpl_t // If non-empty these records should be returned before incrementing the reader. // This is used for read-ahead and inserting synthetic records. // We use a deque so we can iterate over it. - + // Remember to adjust real_records_in_queue when adding or removing from this. std::deque queue; uint64_t real_records_in_queue = 0; bool cur_from_queue; From b16b7c5c0fe270db5b0750e9e5374bb1df601c4a Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 25 Feb 2025 16:37:48 -0500 Subject: [PATCH 11/11] Include 'kernel' in log --- clients/drcachesim/scheduler/scheduler_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp index 0d0ad3d2fcc..a077f2325cb 100644 --- a/clients/drcachesim/scheduler/scheduler_impl.cpp +++ b/clients/drcachesim/scheduler/scheduler_impl.cpp @@ -668,7 +668,7 @@ scheduler_impl_tmpl_t::~scheduler_impl_tmpl_t() outputs_[i].ready_queue.lock->get_count_contended()); #endif VPRINT( - this, 1, " %-35s: %9" PRId64 "\n", "Switch sequence injections", + this, 1, " %-35s: %9" PRId64 "\n", "Kernel switch sequence injections", outputs_[i] .stats[memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS]); }