Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale frequency to suppress RCU CPU stall warning #67

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ OBJS := \
aclint.o \
$(OBJS_EXTRA)

LDFLAGS := -pg

deps := $(OBJS:%.o=.%.o.d)

$(BIN): $(OBJS)
Expand All @@ -78,6 +80,8 @@ E :=
S := $E $E

SMP ?= 1
CFLAGS += -D SEMU_SMP=$(SMP)
CFLAGS += -D SEMU_BOOT_TARGET_TIME=10
.PHONY: riscv-harts.dtsi
riscv-harts.dtsi:
$(Q)python3 scripts/gen-hart-dts.py $@ $(SMP) $(CLOCK_FREQ)
Expand Down
30 changes: 30 additions & 0 deletions auto_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Build and boot the emulator once per hart count (SMP=1..32), saving the
# emulator output and the matching gprof report under logs/.

# Make a failing 'make' visible even though its output is piped through 'tee'.
set -o pipefail

# Create a directory to store logs (optional)
mkdir -p logs

for N in $(seq 1 32); do
    echo "============================================="
    echo "Starting experiment with SMP=$N"
    echo "============================================="

    # 1) Clean. Also drop the profile left by the previous iteration so that a
    #    failed run can never be reported with stale gprof data.
    make clean
    rm -f gmon.out

    # 2) Build and run checks with SMP=N, capturing emulator output
    #    The 'tee' command copies output to the terminal AND a log file
    echo "Building and running 'make check SMP=$N'..."
    if ! make check SMP="$N" 2>&1 | tee "logs/emulator_SMP_${N}.log"; then
        echo "warning: 'make check SMP=$N' exited with a non-zero status" >&2
    fi

    # 3) After the emulator run, record gprof output
    #    We assume 'gprof ./semu' uses data from 'gmon.out'
    if [ ! -f gmon.out ]; then
        echo "warning: no gmon.out produced for SMP=$N; skipping gprof" >&2
        echo
        continue
    fi

    echo "Running gprof for SMP=$N..."
    gprof ./semu > "logs/gprof_SMP_${N}.log" 2>&1

    echo "Done with SMP=$N. Logs saved:"
    echo " - logs/emulator_SMP_${N}.log"
    echo " - logs/gprof_SMP_${N}.log"
    echo
done

echo "All experiments complete!"
8 changes: 8 additions & 0 deletions riscv.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,14 @@ static void op_sret(hart_t *vm)
vm->s_mode = vm->sstatus_spp;
vm->sstatus_sie = vm->sstatus_spie;

/* After the booting process is complete, initrd will be loaded. At this
* point, the system will switch to U mode for the first time. Therefore,
* by checking whether the switch to U mode has already occurred, we can
* determine if the boot process has been completed.
*/
if (!boot_complete && !vm->s_mode)
boot_complete = true;

/* Reset stack */
vm->sstatus_spp = false;
vm->sstatus_spie = true;
Expand Down
208 changes: 191 additions & 17 deletions utils.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include "utils.h"
Expand All @@ -19,6 +21,14 @@
#endif
#endif

/* Boot-phase flag read by the timer code: while it is false the clocksource
 * returns scaled ticks; once true it switches to real ticks minus an offset.
 */
bool boot_complete = false;
Copy link
Collaborator

@ranvd ranvd Jan 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest moving boot_complete variable into vm_t for a more conceptually accurate design.

Copy link
Collaborator Author

@Mes0903 Mes0903 Jan 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we move boot_complete into vm_t, all existing functions for semu_timer_t would need an additional vm_t parameter. For example, semu_timer_get would change to:

semu_timer_get(vm_t *vm, semu_timer_t *timer)

This change would, in turn, require every caller of this function to pass in a vm_t parameter as well. For instance, since semu_timer_get is called within aclint_mtimer_update_interrupts, the API of aclint_mtimer_update_interrupts would also need to be updated to include vm_t.

As this pattern continues, the API changes would proliferate significantly. Perhaps we could introduce a static bool pointer pointing to boot_complete and assign its value during semu_timer_init. This way, we would only need to modify the parameters of semu_timer_init.

/* Multiplier applied to real ticks while 'boot_complete' is still false;
 * assigned by measure_bogomips_ns().
 */
static double scale_factor;

/* Instrumentation below is for testing only. */
/* Number of semu_timer_clocksource() calls made so far. */
uint64_t count = 0;
/* Host timestamps: 'boot_begin' is taken in semu_timer_init(), 'boot_end' on
 * the first clocksource call made after 'boot_complete' became true.
 */
struct timespec boot_begin, boot_end;
/* Copies of the values computed by measure_bogomips_ns(), kept so they can be
 * printed in the boot log.
 */
double TEST_ns_per_call, TEST_predict_sec;

/* Calculate "x * n / d" without unnecessary overflow or loss of precision.
*
* Reference:
Expand All @@ -32,35 +42,199 @@ static inline uint64_t mult_frac(uint64_t x, uint64_t n, uint64_t d)
return q * n + r * n / d;
}

void semu_timer_init(semu_timer_t *timer, uint64_t freq)
/* Return the current host time in nanoseconds.
 *
 * HAVE_POSIX_TIMER => clock_gettime(CLOCKID).
 * HAVE_MACH_TIMER  => mach_absolute_time(), converted with the Mach timebase.
 * Otherwise        => time(0), so the value only advances once per second.
 *
 * All arithmetic is done in uint64_t. Multiplying by the double constant 1e9
 * would push the computation into floating point, where nanosecond counts
 * above 2^53 are rounded to the nearest representable double.
 */
static inline uint64_t host_time_ns(void)
{
#if defined(HAVE_POSIX_TIMER)
    struct timespec ts;
    clock_gettime(CLOCKID, &ts);
    return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;

#elif defined(HAVE_MACH_TIMER)
    /* The timebase is queried once and cached; denom == 0 means "not yet". */
    static mach_timebase_info_data_t ts = {0};
    if (ts.denom == 0)
        (void) mach_timebase_info(&ts);

    uint64_t now = mach_absolute_time();
    /* convert to nanoseconds: (now * ts.numer / ts.denom) */
    return mult_frac(now, ts.numer, (uint64_t) ts.denom);

#else
    /* Minimal fallback: time(0) in seconds => convert to ns. */
    time_t now_sec = time(0);
    return (uint64_t) now_sec * 1000000000ULL;
#endif
}

static uint64_t semu_timer_clocksource(uint64_t freq)
/* Measure the overhead of a high-resolution timer call, typically
* 'clock_gettime()' on POSIX or 'mach_absolute_time()' on macOS.
*
* 1) Times how long it takes to call 'host_time_ns()' repeatedly (iterations).
* 2) Derives an average overhead per call => ns_per_call.
* 3) Because semu_timer_clocksource is ~10% of boot overhead, and called ~2e8
* times * SMP, we get predict_sec = ns_per_call * SMP * 2. Then set
* 'scale_factor' so the entire boot completes in SEMU_BOOT_TARGET_TIME
* seconds.
*/
static void measure_bogomips_ns(uint64_t iterations)
{
#if defined(HAVE_POSIX_TIMER)
struct timespec t;
clock_gettime(CLOCKID, &t);
return t.tv_sec * freq + mult_frac(t.tv_nsec, freq, 1e9);
/* Perform 'iterations' times calling the host HRT.
*
*
* Assuming the cost of loop overhead is 'e' and the cost of 'host_time_ns'
* is 't', we perform a two-stage measurement to eliminate the loop
* overhead. In the first loop, 'host_time_ns' is called only once per
* iteration, while in the second loop, it is called twice per iteration.
*
* In this way, the cost of the first loop is 'e + t', and the cost of the
* second loop is 'e + 2t'. By subtracting the two, we can effectively
* eliminate the loop overhead.
*
* Reference:
* https://ates.dev/posts/2025-01-12-accurate-benchmarking/
*/
const uint64_t start_ns_1 = host_time_ns();
for (uint64_t loops = 0; loops < iterations; loops++)
(void) host_time_ns();

const uint64_t end_ns_1 = host_time_ns();
const uint64_t elapsed_ns_1 = end_ns_1 - start_ns_1;

/* Second measurement */
const uint64_t start_ns_2 = host_time_ns();
for (uint64_t loops = 0; loops < iterations; loops++) {
(void) host_time_ns();
(void) host_time_ns();
}

const uint64_t end_ns_2 = host_time_ns();
const uint64_t elapsed_ns_2 = end_ns_2 - start_ns_2;

/* Calculate average overhead per call */
const double ns_per_call =
(double) (elapsed_ns_2 - elapsed_ns_1) / (double) iterations;

/* 'semu_timer_clocksource' is called ~2e8 times per SMP. Each call's
* overhead ~ ns_per_call. The total overhead is ~ ns_per_call * SMP *
* 2e8. That overhead is about 10% of the entire boot, so effectively:
* predict_sec = ns_per_call * SMP * 2e8 * (100%/10%) / 1e9
* = ns_per_call * SMP * 2.0
* Then scale_factor = (desired_time) / (predict_sec).
*/
const double predict_sec = ns_per_call * SEMU_SMP * 2.0;
scale_factor = SEMU_BOOT_TARGET_TIME / predict_sec;

/* for testing */
TEST_ns_per_call = ns_per_call;
TEST_predict_sec = predict_sec;
}

/* The main function that returns the "emulated time" in ticks.
 *
 * Before the boot completes, real ticks are multiplied by 'scale_factor'.
 * After boot completes, the function returns real ticks minus an offset
 * captured at the moment of the switch, so that there's no big jump.
 *
 * NOTE(review): on the high-resolution path, the first call made after
 * 'boot_complete' is set prints the boot statistics and terminates the
 * process with exit(0). This looks like measurement scaffolding — confirm
 * whether it should remain.
 */
static uint64_t semu_timer_clocksource(semu_timer_t *timer)
{
    count++;

    /* After boot process complete, the timer will switch to real time. Thus,
     * there is an offset between the real time and the emulator time.
     *
     * After switching to real time, the correct way to update time is to
     * calculate the increment of time. Then add it to the emulator time.
     */
    static int64_t offset = 0;
    static bool first_switch = true;

#if defined(HAVE_POSIX_TIMER) || defined(HAVE_MACH_TIMER)
    uint64_t now_ns = host_time_ns();

    /* real_ticks = (now_ns * freq) / 1e9 */
    uint64_t real_ticks = mult_frac(now_ns, timer->freq, 1e9);

    /* scaled_ticks = (now_ns * (freq*scale_factor)) / 1e9
     *              = ((now_ns * freq) / 1e9) * scale_factor
     */
    uint64_t scaled_ticks = real_ticks * scale_factor;

    if (!boot_complete)
        return scaled_ticks; /* Return scaled ticks in the boot phase. */

    /* The boot is done => switch to real freq with an offset bridging. */
    if (first_switch) {
        /* NOTE(review): CLOCKID is used here without the HAVE_POSIX_TIMER
         * guard that host_time_ns() applies — confirm it is defined when only
         * HAVE_MACH_TIMER is set.
         */
        clock_gettime(CLOCKID, &boot_end);
        double boot_time = (boot_end.tv_sec - boot_begin.tv_sec) +
                           (boot_end.tv_nsec - boot_begin.tv_nsec) / 1e9;

        first_switch = false;
        offset = (int64_t) (real_ticks - scaled_ticks);

        /* 'count' is a uint64_t; print it through an explicit unsigned long
         * long so the conversion specifier matches on every platform.
         */
        printf(
            "\033[1;31m[SEMU LOG]: Boot time: %.5f seconds, called %llu "
            "times semu_timer_total_ticks\033[0m\n",
            boot_time, (unsigned long long) count);

        printf(
            "\033[1;31m[SEMU LOG]: ns_per_call = %.5f, predict_sec = %.5f, "
            "scale_factor = %.5f\033[0m\n",
            TEST_ns_per_call, TEST_predict_sec, scale_factor);

        exit(0);
    }
    return (uint64_t) ((int64_t) real_ticks - offset);

#else
    /* Because we don't rely on sub-second calls to 'host_time_ns()' here,
     * we directly use time(0). This means the time resolution is coarse (1
     * second), but the logic is the same: we do a scaled approach pre-boot,
     * then real freq with an offset post-boot.
     */
    time_t now_sec = time(0);

    /* Before boot done, scale time. */
    if (!boot_complete)
        return (uint64_t) now_sec * (uint64_t) (timer->freq * scale_factor);

    if (first_switch) {
        first_switch = false;
        uint64_t real_val = (uint64_t) now_sec * (uint64_t) timer->freq;
        uint64_t scaled_val =
            (uint64_t) now_sec * (uint64_t) (timer->freq * scale_factor);
        offset = (int64_t) (real_val - scaled_val);
    }

    /* Return real freq minus offset. */
    uint64_t real_freq_val = (uint64_t) now_sec * (uint64_t) timer->freq;
    return real_freq_val - offset;
#endif
}

/* Set up 'timer' with tick frequency 'freq' and start it at tick 0.
*
* Also calibrates 'scale_factor' through measure_bogomips_ns() and records
* 'boot_begin', which the clocksource later uses to report the boot time.
*/
void semu_timer_init(semu_timer_t *timer, uint64_t freq)
{
/* Measure how long each call to 'host_time_ns()' roughly takes,
* then use that to pick 'scale_factor'. Here 'freq' doubles as the loop
* count of the measurement.
*/
measure_bogomips_ns(freq);

/* NOTE(review): CLOCKID is used here without the HAVE_POSIX_TIMER guard that
* host_time_ns() applies — confirm it is defined on every supported host.
*/
clock_gettime(CLOCKID, &boot_begin);
timer->freq = freq;
/* Rebasing reads the clocksource, which depends on 'timer->freq' and
* 'scale_factor', so it has to come last.
*/
semu_timer_rebase(timer, 0);
}

/* Return the number of ticks that 'timer' has advanced relative to its
 * current base ('timer->begin').
 */
uint64_t semu_timer_get(semu_timer_t *timer)
{
    return semu_timer_clocksource(timer) - timer->begin;
}

/* Re-anchor 'timer' so that semu_timer_get() reports 'time' at this instant. */
void semu_timer_rebase(semu_timer_t *timer, uint64_t time)
{
    timer->begin = semu_timer_clocksource(timer) - time;
}
3 changes: 3 additions & 0 deletions utils.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#pragma once

#include <stdbool.h>
#include <stdint.h>

/* Flag set to true the first time 'sret' returns to U-mode, which is taken as
 * the end of the boot process.
 */
extern bool boot_complete;

/* TIMER */
typedef struct {
uint64_t begin;
Expand Down
Loading