Skip to content

Commit

Permalink
Set default number of threads to performance core count on macOS
Browse files Browse the repository at this point in the history
On Apple Silicon systems with a mix of performance and efficiency cores,
performance is better if the number of threads is set to the performance core
count, rather than the total number of cores.

Fixes #342.
  • Loading branch information
robertknight committed Jan 25, 2025
1 parent 968dade commit b4ccbda
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 5 deletions.
2 changes: 1 addition & 1 deletion rten-simd/src/isa_detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/// Functions for reading system info on macOS.
#[cfg(target_os = "macos")]
#[allow(unused)]
mod macos {
pub mod macos {
/// Detect availability of AVX-512 on macOS, where `is_x86_feature_detected`
/// can return false even if AVX-512 is available.
///
Expand Down
38 changes: 34 additions & 4 deletions src/threading.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ impl ThreadPool {
}
}

/// Return the optimal number of cores to use for maximum performance.
///
/// This may be less than the total number of cores on systems with heterogenous
/// cores (eg. a mix of performance and efficiency).
fn optimal_core_count() -> u32 {
let mut core_count = num_cpus::get_physical().max(1) as u32;

#[cfg(target_os = "macos")]
{
use rten_simd::isa_detection::macos::sysctl_int;
if let Ok(perf_core_count) = sysctl_int(c"hw.perflevel0.physicalcpu") {
core_count = core_count.clamp(1, perf_core_count as u32);
}
}

core_count
}

/// Return the [Rayon][rayon] thread pool which is used to execute RTen models.
///
/// This differs from Rayon's default global thread pool in that it is tuned for
Expand All @@ -41,23 +59,35 @@ impl ThreadPool {
pub fn thread_pool() -> &'static ThreadPool {
static THREAD_POOL: OnceLock<ThreadPool> = OnceLock::new();
THREAD_POOL.get_or_init(|| {
let physical_cpus = num_cpus::get_physical();
let physical_cpus = optimal_core_count();

let num_threads = if let Some(threads_var) = env::var_os("RTEN_NUM_THREADS") {
let requested_threads: Result<usize, _> = threads_var.to_string_lossy().parse();
let requested_threads: Result<u32, _> = threads_var.to_string_lossy().parse();
match requested_threads {
Ok(n_threads) => n_threads.clamp(1, num_cpus::get()),
Ok(n_threads) => n_threads.clamp(1, num_cpus::get() as u32),
Err(_) => physical_cpus,
}
} else {
physical_cpus
};

let pool = rayon::ThreadPoolBuilder::new()
.num_threads(num_threads)
.num_threads(num_threads as usize)
.thread_name(|index| format!("rten-{}", index))
.build();

ThreadPool { pool: pool.ok() }
})
}

#[cfg(test)]
mod tests {
    use super::optimal_core_count;

    /// The optimal core count must be at least one and must not exceed the
    /// number of physical cores.
    #[test]
    fn test_optimal_core_count() {
        let physical = num_cpus::get_physical() as u32;
        let optimal = optimal_core_count();
        assert!((1..=physical).contains(&optimal));
    }
}

0 comments on commit b4ccbda

Please sign in to comment.