Skip to content

Commit

Permalink
Merge branch 'master' into evaluator-trait
Browse files Browse the repository at this point in the history
  • Loading branch information
primenumber committed May 4, 2024
2 parents 23fc861 + 5ce34d2 commit 407fa75
Show file tree
Hide file tree
Showing 14 changed files with 128 additions and 74 deletions.
92 changes: 46 additions & 46 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,60 +30,60 @@ $ cargo run --release -- ffobench

## Benchmark result

- Date: 2024/02/21
- Date: 2024/03/29
- Hardware: AMD Ryzen 9 7950X3D, DDR5-4800 64GB
- Environment: Linux 6.5.0-18-generic, Ubuntu 22.04.4, rustc 1.78.0-nightly
- Environment: Linux 6.5.0-26-generic, Ubuntu 22.04.4, rustc 1.78.0-nightly

FFO 40-59

|No.|empties|result|answer|move|nodes|time|NPS|
|---:|---:|---:|---:|---:|---:|:--:|---:|
|40|20|+38|+38|A2|90.4M| 0.054s|1644M/s|
|41|22| +0| +0|H4| 117M| 0.092s|1267M/s|
|42|22| +6| +6|G2| 287M| 0.177s|1617M/s|
|43|23|-12|-12|C7| 159M| 0.121s|1303M/s|
|44|23|-14|-14|D2| 111M| 0.083s|1322M/s|
|45|24| +6| +6|B2|1.57G| 1.010s|1561M/s|
|46|24| -8| -8|B3| 494M| 0.349s|1412M/s|
|47|25| +4| +4|G2| 167M| 0.123s|1350M/s|
|48|25|+28|+28|F6| 901M| 0.680s|1323M/s|
|49|26|+16|+16|E1|3.10G| 1.988s|1559M/s|
|50|26|+10|+10|D8|3.56G| 2.773s|1285M/s|
|51|27| +6| +6|E2|1.47G| 1.240s|1192M/s|
|52|27| +0| +0|A3|1.31G| 1.080s|1215M/s|
|53|28| -2| -2|D8|5.77G| 4.829s|1196M/s|
|54|28| -2| -2|C7|15.6G| 11.420s|1368M/s|
|55|29| +0| +0|G6|29.2G| 28.391s|1030M/s|
|56|29| +2| +2|H5|4.56G| 4.727s|965M/s|
|57|30|-10|-10|A6|19.8G| 18.593s|1066M/s|
|58|30| +4| +4|G1|4.83G| 4.961s|973M/s|
|59|34|+64|+64|G8|1.66k| 0.034s|0M/s|

[Total] elapsed: 82738454us, node count: 93254297417, NPS: 1127097412nodes/sec
|40|20|+38|+38|A2|76.2M| 0.062s|1209M/s|
|41|22| +0| +0|H4|87.2M| 0.080s|1076M/s|
|42|22| +6| +6|G2| 270M| 0.178s|1511M/s|
|43|23|-12|-12|C7| 164M| 0.146s|1122M/s|
|44|23|-14|-14|D2|84.3M| 0.081s|1029M/s|
|45|24| +6| +6|B2|1.51G| 0.978s|1547M/s|
|46|24| -8| -8|B3| 443M| 0.329s|1343M/s|
|47|25| +4| +4|G2| 133M| 0.122s|1084M/s|
|48|25|+28|+28|F6| 874M| 0.676s|1291M/s|
|49|26|+16|+16|E1|3.14G| 2.047s|1536M/s|
|50|26|+10|+10|D8|3.24G| 2.543s|1276M/s|
|51|27| +6| +6|E2|1.41G| 1.188s|1191M/s|
|52|27| +0| +0|A3|1.42G| 1.175s|1208M/s|
|53|28| -2| -2|D8|4.91G| 4.038s|1216M/s|
|54|28| -2| -2|C7|13.6G| 9.923s|1375M/s|
|55|29| +0| +0|G6|27.3G| 25.529s|1070M/s|
|56|29| +2| +2|H5|4.26G| 4.629s|921M/s|
|57|30|-10|-10|A6|19.6G| 18.350s|1071M/s|
|58|30| +4| +4|G1|4.48G| 4.760s|942M/s|
|59|34|+64|+64|G8|1.26k| 0.029s|0M/s|

[Total] elapsed: 76872921us, node count: 87193095283, NPS: 1134249800nodes/sec

FFO 60-79

|No.|empties|result|answer|move|nodes|time|NPS|
|---:|---:|---:|---:|---:|---:|:--:|---:|
|60|24|+20|+20|C2| 216M| 0.164s|1313M/s|
|61|25|-14|-14|G1| 339M| 0.318s|1062M/s|
|62|27|+28|+28|E8|8.51G| 7.369s|1154M/s|
|63|27| -2| -2|F2|2.88G| 2.393s|1203M/s|
|64|27|+20|+20|B4|11.2G| 9.350s|1207M/s|
|65|28|+10|+10|G1|29.2G| 20.527s|1422M/s|
|66|28|+30|+30|H3|21.1G| 16.079s|1314M/s|
|67|28|+22|+22|H3|28.2G| 20.363s|1386M/s|
|68|30|+28|+28|E8| 139G| 107.049s|1301M/s|
|69|30| +0| +0|H3|15.7G| 14.081s|1121M/s|
|70|30|-24|-24|E3|14.4G| 13.097s|1106M/s|
|71|31|+20|+20|D2|20.1G| 19.281s|1044M/s|
|72|31|+24|+24|E1| 258G| 298.006s|867M/s|
|73|31| -4| -4|G4|29.4G| 34.672s|849M/s|
|74|31|-30|-30|F1| 611G| 563.534s|1084M/s|
|75|32|+14|+14|D2| 299G| 225.346s|1327M/s|
|76|32|+32|+32|A3|2.12T|1869.331s|1134M/s|
|77|34|+34|+34|B7|1.13T|1015.754s|1121M/s|
|78|34| +8| +8|F1| 673G| 844.518s|797M/s|
|79|36|+64|+64|D7|56.8G| 43.134s|1319M/s|

[Total] elapsed: 5124378900us, node count: 5480838886501, NPS: 1069561598nodes/sec
|60|24|+20|+20|C2| 213M| 0.175s|1213M/s|
|61|25|-14|-14|G1| 344M| 0.325s|1058M/s|
|62|27|+28|+28|E8|7.61G| 6.587s|1155M/s|
|63|27| -2| -2|F2|2.89G| 2.383s|1215M/s|
|64|27|+20|+20|B4|10.4G| 8.675s|1204M/s|
|65|28|+10|+10|G1|26.6G| 18.953s|1405M/s|
|66|28|+30|+30|H3|19.7G| 15.215s|1299M/s|
|67|28|+22|+22|H3|25.8G| 18.823s|1375M/s|
|68|30|+28|+28|E8| 116G| 90.898s|1287M/s|
|69|30| +0| +0|H3|14.5G| 13.385s|1086M/s|
|70|30|-24|-24|E3|13.0G| 11.413s|1146M/s|
|71|31|+20|+20|D2|21.3G| 21.350s|999M/s|
|72|31|+24|+24|E1| 178G| 197.643s|902M/s|
|73|31| -4| -4|G4|25.8G| 31.323s|824M/s|
|74|31|-30|-30|F1| 599G| 580.678s|1031M/s|
|75|32|+14|+14|D2| 234G| 180.377s|1300M/s|
|76|32|+32|+32|A3|1.56T|1441.196s|1087M/s|
|77|34|+34|+34|B7|1.23T|1122.816s|1100M/s|
|78|34| +8| +8|F1| 571G| 831.075s|687M/s|
|79|36|+64|+64|D7|14.6G| 12.269s|1191M/s|

[Total] elapsed: 4605575684us, node count: 4685891940872, NPS: 1017438918nodes/sec
2 changes: 1 addition & 1 deletion src/book.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ fn search<Eval: Evaluator>(
solve_obj.cache_gen += 1;
if board.empty().count_ones() <= 18 {
let mut solve_obj = solve_obj.clone();
solve_with_move(board, &mut solve_obj, &sub_solver.clone())
solve_with_move(board, &mut solve_obj, &sub_solver.clone(), None)
} else {
let start = Instant::now();
let timer = Timer {
Expand Down
4 changes: 2 additions & 2 deletions src/engine/bits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ pub trait BitManip {
}

impl BitManip for u64 {
#[cfg(target_feature = "avx2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
/// Returns the result of applying the `_pext_u64` intrinsic to `*self` with `mask`.
///
/// This is the hardware-backed variant of `pext`; a software variant with the
/// same signature is selected by the complementary `cfg` gate.
fn pext(&self, mask: u64) -> u64 {
// SAFETY: this variant is only compiled under the `bmi2` target-feature cfg
// gate attached to this method, so the instruction behind `_pext_u64` is
// enabled for the target being built.
unsafe { _pext_u64(*self, mask) }
}

#[cfg(not(target_feature = "avx2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn pext(&self, mut mask: u64) -> u64 {
let mut x = *self;
x = x & mask;
Expand Down
9 changes: 6 additions & 3 deletions src/engine/board.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#[cfg(test)]
mod test;
#[cfg(target_feature = "neon")]
use std::arch::aarch64::*;
use crate::engine::bits::*;
use crate::engine::hand::*;
use anyhow::Result;
use clap::ArgMatches;
#[cfg(target_feature = "neon")]
use std::arch::aarch64::*;
use std::cmp::min;
use std::fmt;
use std::io::{BufWriter, Write};
Expand Down Expand Up @@ -55,7 +55,10 @@ fn smart_upper_bit(x: u64x4) -> u64x4 {
}
}

#[cfg(not(any(target_feature = "neon", all(target_feature = "avx512cd", target_feature = "avx512vl"))))]
#[cfg(not(any(
target_feature = "neon",
all(target_feature = "avx512cd", target_feature = "avx512vl")
)))]
fn smart_upper_bit(mut x: u64x4) -> u64x4 {
x |= x >> u64x4::from_array([8, 1, 7, 9]);
x |= x >> u64x4::from_array([16, 2, 14, 18]);
Expand Down
26 changes: 25 additions & 1 deletion src/engine/endgame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ use crate::engine::hand::*;
use crate::engine::search::*;
use crate::engine::table::*;
use arrayvec::ArrayVec;
use crc64::Crc64;
use std::cmp::max;
use std::io::Write;

fn near_leaf<Eval: Evaluator>(solve_obj: &mut SolveObj<Eval>, board: Board) -> (i8, SolveStat) {
let (score, node_count) = solve_obj.last_cache.solve_last(board);
Expand Down Expand Up @@ -211,8 +213,30 @@ pub fn solve_inner<Eval: Evaluator>(
CutType::LessThanAlpha(v) => return (v, SolveStat::one_stcut()),
}
}
if rem < solve_obj.params.res_cache_limit {
if rem < solve_obj.params.local_res_cache_limit {
fastest_first(solve_obj, board, (alpha, beta), passed)
} else if rem < solve_obj.params.res_cache_limit {
let mut crc64 = Crc64::new();
crc64.write(&board.player.to_le_bytes()).unwrap();
crc64.write(&board.opponent.to_le_bytes()).unwrap();
let hash = crc64.get();
let res_cache = solve_obj.local_res_cache.get(board, hash);
let lookup_result = make_lookup_result(res_cache, (&mut alpha, &mut beta));
let (lower, upper) = match lookup_result {
CacheLookupResult::Cut(v) => return (v, SolveStat::zero()),
CacheLookupResult::NoCut(l, u, _) => (l, u),
};
let (res, stat) = fastest_first(solve_obj, board, (alpha, beta), passed);
let record = make_record(
solve_obj.local_cache_gen,
board,
res,
None,
(alpha, beta),
(lower, upper),
);
solve_obj.local_res_cache.update(&record, hash);
(res, stat)
} else if rem < solve_obj.params.eval_ordering_limit {
let (lower, upper) = match lookup_table(solve_obj, board, (&mut alpha, &mut beta)) {
CacheLookupResult::Cut(v) => return (v, SolveStat::zero()),
Expand Down
12 changes: 6 additions & 6 deletions src/engine/last_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,23 +63,23 @@ impl LastCache {
}
}

#[cfg(target_feature = "bmi2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
/// Extracts the bits of `bits` selected by `mask` by delegating to `pext`.
///
/// `_col` is unused here; it is kept so this variant has the same parameter
/// list as the portable `get_col_bits` selected by the complementary `cfg`.
// NOTE(review): `pext` is presumably the `BitManip::pext` extension on `u64` — confirm.
fn get_col_bits(bits: u64, mask: u64, _col: usize) -> u64 {
bits.pext(mask)
}

#[cfg(not(target_feature = "bmi2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
/// Portable (non-`pext`) column gather.
///
/// Keeps only the bits of `bits` selected by `mask`, shifts them right by
/// `col`, and packs them into the low byte of the result with a single
/// multiply-and-shift.
///
/// For a shifted value whose set bits sit at positions `8 * i` (`i` in `0..8`),
/// the multiplication moves bit `8 * i` to position `49 + i`, so after `>> 49`
/// and `& 0xff` bit `i` of the result mirrors bit `8 * i + col` of the input.
// NOTE(review): assumes `mask` selects exactly the bits at `8 * i + col` — confirm at call sites.
fn get_col_bits(bits: u64, mask: u64, col: usize) -> u64 {
    const GATHER: u64 = 0x0002_0408_1020_4081;
    let column = (bits & mask) >> col;
    let packed = column.wrapping_mul(GATHER) >> 49;
    packed & 0xff
}

#[cfg(target_feature = "bmi2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
/// Extracts the bits of `bits` selected by `mask` by delegating to `pext`.
///
/// `_row` and `_col` are unused here; they are kept so this variant has the
/// same parameter list as the portable `get_diag1_bits` selected by the
/// complementary `cfg`.
// NOTE(review): `pext` is presumably the `BitManip::pext` extension on `u64` — confirm.
fn get_diag1_bits(bits: u64, mask: u64, _row: usize, _col: usize) -> u64 {
bits.pext(mask)
}

#[cfg(not(target_feature = "bmi2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn get_diag1_bits(mut bits: u64, mask: u64, row: usize, col: usize) -> u64 {
bits &= mask;
let width = if row >= col {
Expand All @@ -92,12 +92,12 @@ impl LastCache {
(bits.wrapping_mul(0x0101_0101_0101_0101) >> 56) & ((1 << width) - 1)
}

#[cfg(target_feature = "bmi2")]
#[cfg(all(target_feature = "bmi2", not(slow_pext)))]
/// Extracts the bits of `bits` selected by `mask` by delegating to `pext`.
///
/// `_row` and `_col` are unused here; they are kept so this variant has the
/// same parameter list as the portable `get_diag2_bits` selected by the
/// complementary `cfg`.
// NOTE(review): `pext` is presumably the `BitManip::pext` extension on `u64` — confirm.
fn get_diag2_bits(bits: u64, mask: u64, _row: usize, _col: usize) -> u64 {
bits.pext(mask)
}

#[cfg(not(target_feature = "bmi2"))]
#[cfg(not(all(target_feature = "bmi2", not(slow_pext))))]
fn get_diag2_bits(mut bits: u64, mask: u64, row: usize, col: usize) -> u64 {
bits &= mask;
let width = if row + col >= 7 {
Expand Down
5 changes: 4 additions & 1 deletion src/engine/midgame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ fn simplified_abdada_intro<Eval: Evaluator>(
if depth >= ctx.solve_obj.params.parallel_depth_limit || rem < ctx.solve_obj.params.parallel_empties_limit {
let (res, stat) = solve_inner(&mut ctx.solve_obj, board, (alpha, beta), passed);
ctx.stats.merge(stat);
ctx.solve_obj.local_cache_gen += 1;
return Some((res, None));
}
ctx.stats.merge(SolveStat::one());
Expand Down Expand Up @@ -148,12 +149,14 @@ pub fn simplified_abdada<Eval: Evaluator>(
(alpha, beta): (i8, i8),
passed: bool,
depth: i8,
num_threads: Option<usize>,
) -> (i8, Option<Hand>, SolveStat) {
thread::scope(|s| {
let mut handles = Vec::new();
let cs_hash = Arc::new(DashSet::new());
let finished = Arc::new(AtomicBool::new(false));
for _ in 0..num_cpus::get() {
let num_threads = num_threads.unwrap_or(num_cpus::get());
for _ in 0..num_threads {
let solve_obj = solve_obj.clone();
let cs_hash = cs_hash.clone();
let finished = finished.clone();
Expand Down
23 changes: 21 additions & 2 deletions src/engine/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub struct SearchParams {
pub parallel_empties_limit: i8,
pub eval_ordering_limit: i8,
pub res_cache_limit: i8,
pub local_res_cache_limit: i8,
pub stability_cut_limit: i8,
pub ffs_ordering_limit: i8,
pub static_ordering_limit: i8,
Expand All @@ -48,21 +49,25 @@ pub struct SearchParams {
/// Bundle of caches, evaluator, tuning parameters and generation counters
/// that is passed (by `&mut`) through the solver functions.
pub struct SolveObj<Eval: Evaluator> {
/// Result cache table held behind an `Arc`, so clones of this object refer to the same table.
pub res_cache: Arc<ResCacheTable>,
/// Evaluation cache table held behind an `Arc`, shared between clones in the same way.
pub eval_cache: Arc<EvalCacheTable>,
/// Result cache owned directly by this object (not behind an `Arc`); cloning
/// this object clones the array.
// NOTE(review): looks like a per-search-thread cache — confirm against how clones are handed to threads.
pub local_res_cache: CacheArray<ResCache>,
/// Evaluator implementation, held behind an `Arc`.
pub evaluator: Arc<Eval>,
/// Cache used to solve positions close to the end of the game (`solve_last`), held behind an `Arc`.
pub last_cache: Arc<LastCache>,
/// Search tuning parameters (depth/empties limits for each search strategy).
pub params: SearchParams,
/// Generation counter for cache entries.
// NOTE(review): presumably stamps `res_cache`/`eval_cache` records — confirm.
pub cache_gen: u32,
/// Generation counter written into records stored in `local_res_cache`.
pub local_cache_gen: u32,
}

impl<Eval: Evaluator> Clone for SolveObj<Eval> {
fn clone(&self) -> Self {
SolveObj::<Eval> {
res_cache: self.res_cache.clone(),
eval_cache: self.eval_cache.clone(),
local_res_cache: self.local_res_cache.clone(),
evaluator: self.evaluator.clone(),
last_cache: self.last_cache.clone(),
params: self.params.clone(),
cache_gen: self.cache_gen.clone(),
local_cache_gen: self.local_cache_gen.clone(),
}
}
}
Expand All @@ -78,10 +83,12 @@ impl<Eval: Evaluator> SolveObj<Eval> {
SolveObj {
res_cache,
eval_cache,
local_res_cache: CacheArray::<ResCache>::new(65536),
evaluator,
last_cache: Arc::new(LastCache::new()),
params,
cache_gen,
local_cache_gen: 0,
}
}
}
Expand Down Expand Up @@ -294,21 +301,25 @@ pub fn solve<Eval: Evaluator>(
(alpha, beta): (i8, i8),
passed: bool,
depth: i8,
num_threads: Option<usize>,
) -> (i8, Option<Hand>, SolveStat) {
simplified_abdada(solve_obj, board, (alpha, beta), passed, depth)
simplified_abdada(solve_obj, board, (alpha, beta), passed, depth, num_threads)
}

// `num_threads`: number of search threads; when `None`, the number of CPUs is used.
pub fn solve_with_move<Eval: Evaluator>(
board: Board,
solve_obj: &mut SolveObj<Eval>,
_sub_solver: &Arc<SubSolver>,
num_threads: Option<usize>,
) -> Hand {
if let Some(best) = simplified_abdada(
solve_obj,
board,
(-(BOARD_SIZE as i8), BOARD_SIZE as i8),
false,
0,
num_threads,
)
.1
{
Expand All @@ -318,7 +329,15 @@ pub fn solve_with_move<Eval: Evaluator>(
let mut result = -65;
for pos in board.mobility() {
let next = board.play(pos).unwrap();
let res = -simplified_abdada(solve_obj, next, (-(BOARD_SIZE as i8), -result), false, 0).0;
let res = -simplified_abdada(
solve_obj,
next,
(-(BOARD_SIZE as i8), -result),
false,
0,
num_threads,
)
.0;
if res > result {
result = res;
best_pos = Some(pos);
Expand Down
Loading

0 comments on commit 407fa75

Please sign in to comment.