Skip to content

Commit

Permalink
minor fixes & rename seed_scan_*
Browse files Browse the repository at this point in the history
  • Loading branch information
zyxar committed Dec 23, 2023
1 parent 0ea36ef commit 3e2efde
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 32 deletions.
25 changes: 5 additions & 20 deletions PLA-SeedFinder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
#set(CMAKE_VERBOSE_MAKEFILE ON)

# set(CMAKE_AUTOMOC ON)
# set(CMAKE_AUTORCC ON)
# set(CMAKE_AUTOUIC ON)

add_custom_target(build-time-make-directory ALL
COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/)

#Find threads library
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
Expand Down Expand Up @@ -92,6 +85,9 @@ target_include_directories(PLA-SeedFinder PRIVATE Source/)

#enable MP with MSVC (Build with Multiple Processes)
if (MSVC)
add_custom_target(build-time-make-directory ALL
COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/)

target_compile_options(PLA-SeedFinder PRIVATE /FAs /FaAssembly/ /MP /W4)
target_compile_options(PLA-SeedFinder PRIVATE /wd5054) # Deprecated enum arithmetic
target_compile_options(PLA-SeedFinder PRIVATE /wd4505) # unreferenced local function has been removed
Expand All @@ -106,11 +102,10 @@ if (MSVC)
target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake)

else()
target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -O2)
if (X86)
target_compile_options(PLA-SeedFinder PRIVATE -msse4.2)

target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic)

set(ARCH_FLAGS_09_Nehalem -march=nehalem)
set(ARCH_FLAGS_13_Haswell -march=haswell)
set(ARCH_FLAGS_17_Skylake -march=skylake-avx512)
Expand All @@ -119,16 +114,6 @@ else()
target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_08_Nehalem)
target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_13_Haswell)
target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake)
else ()
target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic)
SET_SOURCE_FILES_PROPERTIES(
Source/Kernels/SeedScan_Default.cpp
PROPERTIES COMPILE_FLAGS -O2
)
SET_SOURCE_FILES_PROPERTIES(
Source/Kernels/SeedScan_aarch64.cpp
PROPERTIES COMPILE_FLAGS -O2
)
endif (X86)
endif()

Expand Down Expand Up @@ -162,4 +147,4 @@ if (WIN32)
"*.dll"
)
file(COPY ${MY_DLLS} DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
endif(WIN32)
endif(WIN32)
8 changes: 4 additions & 4 deletions PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace PokemonAutomation{

#if defined __aarch64__ && defined __APPLE__

bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
bool seed_scan_common_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid);
simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000);
simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000);
Expand All @@ -29,7 +29,7 @@ bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start
return false;
}

bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
bool seed_scan_thorough_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
desired_pid &= 0xefffffff;
simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid);
simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000);
Expand Down Expand Up @@ -57,7 +57,7 @@ bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t sta
return false;
}

bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
bool seed_scan_common_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid),
simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid));
simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000),
Expand All @@ -83,7 +83,7 @@ bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start
return false;
}

bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
bool seed_scan_thorough_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){
desired_pid &= 0xefffffff;
simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid),
simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid));
Expand Down
2 changes: 1 addition & 1 deletion PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ class XoroShiro8{
}

#undef MAGIC_NUMBER
#endif
#endif
14 changes: 7 additions & 7 deletions PLA-SeedFinder/Source/SeedScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@ bool seed_scan_thorough_unroll4_SSE41(size_t rolls, uint32_t desired_pid, uint64
bool seed_scan_thorough_unroll8_AVX2(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_thorough_unroll16_AVX512(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);

bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_common_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_thorough_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_common_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);
bool seed_scan_thorough_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations);


void print_isa(){
#ifdef __aarch64__
#if defined __APPLE__
cout << "Instruction Set: AARCH64" << endl;
cout << "Instruction Set: NEON" << endl;
return;
#endif
#else
Expand Down Expand Up @@ -99,7 +99,7 @@ bool seed_scan_common(size_t rolls, uint32_t desired_pid, uint64_t start_seed, u
#endif
#if defined __aarch64__ && defined __APPLE__
uint64_t block = iterations / 8 * 8;
if (block > 0 && seed_scan_common_unroll8(rolls, desired_pid, start_seed, block)){
if (block > 0 && seed_scan_common_unroll8_NEON(rolls, desired_pid, start_seed, block)){
return true;
}
start_seed += block * 0x100000000;
Expand Down Expand Up @@ -146,7 +146,7 @@ bool seed_scan_thorough(size_t rolls, uint32_t desired_pid, uint64_t start_seed,
#endif
#if defined __aarch64__ && defined __APPLE__
uint64_t block = iterations / 8 * 8;
if (block > 0 && seed_scan_thorough_unroll8(rolls, desired_pid, start_seed, block)){
if (block > 0 && seed_scan_thorough_unroll8_NEON(rolls, desired_pid, start_seed, block)){
return true;
}
start_seed += block * 0x100000000;
Expand Down

0 comments on commit 3e2efde

Please sign in to comment.