From 5bac769951a3f7c606594de6b0f863965db5a0cc Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Sun, 19 Nov 2023 17:59:29 +0800 Subject: [PATCH 01/10] Fixes Main.cpp --- PLA-SeedFinder/Source/Main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/PLA-SeedFinder/Source/Main.cpp b/PLA-SeedFinder/Source/Main.cpp index 70f39c7..8cec426 100644 --- a/PLA-SeedFinder/Source/Main.cpp +++ b/PLA-SeedFinder/Source/Main.cpp @@ -50,7 +50,6 @@ void test(){ } #endif -#include int main(){ From 33daf788b979dfd7869d0873996d89040d10d25e Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Sun, 19 Nov 2023 18:13:16 +0800 Subject: [PATCH 02/10] Fixes SeedScan.cpp --- PLA-SeedFinder/Source/SeedScan.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PLA-SeedFinder/Source/SeedScan.cpp b/PLA-SeedFinder/Source/SeedScan.cpp index d394332..e3c2c3a 100644 --- a/PLA-SeedFinder/Source/SeedScan.cpp +++ b/PLA-SeedFinder/Source/SeedScan.cpp @@ -33,6 +33,8 @@ bool seed_scan_thorough_unroll16_AVX512(size_t rolls, uint32_t desired_pid, uint void print_isa(){ +#ifdef __aarch64__ +#else #if !_MSC_VER || _WIN64 if (CPU_CAPABILITY.OS_AVX512 && CPU_CAPABILITY.HW_AVX512_DQ){ cout << "Instruction Set: AVX512" << endl; @@ -47,6 +49,7 @@ void print_isa(){ cout << "Instruction Set: SSE4.1" << endl; return; } +#endif cout << "Instruction Set: Default" << endl; } From 65201abb4dbe40f423265564ec5b67eb4564ed85 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Sun, 19 Nov 2023 17:47:30 +0800 Subject: [PATCH 03/10] Fixes CMakeLists.txt --- PLA-SeedFinder/CMakeLists.txt | 140 +++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 54 deletions(-) diff --git a/PLA-SeedFinder/CMakeLists.txt b/PLA-SeedFinder/CMakeLists.txt index ca9a256..23a70cd 100644 --- a/PLA-SeedFinder/CMakeLists.txt +++ b/PLA-SeedFinder/CMakeLists.txt @@ -16,9 +16,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) #set(CMAKE_VERBOSE_MAKEFILE ON) -set(CMAKE_AUTOMOC ON) -set(CMAKE_AUTORCC ON) -set(CMAKE_AUTOUIC ON) +# set(CMAKE_AUTOMOC ON) +# set(CMAKE_AUTORCC ON) +# set(CMAKE_AUTOUIC ON) add_custom_target(build-time-make-directory ALL COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/) @@ -27,20 +27,27 @@ add_custom_target(build-time-make-directory ALL set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +#detect cpu arch +if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + set (X86 TRUE) +else () + set (X86 FALSE) +endif () #add current directory to find tesseractPA.lib #link_directories(${CMAKE_CURRENT_LIST_DIR}) -file(GLOB MAIN_SOURCES +if (X86) + file(GLOB MAIN_SOURCES Source/Compiler.h - Source/CpuId.cpp + Source/CpuFeatures.cpp Source/CpuFeatures.h Source/DynamicParallelizer.cpp Source/DynamicParallelizer.h Source/Kernels/SeedScan_Default.cpp - Source/Kernels/SeedScan_x64_AVX2.cpp - Source/Kernels/SeedScan_x64_AVX512.cpp - Source/Kernels/SeedScan_x64_SSE41.cpp + Source/Kernels/SeedScan_x86_AVX2.cpp + Source/Kernels/SeedScan_x86_AVX512.cpp + Source/Kernels/SeedScan_x86_SSE41.cpp Source/Kernels/XoroShiro1_Default.h Source/Kernels/XoroShiro2_SSE2.h Source/Kernels/XoroShiro4_AVX2.h @@ -54,8 +61,31 @@ file(GLOB MAIN_SOURCES Source/SeedScan.h Source/Tools.cpp Source/Tools.h -) -add_executable(PLA-SeedFinder WIN32 ${MAIN_SOURCES}) + ) +else () + file(GLOB MAIN_SOURCES + Source/Compiler.h + Source/DynamicParallelizer.cpp + Source/DynamicParallelizer.h + Source/Kernels/SeedScan_Default.cpp + Source/Kernels/XoroShiro1_Default.h + Source/Main.cpp + Source/PLA-SeedFinder.cpp + Source/PLA-SeedFinder.h + Source/ReportCandidates.cpp + Source/ReportCandidates.h + Source/SeedScan.cpp + Source/SeedScan.h + Source/Tools.cpp + Source/Tools.h + ) +endif (X86) + +if (WIN32) + add_executable(PLA-SeedFinder WIN32 ${MAIN_SOURCES}) +else () + add_executable(PLA-SeedFinder ${MAIN_SOURCES}) +endif () set_target_properties(PLA-SeedFinder PROPERTIES LINKER_LANGUAGE CXX) target_link_libraries(PLA-SeedFinder) target_link_libraries(PLA-SeedFinder Threads::Threads) @@ -82,50 +112,52 @@ if (MSVC) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) else() - target_compile_options(PLA-SeedFinder PRIVATE -msse4.2) - - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -DPA_STATIC) - - set(ARCH_FLAGS_09_Nehalem -march=nehalem) - set(ARCH_FLAGS_13_Haswell -march=haswell) - set(ARCH_FLAGS_17_Skylake -march=skylake-avx512) - - # Run-time ISA dispatching - target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_08_Nehalem) - target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_13_Haswell) - target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) + if (X86) + target_compile_options(PLA-SeedFinder PRIVATE -msse4.2) + + target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -DPA_STATIC) + + set(ARCH_FLAGS_09_Nehalem -march=nehalem) + set(ARCH_FLAGS_13_Haswell -march=haswell) + set(ARCH_FLAGS_17_Skylake -march=skylake-avx512) + + # Run-time ISA dispatching + target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_08_Nehalem) + target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_13_Haswell) + target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) + else () + target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -DPA_STATIC) + endif (X86) endif() - -# Run-time CPU dispatching. -if (ARCH_FLAGS_09_Nehalem) -SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_x64_SSE41.cpp - PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_09_Nehalem} -) -endif() -if (ARCH_FLAGS_13_Haswell) -SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_x64_AVX2.cpp - PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_13_Haswell} -) -endif() -if (ARCH_FLAGS_17_Skylake) -SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_x64_AVX512.cpp - PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_17_Skylake} -) -endif() - - - - - - -#copy needed dlls -#file(COPY *.dll DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) -file(GLOB MY_DLLS - "*.dll" -) -file(COPY ${MY_DLLS} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +if(X86) + # Run-time CPU dispatching. + if (ARCH_FLAGS_09_Nehalem) + SET_SOURCE_FILES_PROPERTIES( + Source/Kernels/SeedScan_x86_SSE41.cpp + PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_09_Nehalem} + ) + endif() + if (ARCH_FLAGS_13_Haswell) + SET_SOURCE_FILES_PROPERTIES( + Source/Kernels/SeedScan_x86_AVX2.cpp + PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_13_Haswell} + ) + endif() + if (ARCH_FLAGS_17_Skylake) + SET_SOURCE_FILES_PROPERTIES( + Source/Kernels/SeedScan_x86_AVX512.cpp + PROPERTIES COMPILE_FLAGS ${ARCH_FLAGS_17_Skylake} + ) + endif() +endif(X86) + +if (WIN32) + #copy needed dlls + #file(COPY *.dll DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + file(GLOB MY_DLLS + "*.dll" + ) + file(COPY ${MY_DLLS} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +endif(WIN32) \ No newline at end of file From 10566a7bb4ab8f8ec9ac31c7bd8eb572ba515b77 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Mon, 20 Nov 2023 17:31:29 +0800 Subject: [PATCH 04/10] PLA-SeedFinder.cpp: remove unused var --- PLA-SeedFinder/Source/PLA-SeedFinder.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/PLA-SeedFinder/Source/PLA-SeedFinder.cpp b/PLA-SeedFinder/Source/PLA-SeedFinder.cpp index b6ad35f..64d3cf8 100644 --- a/PLA-SeedFinder/Source/PLA-SeedFinder.cpp +++ b/PLA-SeedFinder/Source/PLA-SeedFinder.cpp @@ -56,8 +56,6 @@ int32_t pa_PLA_find_seeds_threads( << stats.ivs[4] << " " << stats.ivs[5] << std::endl; - EcPidMatchReporter reporter(stats); - std::cout << std::endl; print_isa(); std::cout << "Threads: " << threads << std::endl; From bc42091a0478fadc1ffa38ca801fb3318624c179 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Tue, 21 Nov 2023 15:51:00 +0800 Subject: [PATCH 05/10] add .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build/ From 2bef5640fd1cbf43b5e5c44323767b7015ed0bed Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Fri, 1 Dec 2023 18:37:24 +0800 Subject: [PATCH 06/10] compilation optimization --- PLA-SeedFinder/CMakeLists.txt | 11 ++++------- PLA-SeedFinder/Source/Kernels/XoroShiro1_Default.h | 6 +----- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/PLA-SeedFinder/CMakeLists.txt b/PLA-SeedFinder/CMakeLists.txt index 23a70cd..5e0f067 100644 --- a/PLA-SeedFinder/CMakeLists.txt +++ b/PLA-SeedFinder/CMakeLists.txt @@ -64,20 +64,13 @@ if (X86) ) else () file(GLOB MAIN_SOURCES - Source/Compiler.h Source/DynamicParallelizer.cpp - Source/DynamicParallelizer.h Source/Kernels/SeedScan_Default.cpp - Source/Kernels/XoroShiro1_Default.h Source/Main.cpp Source/PLA-SeedFinder.cpp - Source/PLA-SeedFinder.h Source/ReportCandidates.cpp - Source/ReportCandidates.h Source/SeedScan.cpp - Source/SeedScan.h Source/Tools.cpp - Source/Tools.h ) endif (X86) @@ -127,6 +120,10 @@ else() target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) else () target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -DPA_STATIC) + SET_SOURCE_FILES_PROPERTIES( + Source/Kernels/SeedScan_Default.cpp + PROPERTIES COMPILE_FLAGS -O2 + ) endif (X86) endif() diff --git a/PLA-SeedFinder/Source/Kernels/XoroShiro1_Default.h b/PLA-SeedFinder/Source/Kernels/XoroShiro1_Default.h index 50cb6b8..7952ae0 100644 --- a/PLA-SeedFinder/Source/Kernels/XoroShiro1_Default.h +++ b/PLA-SeedFinder/Source/Kernels/XoroShiro1_Default.h @@ -18,11 +18,7 @@ namespace PokemonAutomation{ class XoroShiroX1_Default{ public: - PA_FORCE_INLINE XoroShiroX1_Default(uint64_t seed){ - state[0] = seed; - state[1] = 0x82A2B175229D6A5B; - } - + PA_FORCE_INLINE XoroShiroX1_Default(uint64_t seed) : state {seed, 0x82A2B175229D6A5B} {} PA_FORCE_INLINE uint64_t get_int64(){ return state[0] + state[1]; } From 5634e08e584d56e00766c08259e080b2b1f1afa7 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Mon, 4 Dec 2023 00:00:38 +0800 Subject: [PATCH 07/10] Fixes PA_EXPORT def --- PLA-SeedFinder/CMakeLists.txt | 4 ++-- PLA-SeedFinder/Source/PLA-SeedFinder.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/PLA-SeedFinder/CMakeLists.txt b/PLA-SeedFinder/CMakeLists.txt index 5e0f067..c4b1acd 100644 --- a/PLA-SeedFinder/CMakeLists.txt +++ b/PLA-SeedFinder/CMakeLists.txt @@ -108,7 +108,7 @@ else() if (X86) target_compile_options(PLA-SeedFinder PRIVATE -msse4.2) - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -DPA_STATIC) + target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic) set(ARCH_FLAGS_09_Nehalem -march=nehalem) set(ARCH_FLAGS_13_Haswell -march=haswell) @@ -119,7 +119,7 @@ else() target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_13_Haswell) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) else () - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -DPA_STATIC) + target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic) SET_SOURCE_FILES_PROPERTIES( Source/Kernels/SeedScan_Default.cpp PROPERTIES COMPILE_FLAGS -O2 diff --git a/PLA-SeedFinder/Source/PLA-SeedFinder.h b/PLA-SeedFinder/Source/PLA-SeedFinder.h index b172778..037094a 100644 --- a/PLA-SeedFinder/Source/PLA-SeedFinder.h +++ b/PLA-SeedFinder/Source/PLA-SeedFinder.h @@ -6,6 +6,7 @@ #include +#if defined(_WIN32) #if defined _WINDLL #define PA_EXPORT __declspec(dllexport) #elif defined PA_STATIC @@ -13,6 +14,9 @@ #else #define PA_EXPORT __declspec(dllimport) #endif +#else +#define PA_EXPORT +#endif #ifdef __cplusplus extern "C" { From bc1466ed0231ee1b5404e2cddf9f0bde358e393d Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Sun, 3 Dec 2023 21:05:45 +0800 Subject: [PATCH 08/10] impl seed scan unroll funcs for aarch64 using apple simd --- PLA-SeedFinder/CMakeLists.txt | 5 + .../Source/Kernels/SeedScan_aarch64.cpp | 148 ++++++++++++++++++ .../Source/Kernels/XoroShiro_aarch64.h | 56 +++++++ PLA-SeedFinder/Source/SeedScan.cpp | 23 ++- 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp create mode 100644 PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h diff --git a/PLA-SeedFinder/CMakeLists.txt b/PLA-SeedFinder/CMakeLists.txt index c4b1acd..a1c3367 100644 --- a/PLA-SeedFinder/CMakeLists.txt +++ b/PLA-SeedFinder/CMakeLists.txt @@ -66,6 +66,7 @@ else () file(GLOB MAIN_SOURCES Source/DynamicParallelizer.cpp Source/Kernels/SeedScan_Default.cpp + Source/Kernels/SeedScan_aarch64.cpp Source/Main.cpp Source/PLA-SeedFinder.cpp Source/ReportCandidates.cpp @@ -124,6 +125,10 @@ else() Source/Kernels/SeedScan_Default.cpp PROPERTIES COMPILE_FLAGS -O2 ) + SET_SOURCE_FILES_PROPERTIES( + Source/Kernels/SeedScan_aarch64.cpp + PROPERTIES COMPILE_FLAGS -O2 + ) endif (X86) endif() diff --git a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp new file mode 100644 index 0000000..c8043af --- /dev/null +++ b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp @@ -0,0 +1,148 @@ +#include +#include +#include "XoroShiro_aarch64.h" + +namespace PokemonAutomation{ + +#if defined __aarch64__ && defined __APPLE__ + +bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ + simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); + simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000); + // simd_ulong4 u = simd_make_ulong4(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX); + simd_ulong4 pid; + iterations /= 4; + + do { + XoroShiro4 rng(seed); + rng.next(); + size_t lc = rolls; + do { + rng.next(); + pid = rng.get(); // & u; + simd_ulong1* i = (simd_ulong1*) &pid; + if ((uint32_t)i[0] == desired_pid || + (uint32_t)i[1] == desired_pid || + (uint32_t)i[2] == desired_pid || + (uint32_t)i[3] == desired_pid) { + return true; + } + } while (--lc); + seed += delta; + } while (--iterations); + return false; +} + +bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ + desired_pid &= 0xefffffff; + simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); + simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000); + simd_ulong4 u = simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff); + simd_ulong4 pid; + iterations /= 4; + + do { + XoroShiro4 rng(seed); + size_t lc = rolls + 1; + do { + rng.next(); + pid = rng.get(); + simd_ulong1* i = (simd_ulong1*) &pid; + if ((uint32_t)i[0] == UINT32_MAX || + (uint32_t)i[1] == UINT32_MAX || + (uint32_t)i[2] == UINT32_MAX || + (uint32_t)i[3] == UINT32_MAX) { + return true; + } + pid &= u; + if ((uint32_t)i[0] == desired_pid || + (uint32_t)i[1] == desired_pid || + (uint32_t)i[2] == desired_pid || + (uint32_t)i[3] == desired_pid) { + return true; + } + } while (--lc); + seed += delta; + } while (--iterations); + return false; +} + +bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ + simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000), + simd_make_ulong4(start_seed+0x500000000, start_seed+0x600000000, start_seed+0x700000000, start_seed+0x800000000)); + simd_ulong8 delta = simd_make_ulong8(simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000), + simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000)); + simd_ulong8 pid; + iterations /= 8; + + do { + XoroShiro8 rng(seed); + rng.next(); + size_t lc = rolls; + do { + rng.next(); + pid = rng.get(); // & u; + simd_ulong1* i = (simd_ulong1*) &pid; + if ((uint32_t)i[0] == desired_pid || + (uint32_t)i[1] == desired_pid || + (uint32_t)i[2] == desired_pid || + (uint32_t)i[3] == desired_pid || + (uint32_t)i[4] == desired_pid || + (uint32_t)i[5] == desired_pid || + (uint32_t)i[6] == desired_pid || + (uint32_t)i[7] == desired_pid) { + return true; + } + } while (--lc); + seed += delta; + } while (--iterations); + return false; +} + +bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ + desired_pid &= 0xefffffff; + simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000), + simd_make_ulong4(start_seed+0x500000000, start_seed+0x600000000, start_seed+0x700000000, start_seed+0x800000000)); + simd_ulong8 delta = simd_make_ulong8(simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000), + simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000)); + simd_ulong8 u = simd_make_ulong8(simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff), + simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff)); + simd_ulong8 pid; + iterations /= 8; + + do { + XoroShiro8 rng(seed); + size_t lc = rolls + 1; + do { + rng.next(); + pid = rng.get(); + simd_ulong1* i = (simd_ulong1*) &pid; + if ((uint32_t)i[0] == UINT32_MAX || + (uint32_t)i[1] == UINT32_MAX || + (uint32_t)i[2] == UINT32_MAX || + (uint32_t)i[3] == UINT32_MAX || + (uint32_t)i[4] == UINT32_MAX || + (uint32_t)i[5] == UINT32_MAX || + (uint32_t)i[6] == UINT32_MAX || + (uint32_t)i[7] == UINT32_MAX) { + return true; + } + pid &= u; + if ((uint32_t)i[0] == desired_pid || + (uint32_t)i[1] == desired_pid || + (uint32_t)i[2] == desired_pid || + (uint32_t)i[3] == desired_pid || + (uint32_t)i[4] == desired_pid || + (uint32_t)i[5] == desired_pid || + (uint32_t)i[6] == desired_pid || + (uint32_t)i[7] == desired_pid) { + return true; + } + } while (--lc); + seed += delta; + } while (--iterations); + return false; +} + +#endif +} diff --git a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h new file mode 100644 index 0000000..03dc988 --- /dev/null +++ b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h @@ -0,0 +1,56 @@ +#pragma once + +#if defined __aarch64__ && defined __APPLE__ +#include +#include "Compiler.h" + +#define MAGIC_NUMBER 0x82A2B175229D6A5B + +namespace PokemonAutomation{ + +static const simd_ulong4 magic_ulong4 = simd_make_ulong4(MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER); + +class XoroShiro4{ +public: + PA_FORCE_INLINE XoroShiro4(simd_ulong4 seed) + : state {seed, magic_ulong4} {} + PA_FORCE_INLINE simd_ulong4 get(){ return state[0] + state[1]; } + PA_FORCE_INLINE void next(){ + simd_ulong4 s0 = state[0]; + simd_ulong4 s1 = state[1]; + s1 = s1 ^ s0; + s0 = (s0 << 24) | (s0 >> 40); + state[1] = (s1 << 37) | (s1 >> 27); + s1 = s1 ^ (s1 << 16); + state[0] = s0 ^ s1; + } + +private: + simd_ulong4 state[2]; +}; + + +class XoroShiro8{ +public: + PA_FORCE_INLINE XoroShiro8(simd_ulong8 seed) + : state {seed, simd_make_ulong8(magic_ulong4, magic_ulong4)} {} + PA_FORCE_INLINE simd_ulong8 get(){ return state[0] + state[1]; } + PA_FORCE_INLINE void next(){ + simd_ulong8 s0 = state[0]; + simd_ulong8 s1 = state[1]; + s1 = s1 ^ s0; + s0 = (s0 << 24) | (s0 >> 40); + state[1] = (s1 << 37) | (s1 >> 27); + s1 = s1 ^ (s1 << 16); + state[0] = s0 ^ s1; + } + +private: + simd_ulong8 state[2]; +}; + + +} + +#undef MAGIC_NUMBER +#endif \ No newline at end of file diff --git a/PLA-SeedFinder/Source/SeedScan.cpp b/PLA-SeedFinder/Source/SeedScan.cpp index e3c2c3a..f21fba7 100644 --- a/PLA-SeedFinder/Source/SeedScan.cpp +++ b/PLA-SeedFinder/Source/SeedScan.cpp @@ -30,10 +30,16 @@ bool seed_scan_thorough_unroll4_SSE41(size_t rolls, uint32_t desired_pid, uint64 bool seed_scan_thorough_unroll8_AVX2(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); bool seed_scan_thorough_unroll16_AVX512(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); void print_isa(){ #ifdef __aarch64__ + cout << "Instruction Set: M1" << endl; + return; #else #if !_MSC_VER || _WIN64 if (CPU_CAPABILITY.OS_AVX512 && CPU_CAPABILITY.HW_AVX512_DQ){ @@ -89,7 +95,14 @@ bool seed_scan_common(size_t rolls, uint32_t desired_pid, uint64_t start_seed, u iterations -= block; } #endif - +#if defined __aarch64__ && defined __APPLE__ + uint64_t block = iterations / 8 * 8; + if (block > 0 && seed_scan_common_unroll8(rolls, desired_pid, start_seed, block)){ + return true; + } + start_seed += block * 0x100000000; + iterations -= block; +#endif if (iterations > 0){ return seed_scan_common_Default(rolls, desired_pid, start_seed, iterations); } @@ -129,6 +142,14 @@ bool seed_scan_thorough(size_t rolls, uint32_t desired_pid, uint64_t start_seed, iterations -= block; } #endif +#if defined __aarch64__ && defined __APPLE__ + uint64_t block = iterations / 8 * 8; + if (block > 0 && seed_scan_thorough_unroll8(rolls, desired_pid, start_seed, block)){ + return true; + } + start_seed += block * 0x100000000; + iterations -= block; +#endif if (iterations > 0){ return seed_scan_thorough_Default(rolls, desired_pid, start_seed, iterations); From 0ea36ef61b6b70c80b3d09ebce34d1091eb614b9 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Sun, 3 Dec 2023 23:09:36 +0800 Subject: [PATCH 09/10] use simd_reduce_* to test if result matches --- .../Source/Kernels/SeedScan_aarch64.cpp | 69 ++++++------------- .../Source/Kernels/XoroShiro_aarch64.h | 5 ++ PLA-SeedFinder/Source/SeedScan.cpp | 4 +- 3 files changed, 28 insertions(+), 50 deletions(-) diff --git a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp index c8043af..dab53f6 100644 --- a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp +++ b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp @@ -7,9 +7,9 @@ namespace PokemonAutomation{ #if defined __aarch64__ && defined __APPLE__ bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ + simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid); simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000); - // simd_ulong4 u = simd_make_ulong4(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX); simd_ulong4 pid; iterations /= 4; @@ -19,12 +19,8 @@ bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start size_t lc = rolls; do { rng.next(); - pid = rng.get(); // & u; - simd_ulong1* i = (simd_ulong1*) &pid; - if ((uint32_t)i[0] == desired_pid || - (uint32_t)i[1] == desired_pid || - (uint32_t)i[2] == desired_pid || - (uint32_t)i[3] == desired_pid) { + pid = rng.get_masked(); + if (simd_reduce_min(pid ^ t) == 0) { return true; } } while (--lc); @@ -35,9 +31,10 @@ bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ desired_pid &= 0xefffffff; + simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid); simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000); - simd_ulong4 u = simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff); + simd_ulong4 mask = simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff); simd_ulong4 pid; iterations /= 4; @@ -46,19 +43,12 @@ bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t sta size_t lc = rolls + 1; do { rng.next(); - pid = rng.get(); - simd_ulong1* i = (simd_ulong1*) &pid; - if ((uint32_t)i[0] == UINT32_MAX || - (uint32_t)i[1] == UINT32_MAX || - (uint32_t)i[2] == UINT32_MAX || - (uint32_t)i[3] == UINT32_MAX) { + pid = rng.get_masked(); + if (simd_reduce_max(pid) == UINT32_MAX) { return true; } - pid &= u; - if ((uint32_t)i[0] == desired_pid || - (uint32_t)i[1] == desired_pid || - (uint32_t)i[2] == desired_pid || - (uint32_t)i[3] == desired_pid) { + pid &= mask; + if (simd_reduce_min(pid ^ t) == 0) { return true; } } while (--lc); @@ -68,6 +58,8 @@ bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t sta } bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ + simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid), + simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid)); simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000), simd_make_ulong4(start_seed+0x500000000, start_seed+0x600000000, start_seed+0x700000000, start_seed+0x800000000)); simd_ulong8 delta = simd_make_ulong8(simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000), @@ -81,16 +73,8 @@ bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start size_t lc = rolls; do { rng.next(); - pid = rng.get(); // & u; - simd_ulong1* i = (simd_ulong1*) &pid; - if ((uint32_t)i[0] == desired_pid || - (uint32_t)i[1] == desired_pid || - (uint32_t)i[2] == desired_pid || - (uint32_t)i[3] == desired_pid || - (uint32_t)i[4] == desired_pid || - (uint32_t)i[5] == desired_pid || - (uint32_t)i[6] == desired_pid || - (uint32_t)i[7] == desired_pid) { + pid = rng.get_masked(); + if (simd_reduce_min(pid ^ t) == 0) { return true; } } while (--lc); @@ -101,11 +85,13 @@ bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ desired_pid &= 0xefffffff; + simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid), + simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid)); simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000), simd_make_ulong4(start_seed+0x500000000, start_seed+0x600000000, start_seed+0x700000000, start_seed+0x800000000)); simd_ulong8 delta = simd_make_ulong8(simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000), simd_make_ulong4(0x900000000, 0x900000000, 0x900000000, 0x900000000)); - simd_ulong8 u = simd_make_ulong8(simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff), + simd_ulong8 u1 = simd_make_ulong8(simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff), simd_make_ulong4(0xefffffff, 0xefffffff, 0xefffffff, 0xefffffff)); simd_ulong8 pid; iterations /= 8; @@ -115,27 +101,12 @@ bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t sta size_t lc = rolls + 1; do { rng.next(); - pid = rng.get(); - simd_ulong1* i = (simd_ulong1*) &pid; - if ((uint32_t)i[0] == UINT32_MAX || - (uint32_t)i[1] == UINT32_MAX || - (uint32_t)i[2] == UINT32_MAX || - (uint32_t)i[3] == UINT32_MAX || - (uint32_t)i[4] == UINT32_MAX || - (uint32_t)i[5] == UINT32_MAX || - (uint32_t)i[6] == UINT32_MAX || - (uint32_t)i[7] == UINT32_MAX) { + pid = rng.get_masked(); + if (simd_reduce_max(pid) == UINT32_MAX) { return true; } - pid &= u; - if ((uint32_t)i[0] == desired_pid || - (uint32_t)i[1] == desired_pid || - (uint32_t)i[2] == desired_pid || - (uint32_t)i[3] == desired_pid || - (uint32_t)i[4] == desired_pid || - (uint32_t)i[5] == desired_pid || - (uint32_t)i[6] == desired_pid || - (uint32_t)i[7] == desired_pid) { + pid &= u1; + if (simd_reduce_min(pid ^ t) == 0) { return true; } } while (--lc); diff --git a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h index 03dc988..a791780 100644 --- a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h +++ b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h @@ -9,12 +9,16 @@ namespace PokemonAutomation{ static const simd_ulong4 magic_ulong4 = simd_make_ulong4(MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER); +static const simd_ulong4 mask_ulong4 = simd_make_ulong4(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX); +static const simd_ulong8 mask_ulong8 = simd_make_ulong8(simd_make_ulong4(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX), + simd_make_ulong4(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX)); class XoroShiro4{ public: PA_FORCE_INLINE XoroShiro4(simd_ulong4 seed) : state {seed, magic_ulong4} {} PA_FORCE_INLINE simd_ulong4 get(){ return state[0] + state[1]; } + PA_FORCE_INLINE simd_ulong4 get_masked(){ return (state[0] + state[1]) & mask_ulong4; } PA_FORCE_INLINE void next(){ simd_ulong4 s0 = state[0]; simd_ulong4 s1 = state[1]; @@ -35,6 +39,7 @@ class XoroShiro8{ PA_FORCE_INLINE XoroShiro8(simd_ulong8 seed) : state {seed, simd_make_ulong8(magic_ulong4, magic_ulong4)} {} PA_FORCE_INLINE simd_ulong8 get(){ return state[0] + state[1]; } + PA_FORCE_INLINE simd_ulong8 get_masked(){ return (state[0] + state[1]) & mask_ulong8; } PA_FORCE_INLINE void next(){ simd_ulong8 s0 = state[0]; simd_ulong8 s1 = state[1]; diff --git a/PLA-SeedFinder/Source/SeedScan.cpp b/PLA-SeedFinder/Source/SeedScan.cpp index f21fba7..cf92836 100644 --- a/PLA-SeedFinder/Source/SeedScan.cpp +++ b/PLA-SeedFinder/Source/SeedScan.cpp @@ -38,8 +38,10 @@ bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t sta void print_isa(){ #ifdef __aarch64__ - cout << "Instruction Set: M1" << endl; + #if defined __APPLE__ + cout << "Instruction Set: AARCH64" << endl; return; + #endif #else #if !_MSC_VER || _WIN64 if (CPU_CAPABILITY.OS_AVX512 && CPU_CAPABILITY.HW_AVX512_DQ){ From 3e2efde17d6b9fa9ec16844cb2bed52b0507f638 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Fri, 22 Dec 2023 13:48:34 +0800 Subject: [PATCH 10/10] minor fixes & rename seed_scan_* --- PLA-SeedFinder/CMakeLists.txt | 25 ++++--------------- .../Source/Kernels/SeedScan_aarch64.cpp | 8 +++--- .../Source/Kernels/XoroShiro_aarch64.h | 2 +- PLA-SeedFinder/Source/SeedScan.cpp | 14 +++++------ 4 files changed, 17 insertions(+), 32 deletions(-) diff --git a/PLA-SeedFinder/CMakeLists.txt b/PLA-SeedFinder/CMakeLists.txt index a1c3367..9c04d27 100644 --- a/PLA-SeedFinder/CMakeLists.txt +++ b/PLA-SeedFinder/CMakeLists.txt @@ -16,13 +16,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) #set(CMAKE_VERBOSE_MAKEFILE ON) -# set(CMAKE_AUTOMOC ON) -# set(CMAKE_AUTORCC ON) -# set(CMAKE_AUTOUIC ON) - -add_custom_target(build-time-make-directory ALL - COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/) - #Find threads library set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) @@ -92,6 +85,9 @@ target_include_directories(PLA-SeedFinder PRIVATE Source/) #enable MP with MSVC (Build with Multiple Processes) if (MSVC) + add_custom_target(build-time-make-directory ALL + COMMAND ${CMAKE_COMMAND} -E make_directory Assembly/) + target_compile_options(PLA-SeedFinder PRIVATE /FAs /FaAssembly/ /MP /W4) target_compile_options(PLA-SeedFinder PRIVATE /wd5054) # Deprecated enum arithemtic target_compile_options(PLA-SeedFinder PRIVATE /wd4505) # unreferenced local function has been removed @@ -106,11 +102,10 @@ if (MSVC) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) else() + target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic -O2) if (X86) target_compile_options(PLA-SeedFinder PRIVATE -msse4.2) - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic) - set(ARCH_FLAGS_09_Nehalem -march=nehalem) set(ARCH_FLAGS_13_Haswell -march=haswell) set(ARCH_FLAGS_17_Skylake -march=skylake-avx512) @@ -119,16 +114,6 @@ else() target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_08_Nehalem) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_13_Haswell) target_compile_definitions(PLA-SeedFinder PRIVATE PA_AutoDispatch_17_Skylake) - else () - target_compile_options(PLA-SeedFinder PRIVATE -Wall -Wpedantic) - SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_Default.cpp - PROPERTIES COMPILE_FLAGS -O2 - ) - SET_SOURCE_FILES_PROPERTIES( - Source/Kernels/SeedScan_aarch64.cpp - PROPERTIES COMPILE_FLAGS -O2 - ) endif (X86) endif() @@ -162,4 +147,4 @@ if (WIN32) "*.dll" ) file(COPY ${MY_DLLS} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) -endif(WIN32) \ No newline at end of file +endif(WIN32) diff --git a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp index dab53f6..db60a3a 100644 --- a/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp +++ b/PLA-SeedFinder/Source/Kernels/SeedScan_aarch64.cpp @@ -6,7 +6,7 @@ namespace PokemonAutomation{ #if defined __aarch64__ && defined __APPLE__ -bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_common_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid); simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); simd_ulong4 delta = simd_make_ulong4(0x500000000, 0x500000000, 0x500000000, 0x500000000); @@ -29,7 +29,7 @@ bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start return false; } -bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_thorough_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ desired_pid &= 0xefffffff; simd_ulong4 t = simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid); simd_ulong4 seed = simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000); @@ -57,7 +57,7 @@ bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t sta return false; } -bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_common_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid), simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid)); simd_ulong8 seed = simd_make_ulong8(simd_make_ulong4(start_seed, start_seed+0x200000000, start_seed+0x300000000, start_seed+0x400000000), @@ -83,7 +83,7 @@ bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start return false; } -bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ +bool seed_scan_thorough_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations){ desired_pid &= 0xefffffff; simd_ulong8 t = simd_make_ulong8(simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid), simd_make_ulong4((uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid, (uint64_t)desired_pid)); diff --git a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h index a791780..16dee6d 100644 --- a/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h +++ b/PLA-SeedFinder/Source/Kernels/XoroShiro_aarch64.h @@ -58,4 +58,4 @@ class XoroShiro8{ } #undef MAGIC_NUMBER -#endif \ No newline at end of file +#endif diff --git a/PLA-SeedFinder/Source/SeedScan.cpp b/PLA-SeedFinder/Source/SeedScan.cpp index cf92836..2978c38 100644 --- a/PLA-SeedFinder/Source/SeedScan.cpp +++ b/PLA-SeedFinder/Source/SeedScan.cpp @@ -30,16 +30,16 @@ bool seed_scan_thorough_unroll4_SSE41(size_t rolls, uint32_t desired_pid, uint64 bool seed_scan_thorough_unroll8_AVX2(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); bool seed_scan_thorough_unroll16_AVX512(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_common_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_thorough_unroll4(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_common_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); -bool seed_scan_thorough_unroll8(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_common_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_thorough_unroll4_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_common_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); +bool seed_scan_thorough_unroll8_NEON(size_t rolls, uint32_t desired_pid, uint64_t start_seed, uint64_t iterations); void print_isa(){ #ifdef __aarch64__ #if defined __APPLE__ - cout << "Instruction Set: AARCH64" << endl; + cout << "Instruction Set: NEON" << endl; return; #endif #else @@ -99,7 +99,7 @@ bool seed_scan_common(size_t rolls, uint32_t desired_pid, uint64_t start_seed, u #endif #if defined __aarch64__ && defined __APPLE__ uint64_t block = iterations / 8 * 8; - if (block > 0 && seed_scan_common_unroll8(rolls, desired_pid, start_seed, block)){ + if (block > 0 && seed_scan_common_unroll8_NEON(rolls, desired_pid, start_seed, block)){ return true; } start_seed += block * 0x100000000; @@ -146,7 +146,7 @@ bool seed_scan_thorough(size_t rolls, uint32_t desired_pid, uint64_t start_seed, #endif #if defined __aarch64__ && defined __APPLE__ uint64_t block = iterations / 8 * 8; - if (block > 0 && seed_scan_thorough_unroll8(rolls, desired_pid, start_seed, block)){ + if (block > 0 && seed_scan_thorough_unroll8_NEON(rolls, desired_pid, start_seed, block)){ return true; } start_seed += block * 0x100000000;