From c6f25cafc63cc89996709a2ec1cb2ddca96ca5a5 Mon Sep 17 00:00:00 2001 From: xiaying Date: Mon, 22 Jul 2024 19:51:53 +0800 Subject: [PATCH] MNN:Sync: Sync Internal 2.9.3 --- CMakeLists.txt | 43 +- .../arm32/bf16/MNNAxByClampBroadcastC4_BF16.S | 0 .../bf16/MNNConvRunForLineDepthwise_BF16.S | 0 .../bf16/MNNConvRunForUnitDepthWise_BF16.S | 0 .../cpubackend}/arm/arm32/bf16/MNNGelu_BF16.S | 0 .../arm32/bf16/MNNPackC4ForMatMul_A_BF16.S | 208 ++ .../arm/arm32/bf16/MNNPackC4_BF16.S | 202 ++ .../arm32/bf16/MNNPackedMatMulRemain_BF16.S | 154 ++ .../arm/arm32/bf16/MNNPackedMatMul_BF16.S | 211 ++ .../arm32/bf16/MNNReluWithSlopeChannelBF16.S | 0 .../arm/arm32/bf16/MNNUnPackC4_BF16.S | 0 .../bf16/ARMV86_MNNPackedMatMulRemain_BF16.S | 566 ++++ .../arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S | 286 +++ .../arm64/bf16/MNNAxByClampBroadcastC4_BF16.S | 0 .../bf16/MNNConvRunForLineDepthwise_BF16.S | 0 .../bf16/MNNConvRunForUnitDepthWise_BF16.S | 0 .../cpubackend}/arm/arm64/bf16/MNNGelu_BF16.S | 0 .../arm64/bf16/MNNPackC4ForMatMul_A_BF16.S | 260 ++ .../arm/arm64/bf16/MNNPackC4_BF16.S | 0 .../arm/arm64/bf16/MNNPackC8_BF16.S | 126 + .../arm64/bf16/MNNPackedMatMulRemain_BF16.S | 672 +++++ .../arm/arm64/bf16/MNNPackedMatMul_BF16.S | 501 ++++ .../arm64/bf16/MNNReluWithSlopeChannelBF16.S | 0 .../arm/arm64/bf16/MNNUnPackC4_BF16.S | 0 .../cpubackend}/bf16/BF16Backend.cpp | 0 .../cpubackend}/bf16/BF16Backend.hpp | 0 .../cpubackend}/bf16/BF16Binary.cpp | 0 .../cpubackend}/bf16/BF16Binary.hpp | 0 backupcode/cpubackend/bf16/BF16Functions.cpp | 918 +++++++ backupcode/cpubackend/bf16/BF16Functions.hpp | 16 + .../cpubackend}/bf16/BF16Unary.cpp | 0 .../cpubackend}/bf16/BF16Unary.hpp | 0 backupcode/cpubackend/bf16/CMakeLists.txt | 19 + backupcode/cpubackend/bf16/VecHalf.hpp | 517 ++++ .../bf16/WinogradOptFunctionHalf.cpp | 0 .../bf16/WinogradOptFunctionHalf.hpp | 0 .../cpubackend}/bf16/register.py | 0 docs/contribute/op.md | 50 +- docs/contribute/pic1.png | Bin 0 -> 714878 bytes docs/contribute/pic2.png | Bin 0 -> 806040 bytes docs/faq.md | 2 +- docs/start/demo.md | 2 +- docs/tools/test.md | 6 +- docs/transformers/diffusion.md | 71 +- docs/transformers/llm.md | 6 + express/Executor.cpp | 40 +- express/Expr.cpp | 31 +- express/RuntimeAttr.hpp | 2 +- express/module/Module.cpp | 10 +- express/module/PipelineModule.cpp | 6 +- include/MNN/Interpreter.hpp | 14 + include/MNN/MNNDefine.h | 2 +- include/MNN/expr/Executor.hpp | 2 +- project/android/build_64.sh | 1 - project/ios/MNN.xcodeproj/project.pbxproj | 72 +- project/ios/Playground/AppDelegate.mm | 53 +- pymnn/pip_package/pyproject.toml | 2 +- pymnn/src/llm.h | 10 +- source/backend/arm82/Arm82Functions.cpp | 53 +- .../arm64/low_memory/MNNCountMinMax_ARM82.S | 278 ++ .../low_memory/MNNDynamicQuanInput_ARM82.S | 268 ++ .../MNNDynamicQuantAndReorder_ARM82.S | 433 ++++ .../arm64/low_memory/MNNDynamicQuantFP16.S | 231 +- .../low_memory/MNNGemmHybridInt4FP16_sdot.S | 314 --- .../low_memory/MNNGemmHybridInt4FP16_smmla.S | 506 ---- .../low_memory/MNNGemmHybridInt8FP16_sdot.S | 303 --- .../low_memory/MNNGemmHybridInt8FP16_smmla.S | 566 ---- ...MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16.S | 665 +++++ ...GemmInt8AddBiasScale_ARMV82_w4_Unit_FP16.S | 690 +++++ ...MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16.S | 855 ++++++ ...GemmInt8AddBiasScale_ARMV86_w4_Unit_FP16.S | 875 +++++++ .../asm/arm64/low_memory/MNNQuantScaleFP16.S | 114 +- source/backend/cpu/CMakeLists.txt | 1 - source/backend/cpu/CPUAttention.cpp | 735 ++++-- source/backend/cpu/CPUAttention.hpp | 18 +- source/backend/cpu/CPUBackend.cpp | 208 
+- source/backend/cpu/CPUBackend.hpp | 35 +- source/backend/cpu/CPUConvolution.cpp | 221 +- source/backend/cpu/CPUConvolution.hpp | 6 + .../backend/cpu/CPUConvolutionDepthwise.cpp | 8 +- source/backend/cpu/CPUDeconvolution.cpp | 6 +- source/backend/cpu/CPUDeconvolution.hpp | 2 +- source/backend/cpu/CPUMatMul.cpp | 96 +- source/backend/cpu/CPURelu.cpp | 11 +- source/backend/cpu/CPURuntime.cpp | 681 ++--- source/backend/cpu/CPURuntime.hpp | 32 +- source/backend/cpu/CPUSoftMaxInt8.cpp | 317 --- source/backend/cpu/CPUSoftMaxInt8.hpp | 39 - source/backend/cpu/CPUSoftmax.cpp | 113 +- source/backend/cpu/CPUSoftmax.hpp | 6 + source/backend/cpu/ThreadPool.cpp | 167 +- source/backend/cpu/ThreadPool.hpp | 10 +- source/backend/cpu/arm/CommonNeonBF16.cpp | 187 -- .../arm32/MNNGemmInt8AddBiasScale_16x4_Unit.S | 216 +- .../MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S | 99 +- .../MNNGemmInt8AddBiasScale_16x4_w4_Unit.S | 392 +++ .../MNNLineDepthWiseInt8AddBiasScaleUnit.S | 2 +- .../arm32/MNNPackedSparseQuantMatMulEpx1.S | 26 +- .../arm32/MNNPackedSparseQuantMatMulEpx4.S | 8 +- .../arm32/bf16/MNNPackC4ForMatMul_A_BF16.S | 61 +- .../cpu/arm/arm32/bf16/MNNPackC4_BF16.S | 73 +- .../arm32/bf16/MNNPackedMatMulRemain_BF16.S | 28 +- .../cpu/arm/arm32/bf16/MNNPackedMatMul_BF16.S | 68 +- .../arm64/MNNGemmInt8AddBiasScale_16x4_Unit.S | 373 ++- .../MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S | 182 +- .../MNNGemmInt8AddBiasScale_ARMV82_Unit.S | 634 ++++- .../MNNGemmInt8AddBiasScale_ARMV86_Unit.S | 1634 +++++++++--- .../MNNLineDepthWiseInt8AddBiasScaleUnit.S | 6 +- ...DepthWiseInt8AddBiasScale_ARMV82_Unit3X3.S | 29 +- .../arm/arm64/MNNPackC4Int8ForMatMulA_ARM82.S | 202 ++ .../arm/arm64/MNNPackC4Int8ForMatMulA_ARM86.S | 318 +++ .../arm64/MNNPackedSparseQuantMatMulEpx1.S | 5 +- .../arm64/MNNPackedSparseQuantMatMulEpx4.S | 5 +- .../bf16/ARMV86_MNNPackedMatMulRemain_BF16.S | 102 +- .../arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S | 55 +- .../arm64/bf16/MNNPackC4ForMatMul_A_BF16.S | 108 +- .../cpu/arm/arm64/bf16/MNNPackC8_BF16.S | 42 +- .../arm64/bf16/MNNPackedMatMulRemain_BF16.S | 121 +- .../cpu/arm/arm64/bf16/MNNPackedMatMul_BF16.S | 83 +- .../arm64/low_memory/MNNDynamicQuantFP32.S | 55 +- .../MNNDynamicUpdateConvBiasScale.S | 229 ++ .../arm64/low_memory/MNNGemmHybridInt4FP32.S | 308 --- .../low_memory/MNNGemmHybridInt4FP32_sdot.S | 413 --- .../low_memory/MNNGemmHybridInt4FP32_smmla.S | 476 ---- .../arm64/low_memory/MNNGemmHybridInt8FP32.S | 293 --- .../low_memory/MNNGemmHybridInt8FP32_sdot.S | 396 --- .../low_memory/MNNGemmHybridInt8FP32_smmla.S | 445 ---- .../MNNGemmInt8AddBiasScale_16x4_w4_Unit.S | 830 ++++++ .../MNNGemmInt8AddBiasScale_ARMV82_w4_Unit.S | 999 +++++++ .../MNNGemmInt8AddBiasScale_ARMV86_w4_Unit.S | 1205 +++++++++ source/backend/cpu/bf16/BF16Functions.cpp | 991 ++----- source/backend/cpu/bf16/CMakeLists.txt | 8 - .../backend/cpu/compute/CommonOptFunction.cpp | 625 ++--- .../backend/cpu/compute/CommonOptFunction.h | 37 +- .../cpu/compute/ConvInt8TiledExecutor.cpp | 683 ++++- .../cpu/compute/ConvInt8TiledExecutor.hpp | 29 +- .../backend/cpu/compute/ConvInt8Winograd.cpp | 32 +- .../cpu/compute/Convolution1x1Strassen.cpp | 26 +- .../cpu/compute/ConvolutionDepthwise3x3.cpp | 14 +- .../cpu/compute/ConvolutionDepthwise3x3.hpp | 1 + .../cpu/compute/ConvolutionFloatFactory.cpp | 14 +- .../backend/cpu/compute/ConvolutionHybrid.cpp | 401 --- .../backend/cpu/compute/ConvolutionHybrid.hpp | 48 - .../cpu/compute/ConvolutionPackWinograd.cpp | 68 +- .../cpu/compute/ConvolutionTiledExecutor.cpp | 14 +- 
.../cpu/compute/ConvolutionTiledExecutor.hpp | 2 +- .../cpu/compute/ConvolutionWinogradImpl.cpp | 13 - .../cpu/compute/ConvolutionWinogradImpl.hpp | 3 - .../compute/DenseConvolutionTiledExecutor.cpp | 148 +- .../compute/DenseConvolutionTiledExecutor.hpp | 1 - .../backend/cpu/compute/GemmInt8Executor.cpp | 44 +- .../backend/cpu/compute/GemmInt8Executor.hpp | 4 +- .../cpu/compute/IdstConvolutionInt8.cpp | 18 +- .../cpu/compute/IdstConvolutionInt8.hpp | 1 + .../backend/cpu/compute/Int8FunctionsOpt.cpp | 194 +- source/backend/cpu/compute/Int8FunctionsOpt.h | 24 +- .../compute/SparseConvInt8TiledExecutor.cpp | 16 +- .../compute/SparseConvInt8TiledExecutor.hpp | 2 +- .../SparseConvolutionTiledExecutor.cpp | 4 + source/backend/cpu/x86_x64/AVX2Functions.cpp | 3 - .../cpu/x86_x64/FunctionDispatcher.cpp | 6 +- .../cpu/x86_x64/avx/FunctionSummary.hpp | 5 - source/backend/cpu/x86_x64/avx/GemmAVX2.cpp | 174 -- source/backend/cpu/x86_x64/avx/GemmCommon.cpp | 16 +- source/backend/cpu/x86_x64/avx/GemmInt8.cpp | 870 ++++++- .../backend/cpu/x86_x64/avx512/GemmInt8.cpp | 77 +- .../x86_x64/avx512/GemmInt8_4_4_64_NOVNNI.cpp | 1 + .../cpu/x86_x64/avx512/GemmInt8_VNNI.cpp | 2283 +++++++++++++++- .../cpu/x86_x64/avx512/Matmul_4_4_64.inl | 2287 ++++++++++++++++- .../cpu/x86_x64/avx512/PackedFunction.cpp | 30 +- .../cpu/x86_x64/sse/FunctionSummary.hpp | 7 +- source/backend/cpu/x86_x64/sse/GemmCommon.cpp | 18 +- .../backend/cpu/x86_x64/sse/GemmFunction.hpp | 158 -- source/backend/cpu/x86_x64/sse/GemmInt8.cpp | 375 ++- source/backend/cpu/x86_x64/sse/GemmSSE.cpp | 38 - .../backend/cpu/x86_x64/sse/MathFunctions.cpp | 2 +- source/backend/metal/MetalBackend.hpp | 10 +- source/backend/metal/MetalBackend.mm | 161 +- source/backend/opencl/core/OpenCLBackend.cpp | 2 +- source/backend/opencl/core/OpenCLGemmTune.cpp | 305 +-- .../backend/opencl/core/OpenCLOPRegister.cpp | 13 +- .../opencl/core/OpenCLRunningUtils.cpp | 4 +- .../opencl/core/runtime/OpenCLRuntime.cpp | 4 +- .../buffer/AttentionBufExecution.cpp | 5 +- .../buffer/AttentionBufExecution.hpp | 2 - .../execution/buffer/ConvBufExecution.cpp | 174 +- .../execution/buffer/ConvBufExecution.hpp | 2 +- .../execution/buffer/ConvBufWinograd.cpp | 28 +- .../buffer/GroupNormBufExecution.cpp | 4 +- .../buffer/GroupNormBufExecution.hpp | 2 - .../execution/buffer/LoopBufExecution.cpp | 69 +- .../buffer/SelfAttentionBufExecution.cpp | 99 +- .../buffer/SelfAttentionBufExecution.hpp | 3 - .../buffer/SplitGeluBufExecution.cpp | 4 +- .../buffer/SplitGeluBufExecution.hpp | 2 - .../opencl/execution/cl/conv_2d_buf.cl | 278 +- .../backend/opencl/execution/cl/gemm_buf.cl | 162 +- .../opencl/execution/cl/matmul_params_buf.cl | 824 +++--- .../opencl/execution/cl/opencl_program.cc | 1141 ++++---- .../opencl/execution/cl/self_attention_buf.cl | 86 +- .../execution/cl/winogradTransform_buf.cl | 6 - source/core/Backend.hpp | 48 +- source/core/Concurrency.h | 2 +- source/core/ConvolutionCommon.cpp | 46 +- source/core/ConvolutionCommon.hpp | 2 +- source/core/FileLoader.cpp | 29 +- source/core/IDSTDecoder.hpp | 1 - source/core/Interpreter.cpp | 13 +- source/core/OpCommonUtils.cpp | 1 - source/core/Session.cpp | 13 +- source/core/Session.hpp | 3 +- source/geometry/GeometryOPRegister.cpp | 4 +- test.sh | 12 +- test/TestUtils.h | 2 +- test/core/ThreadPoolTest.cpp | 8 +- test/expr/ModuleTest.cpp | 6 +- test/op/ConvInt8Test.cpp | 20 +- test/op/ConvolutionTest.cpp | 2 +- test/op/DeconvolutionTest.cpp | 71 +- test/op/PReLUTest.cpp | 23 +- test/op/SoftmaxTest.cpp | 47 - test/speed/HybridConvSpeedTest.cpp | 
65 +- tools/converter/source/common/cli.cpp | 154 +- .../source/optimizer/PostConverter.cpp | 10 +- tools/converter/source/optimizer/Program.cpp | 21 +- tools/converter/source/optimizer/Program.hpp | 2 +- .../source/optimizer/TemplateMerge.cpp | 4 - .../ConvDeQuantizeLinearFuseToConvInt8.cpp | 324 ++- .../optimizer/merge/ConvertMatMulToConv2D.cpp | 96 +- .../onnxextra/OnnxConvolutionMerge.cpp | 48 +- .../onnxextra/OnnxDeQuantizeLinear.cpp | 47 +- .../source/optimizer/onnxextra/OnnxGemm.cpp | 11 +- .../onnxextra/OnnxQuantizeLinear.cpp | 10 +- .../postconvert/TransformGroupConvolution.cpp | 143 +- tools/cpp/ModuleBasic.cpp | 29 +- tools/cpp/checkFile.cpp | 4 +- tools/cpp/getPerformance.cpp | 32 +- tools/cv/CMakeLists.txt | 2 +- tools/script/apply_gptq.py | 19 +- tools/script/get_model.py | 4 +- tools/script/make_test_for_mnn.py | 8 +- tools/script/modelTest.py | 14 +- tools/script/register.py | 23 +- tools/script/testMNNFromOnnx.py | 2 +- tools/script/testMNNFromTf.py | 2 +- tools/script/testMNNFromTflite.py | 2 +- tools/script/testMNNFromTorch.py | 2 +- tools/train/source/demo/demoMain.cpp | 2 +- tools/train/source/nn/NN.cpp | 10 + transformers/diffusion/README.md | 45 - transformers/diffusion/env.yaml | 10 + transformers/diffusion/main.cpp | 14 +- transformers/diffusion/pipeline.cpp | 28 +- transformers/llm/engine/include/llm/llm.hpp | 121 + transformers/llm/engine/llm_demo.cpp | 4 +- transformers/llm/engine/src/llm.cpp | 107 +- .../{include/llm.hpp => src/llmconfig.hpp} | 202 +- transformers/llm/engine/src/tokenizer.cpp | 50 +- .../llm/engine/{include => src}/tokenizer.hpp | 11 +- 259 files changed, 28068 insertions(+), 11824 deletions(-) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm32/bf16/MNNAxByClampBroadcastC4_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm32/bf16/MNNConvRunForLineDepthwise_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm32/bf16/MNNConvRunForUnitDepthWise_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm32/bf16/MNNGelu_BF16.S (100%) create mode 100644 backupcode/cpubackend/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S create mode 100644 backupcode/cpubackend/arm/arm32/bf16/MNNPackC4_BF16.S create mode 100644 backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S create mode 100644 backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMul_BF16.S rename {source/backend/cpu => backupcode/cpubackend}/arm/arm32/bf16/MNNReluWithSlopeChannelBF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm32/bf16/MNNUnPackC4_BF16.S (100%) create mode 100644 backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S create mode 100644 backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNAxByClampBroadcastC4_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNConvRunForLineDepthwise_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNConvRunForUnitDepthWise_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNGelu_BF16.S (100%) create mode 100644 backupcode/cpubackend/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNPackC4_BF16.S (100%) create mode 100644 backupcode/cpubackend/arm/arm64/bf16/MNNPackC8_BF16.S create mode 100644 backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S create mode 100644 
backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMul_BF16.S rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNReluWithSlopeChannelBF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/arm/arm64/bf16/MNNUnPackC4_BF16.S (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/BF16Backend.cpp (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/BF16Backend.hpp (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/BF16Binary.cpp (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/BF16Binary.hpp (100%) create mode 100644 backupcode/cpubackend/bf16/BF16Functions.cpp create mode 100644 backupcode/cpubackend/bf16/BF16Functions.hpp rename {source/backend/cpu => backupcode/cpubackend}/bf16/BF16Unary.cpp (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/BF16Unary.hpp (100%) create mode 100644 backupcode/cpubackend/bf16/CMakeLists.txt create mode 100644 backupcode/cpubackend/bf16/VecHalf.hpp rename {source/backend/cpu => backupcode/cpubackend}/bf16/WinogradOptFunctionHalf.cpp (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/WinogradOptFunctionHalf.hpp (100%) rename {source/backend/cpu => backupcode/cpubackend}/bf16/register.py (100%) create mode 100644 docs/contribute/pic1.png create mode 100644 docs/contribute/pic2.png create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNCountMinMax_ARM82.S create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuanInput_ARM82.S create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantAndReorder_ARM82.S delete mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_sdot.S delete mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_smmla.S delete mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_sdot.S delete mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_smmla.S create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16.S create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16.S create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16.S create mode 100644 source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16.S delete mode 100644 source/backend/cpu/CPUSoftMaxInt8.cpp delete mode 100644 source/backend/cpu/CPUSoftMaxInt8.hpp delete mode 100644 source/backend/cpu/arm/CommonNeonBF16.cpp create mode 100644 source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S create mode 100644 source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM82.S create mode 100644 source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM86.S create mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNDynamicUpdateConvBiasScale.S delete mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32.S delete mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_sdot.S delete mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_smmla.S delete mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32.S delete mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_sdot.S delete mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_smmla.S create mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S create mode 100644 
source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit.S create mode 100644 source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit.S delete mode 100644 source/backend/cpu/compute/ConvolutionHybrid.cpp delete mode 100644 source/backend/cpu/compute/ConvolutionHybrid.hpp delete mode 100644 transformers/diffusion/README.md create mode 100644 transformers/diffusion/env.yaml create mode 100644 transformers/llm/engine/include/llm/llm.hpp rename transformers/llm/engine/{include/llm.hpp => src/llmconfig.hpp} (58%) rename transformers/llm/engine/{include => src}/tokenizer.hpp (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index abb66d6c8..b85353bb3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -408,6 +408,8 @@ include(${CMAKE_CURRENT_LIST_DIR}/source/backend/cpu/CMakeLists.txt) SET(MNN_PUB_HDRS "") SET(MNN_EXPR_PUB_HDRS "") +set(MNN_EXTRA_HEADERS "") + list(APPEND MNN_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/MNNDefine.h") list(APPEND MNN_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/Interpreter.hpp") list(APPEND MNN_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/HalideRuntime.h") @@ -430,6 +432,19 @@ list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Neur list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/ExecutorScope.hpp") list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Scope.hpp") +# Add Extra Header +IF(MNN_BUILD_OPENCV) + file(GLOB MNN_CV_HDRS ${CMAKE_CURRENT_SOURCE_DIR}/tools/cv/include/cv/*.hpp PARENT_SCOPE) + file(GLOB MNN_CV_IMGHDRS ${CMAKE_CURRENT_SOURCE_DIR}/tools/cv/include/cv/imgproc/*.hpp PARENT_SCOPE) + list(APPEND MNN_EXTRA_HEADERS ${MNN_CV_HDRS}) + list(APPEND MNN_EXTRA_HEADERS ${MNN_CV_IMGHDRS}) +ENDIF() +IF(MNN_BUILD_LLM) + file(GLOB MNN_LLM_HDRS ${CMAKE_CURRENT_SOURCE_DIR}/transformers/llm/engine/include/llm/*) + list(APPEND MNN_EXTRA_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/transformers/llm/engine/include/llm/llm.hpp) +ENDIF() + + set(MNN_DEPS "") set(MNN_EXTRA_DEPENDS "") @@ -659,11 +674,11 @@ IF(MNN_TENSORRT) ENDIF() IF(MNN_SEP_BUILD) - add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS}) + add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS} ${MNN_EXTRA_HEADERS}) target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS}) ELSE() IF(MNN_BUILD_SHARED_LIBS) - add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS}) + add_library(MNN SHARED ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS} ${MNN_EXTRA_HEADERS}) if (WIN32) foreach(TARGET ${MNN_TARGETS}) target_compile_definitions(${TARGET} PRIVATE "-DBUILDING_MNN_DLL") @@ -673,7 +688,7 @@ ELSE() target_compile_definitions(MNN INTERFACE "-DUSING_MNN_DLL") endif() ELSE() - add_library(MNN STATIC ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS}) + add_library(MNN STATIC ${CMAKE_CURRENT_LIST_DIR}/cmake/dummy.cpp ${MNN_OBJECTS_TO_LINK} ${MNN_PUB_HDRS} ${MNN_EXPR_PUB_HDRS} ${MNN_EXTRA_HEADERS}) ENDIF() target_link_libraries(MNN PUBLIC ${MNN_EXTRA_DEPENDS}) ENDIF() @@ -729,7 +744,6 @@ IF(MNN_BUILD_OPENCV AND NOT MNN_SEP_BUILD) ENDIF() target_sources(MNN PRIVATE $) ENDIF() - IF(MNN_BUILD_LLM) # add_definitions(-DMNN_BUILD_LLM) 
include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt) @@ -831,6 +845,27 @@ ELSE() ARCHIVE DESTINATION lib FRAMEWORK DESTINATION /Library/Frameworks/ ) + IF(MNN_BUILD_OPENCV) + if (NOT MNN_AAPL_FMWK) + INSTALL(FILES ${MNN_CV_HDRS} DESTINATION include/MNN/cv) + INSTALL(FILES ${MNN_CV_IMGHDRS} DESTINATION include/MNN/cv/imgproc) + endif() + FOREACH(HDR ${MNN_CV_HDRS}) + SET_SOURCE_FILES_PROPERTIES(${HDR} PROPERTIES MACOSX_PACKAGE_LOCATION Headers/cv/ ) + ENDFOREACH() + FOREACH(HDR ${MNN_CV_IMGHDRS}) + SET_SOURCE_FILES_PROPERTIES(${HDR} PROPERTIES MACOSX_PACKAGE_LOCATION Headers/cv/imgproc ) + ENDFOREACH() + ENDIF() + IF(MNN_BUILD_LLM) + if (NOT MNN_AAPL_FMWK) + INSTALL(FILES ${MNN_LLM_HDRS} DESTINATION include/MNN/llm) + endif() + FOREACH(HDR ${MNN_LLM_HDRS}) + SET_SOURCE_FILES_PROPERTIES(${HDR} PROPERTIES MACOSX_PACKAGE_LOCATION Headers/llm ) + ENDFOREACH() + ENDIF() + if (NOT MNN_AAPL_FMWK) INSTALL(FILES ${MNN_PUB_HDRS} DESTINATION include/MNN/) INSTALL(FILES ${MNN_EXPR_PUB_HDRS} DESTINATION include/MNN/expr/) diff --git a/source/backend/cpu/arm/arm32/bf16/MNNAxByClampBroadcastC4_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNAxByClampBroadcastC4_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm32/bf16/MNNAxByClampBroadcastC4_BF16.S rename to backupcode/cpubackend/arm/arm32/bf16/MNNAxByClampBroadcastC4_BF16.S diff --git a/source/backend/cpu/arm/arm32/bf16/MNNConvRunForLineDepthwise_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNConvRunForLineDepthwise_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm32/bf16/MNNConvRunForLineDepthwise_BF16.S rename to backupcode/cpubackend/arm/arm32/bf16/MNNConvRunForLineDepthwise_BF16.S diff --git a/source/backend/cpu/arm/arm32/bf16/MNNConvRunForUnitDepthWise_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNConvRunForUnitDepthWise_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm32/bf16/MNNConvRunForUnitDepthWise_BF16.S rename to backupcode/cpubackend/arm/arm32/bf16/MNNConvRunForUnitDepthWise_BF16.S diff --git a/source/backend/cpu/arm/arm32/bf16/MNNGelu_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNGelu_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm32/bf16/MNNGelu_BF16.S rename to backupcode/cpubackend/arm/arm32/bf16/MNNGelu_BF16.S diff --git a/backupcode/cpubackend/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S new file mode 100644 index 000000000..663ffae68 --- /dev/null +++ b/backupcode/cpubackend/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S @@ -0,0 +1,208 @@ +// +// NEON_MNNPackC4ForMatMul_A_BF16.S +// MNN +// +// Created by MNN on 2021/02/21. 
+// Copyright © 2018-2021 Alibaba Group Holding Limited +// +#ifdef __arm__ +#ifndef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 +asm_function NEON_MNNPackC4ForMatMul_A_BF16 +// treate float pointer as int16_t* +//void NEON_MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) +//Auto: r0: dest, r1:sourceGroup, r2: info, r3:el +push {r4-r8, r10, r11, lr} // avoid to touch platform-register r-9 +ldr r10, [r2, #0] // number +ldr r4, [r2, #4] // eReal +ldr r11, [r2, #8] // eDest +ldr r6, [r2, #12] // xOffset +// xOffset -> xOffset * 4 * sizeof(float) +// eReal -> eReal * 4 * sizeof(float) +// eDest -> eDest * sizeof(float) +mov r12, #2 // sizeof(int16_t) +mov lr, #8 // sizeof(int16_t) * 4 +mul r4, lr, r4 +mul r11, r12, r11 +mul r6, lr, r6 + +LoopNumber: +ldr r5, [r3, #4] // l +ldr r8, [r3, #8] // eOffset +ldr r7, [r3, #12] // lOffset + +push {r0, r1} +ldr r1, [r1, #0] + +// Compute dest ptr: r0 = r0 + eOffset * sizeof(float) + lOffset * eDest * sizeof(float) +; mov lr, #2 //sizeof(int16_t) +mul r7, r11, r7 +mul r8, r12, r8 +add r0, r0, r7 +add r0, r0, r8 + +ldr r2, [r3, #0] // e + +Body: +cmp r2, #12 +bne Right + cmp r5, #4 + blt LoopEL3 + LoopL4: + mov r2, r1 +.macro MAIN_TRANSPOSE + vld1.16 {d16}, [r1], r6 // load size: 4 * sizeof(int16_t) + vld1.16 {d19}, [r1], r6 + vld1.16 {d22}, [r1], r6 + vld1.16 {d25}, [r1], r6 + vld1.16 {d17}, [r1], r6 + vld1.16 {d20}, [r1], r6 + vld1.16 {d23}, [r1], r6 + vld1.16 {d26}, [r1], r6 + vld1.16 {d18}, [r1], r6 + vld1.16 {d21}, [r1], r6 + vld1.16 {d24}, [r1], r6 + vld1.16 {d27}, [r1], r6 + + // transpose each 4 16-bit elements in 2 d_n vectors, by transpose 16-bit and scale up transpose 32-bit. + vtrn.16 d16, d19 + vtrn.16 d22, d25 + // vswp d0[2-3], d2[0-1] + // vswp d1[2-3], d3[0-1] + // swap half of 64-bit is equal to transpose in 32-bit unit. 
+ vtrn.32 d16, d22 + vtrn.32 d19, d25 + + vtrn.16 d17, d20 + vtrn.16 d23, d26 + vtrn.32 d17, d23 + vtrn.32 d20, d26 + + vtrn.16 d18, d21 + vtrn.16 d24, d27 + vtrn.32 d18, d24 + vtrn.32 d21, d27 + // after transpose from 12x4 to 4x12, memory layout is + // +-------+------+------+ + // | d16...|d17...|d18...| + // +-------+------+------+ + // | d19...|d20...|d21...| + // +-------+------+------+ + // | d22...|d23...|d24...| + // +-------+------+------+ + // | d25...|d26...|d27...| + // +-------+------+------+ +.endm + MAIN_TRANSPOSE + + vstm r0!, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27} // store at one time: 12 * 4 * sizeof(int16_t) + + add r1, r2, r4 + sub r5, r5, #4 + cmp r5, #4 + bge LoopL4 + + LoopEL3: + cmp r5, #3 + blt LoopEL2 + MAIN_TRANSPOSE + + vstm r0!, {d16, d17, d18, d19, d20, d21, d22, d23, d24} + + b LoopEEnd + + LoopEL2: + cmp r5, #2 + blt LoopEL1 + MAIN_TRANSPOSE + + vstm r0!, {d16, d17, d18, d19, d20, d21} + + b LoopEEnd + + LoopEL1: + cmp r5, #0 + beq LoopEEnd + MAIN_TRANSPOSE + + vstm r0!, {d16, d17, d18} + + LoopEEnd: + +b End + + +Right: + +LoopE1: + mov lr, r5 + mov r7, r1 + mov r8, r0 + cmp r5, #4 + blt LoopE1L3 + LoopE1L4: + vld1.16 {d0}, [r1], r4 + vst1.16 {d0[0]}, [r0], r11 + vst1.16 {d0[1]}, [r0], r11 + vst1.16 {d0[2]}, [r0], r11 + vst1.16 {d0[3]}, [r0], r11 + sub r5, r5, #4 + cmp r5, #4 + bge LoopE1L4 + + LoopE1L3: + cmp r5, #3 + blt LoopE1L2 + vld1.16 {d0}, [r1], r4 + vst1.16 {d0[0]}, [r0], r11 + vst1.16 {d0[1]}, [r0], r11 + vst1.16 {d0[2]}, [r0], r11 + + sub r5, r5, #3 + + LoopE1L2: + cmp r5, #2 + blt LoopE1L1 + vld1.16 {d0}, [r1], r4 + vst1.16 {d0[0]}, [r0], r11 + vst1.16 {d0[1]}, [r0], r11 + sub r5, r5, #2 + + LoopE1L1: + cmp r5, #1 + blt LoopE1End + vld1.16 {d0[0]}, [r1], r4 + vst1.16 {d0[0]}, [r0], r11 + + LoopE1End: + + subs r2, r2, #1 + add r0, r8, r12 // !!!! caution : sizeof(int16_t) + add r1, r7, r6 + mov r5, lr + bne LoopE1 + +End: + +pop {r0, r1} +subs r10, r10, #1 + +// x3 is (const int32_t* el), this array size of 4. as a result for next struct element, +// address added by 4 * sizeof(int32_t) +add r3, r3, #16 + +// x1 is (const int16_t** sourceGroup), even though data content is int16_t, +// the element in sourceGroup in 'int16_t*', as a result for next struct element, +// value added by sizeof(void*) +add r1, r1, #4 + +bne LoopNumber + +pop {r4-r8, r10, r11, pc} + +#endif +#endif diff --git a/backupcode/cpubackend/arm/arm32/bf16/MNNPackC4_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNPackC4_BF16.S new file mode 100644 index 000000000..70b9e61e4 --- /dev/null +++ b/backupcode/cpubackend/arm/arm32/bf16/MNNPackC4_BF16.S @@ -0,0 +1,202 @@ +// +// MNNPackC4_BF16.S +// MNN +// +// Created by MNN on 2021/02/26. +// Copyright © 2018-2021 Alibaba Group Holding Limited +// + + + + +#ifdef __arm__ +#ifndef __aarch64__ + +#include "MNNAsmGlobal.h" +.text +.align 5 + +// .macro transpose +// vtrn.16 d0, d1 +// vtrn.16 d2, d3 +// vswp d0[2-3], d1[2-3] // should swap high half of d-vector, the half is 32-bit. 
there is no instruction, we use vst4.16 instead +// vswp d2[2-3], d3[2-3] +// .endm + +asm_function MNNPackC4_BF16 +// treate float pointer as int16_t* +//void MNNPackC4_BF16(float* dst, const float* src, size_t area, size_t depth, int32_t* areaOffset) +//Auto load: +//r0:dst, r1:src, r2:area, r3:depth + +push {r4-r8, r10, lr} // avoid to touch platform-register r-9 + +ldr lr, [sp, #28] +ldr r10, [lr, #4] +ldr lr, [lr, #0] + +mul r4, r2, r3 +cmp r4, #0 +beq UpEnd + +//r4: srcDepthOffset:srcArea*sizeof(int16_t) +mov r4, #2 +mul r4, lr, r4 + +//r10 -> 4 * (dstArea * sizeof(int16_t) - area * sizeof(int16_t)) +mov r12, #8 +sub r10, r10, r2 +mul r10, r12, r10 + +//lr -> (srcArea * sizeof(int16_t) - area * sizeof(int16_t)) +mov r12, #2 +sub lr, lr, r2 +mul lr, r12, lr + +UpL4: +cmp r3, #3 +ble UpL3 + +UpL4Loop: +add r5, r1, r4 +add r6, r4, r5 +add r7, r4, r6 +mov r8, r2 +cmp r8, #3 +ble UpL4AreaRemain +UpL4AreaLoop: +vld1.16 {d0}, [r1]! // load 4 elements of 16-bit into 64bit vector register d0 +vld1.16 {d1}, [r5]! +vld1.16 {d2}, [r6]! +vld1.16 {d3}, [r7]! +// transpose // no suitable instruction to transpose int16_t type +vst4.16 {d0, d1, d2, d3}, [r0]! +sub r8, r8, #4 +cmp r8, #4 +bge UpL4AreaLoop + +UpL4AreaRemain: +cmp r8, #0 +beq UpL4AreaRemainEnd +UpL4AreaRemainLoop: +vld1.16 {d0[0]}, [r1]! +vld1.16 {d0[1]}, [r5]! +vld1.16 {d0[2]}, [r6]! +vld1.16 {d0[3]}, [r7]! + +vst1.16 {d0}, [r0]! + +subs r8, r8, #1 +bne UpL4AreaRemainLoop +UpL4AreaRemainEnd: +sub r3, r3, #4 +add r1, r7, lr +cmp r3, #4 +add r0, r10, r0 +bge UpL4Loop + +UpL3: +cmp r3, #2 +ble UpL2 +add r5, r1, r4 +add r6, r4, r5 +mov r8, r2 +cmp r8, #3 +ble UpL3AreaRemain +UpL3AreaLoop: +vld1.16 {d0}, [r1]! +vmov.i16 d3, #0 +vld1.16 {d1}, [r5]! +vld1.16 {d2}, [r6]! +// transpose // no suitable instruction to transpose int16_t type +vst4.16 {d0, d1, d2, d3}, [r0]! +sub r8, r8, #4 +cmp r8, #4 +bge UpL3AreaLoop + +cmp r8, #0 +beq UpL3AreaRemainEnd +UpL3AreaRemain: +vmov.i16 d0, #0 +vld1.16 {d0[0]}, [r1]! +vld1.16 {d0[1]}, [r5]! +vld1.16 {d0[2]}, [r6]! + +vst1.16 {d0}, [r0]! + +subs r8, r8, #1 +bne UpL3AreaRemain + +UpL3AreaRemainEnd: +sub r3, r3, #3 + + +UpL2: +cmp r3, #1 +ble UpL1 +add r5, r1, r4 +mov r8, r2 +cmp r8, #3 +ble UpL2AreaRemain +UpL2AreaLoop: +vld1.16 {d0}, [r1]! +vmov.i16 d3, #0 +vld1.16 {d1}, [r5]! +vmov.i16 d2, #0 +// transpose // no suitable instruction to transpose int16_t type +vst4.16 {d0, d1, d2, d3}, [r0]! +sub r8, r8, #4 +cmp r8, #4 +bge UpL2AreaLoop + +cmp r8, #0 +beq UpL2AreaRemainEnd +UpL2AreaRemain: +vmov.i16 d0, #0 +vld1.16 {d0[0]}, [r1]! +vld1.16 {d0[1]}, [r5]! + +vst1.16 {d0}, [r0]! + +subs r8, r8, #1 +bne UpL2AreaRemain + +UpL2AreaRemainEnd: +sub r3, r3, #2 + +UpL1: +cmp r3, #0 +beq UpEnd +mov r8, r2 +cmp r8, #3 +ble UpL1AreaRemain +UpL1AreaLoop: +vld1.16 {d0}, [r1]! +vmov.i16 d3, #0 +vmov.i16 d1, #0 +vmov.i16 d2, #0 +// transpose // no suitable instruction to transpose int16_t type +vst4.16 {d0, d1, d2, d3}, [r0]! +sub r8, r8, #4 +cmp r8, #4 +bge UpL1AreaLoop + +cmp r8, #0 +beq UpL1AreaRemainEnd +UpL1AreaRemain: +vmov.i16 d0, #0 +vld1.16 {d0[0]}, [r1]! + +vst1.16 {d0}, [r0]! 
+ +subs r8, r8, #1 +bne UpL1AreaRemain + +UpL1AreaRemainEnd: + +UpEnd: + +pop {r4-r8, r10, pc} + +#endif +#endif diff --git a/backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S new file mode 100644 index 000000000..252f1956a --- /dev/null +++ b/backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S @@ -0,0 +1,154 @@ +// +// NEON_MNNPackedMatMulRemain_BF16.S +// MNN +// +// Created by MNN on 2021/02/24. +// Copyright © 2018-2021 Alibaba Group Holding Limited. +// + +#ifdef __arm__ +#ifndef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 +// 12 * 8 MatMul +asm_function NEON_MNNPackedMatMulRemain_BF16 +// treate float pointer as int16_t* +//void NEON_MNNPackedMatMulRemain_BF16(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias); +//Auto r0: C, r1:A, r2:B, r3:eSize, +//r4:parameter, r5: cache no usage, r6:postParameters, r7:bias + +push {r4-r8, r10, r11, lr} // avoid to touch platform-register r-9 +ldr r4, [sp, #32] +ldr r6, [sp, #36] +ldr r7, [sp, #40] +ldr r12, [r4, #0] +cmp r6, #0 +beq Start +vld1.32 {q3}, [r6] +vdup.f32 q12, d7[0] // min +vdup.f32 q13, d7[1] // max +Start: +cmp r3, #4 +blt L1 + +LoopE4: + ldr r5, [r4, #8] // h + add r5, r5, #3 + lsr r5, r5, #2 // r5 = UP_DIV(r5, 4) + mov lr, r0 + mov r11, r2 + push {r7} + LoopE4H: + mov r10, r1 + ldr r8, [r4, #4] // l + vmov.i32 q8, #0 + vmov.i32 q9, #0 + vmov.i32 q10, #0 + vmov.i32 q11, #0 + LoopE4L: + vld1.16 {d0}, [r10], r12 + vld1.16 {d2}, [r11]! // load 4 * sizeof(int16_t) + vshll.s16 q0, d0, #16 // shift left long of each int16_t as float32 + vshll.s16 q1, d2, #16 + vmla.f32 q8, q1, d0[0] + vmla.f32 q9, q1, d0[1] + vmla.f32 q10, q1, d1[0] + vmla.f32 q11, q1, d1[1] + subs r8, r8, #1 + bne LoopE4L + cmp r6, #0 + beq StoreE4 + vld1.16 {d28}, [r7]! // load 4 * sizeof(int16_t) + vshll.s16 q14, d28, #16 // shift left long of each int16_t as float32 + vmla.f32 q8, q14, d6[1] + vmla.f32 q9, q14, d6[1] + vmla.f32 q10, q14, d6[1] + vmla.f32 q11, q14, d6[1] + + PostTreatE4: + vmax.f32 q8, q8, q12 + vmax.f32 q9, q9, q12 + vmax.f32 q10, q10, q12 + vmax.f32 q11, q11, q12 + + vmin.f32 q8, q8, q13 + vmin.f32 q9, q9, q13 + vmin.f32 q10, q10, q13 + vmin.f32 q11, q11, q13 + + StoreE4: + ldr r8, [r4, #20] + add r11, r11, r8 + ldr r8, [r4, #12] + + vshrn.i32 d16, q8, #16 // shift right 16bit of each float32 as int16_t + vshrn.i32 d17, q9, #16 + vshrn.i32 d18, q10, #16 + vshrn.i32 d19, q11, #16 + vst1.16 {d16, d17}, [lr]! + vst1.16 {d18, d19}, [lr], r8 + sub lr, lr, #16 + subs r5, r5, #1 // move 4 colum along lP dim. lP = l / 4 + bne LoopE4H + sub r3, r3, #4 // move 4 colum along e dim. + add r0, r0, #32 // move address of 4 * 4 * sizeof(int16_t) + add r1, r1, #8 // move address of 4 * sizeof(int16_t) in src tile block + cmp r3, #4 + pop {r7} + bge LoopE4 + +L1: +cmp r3, #0 +beq End +LoopE1: + ldr r5, [r4, #8] // h + add r5, r5, #3 + lsr r5, r5, #2 + mov lr, r0 + mov r11, r2 + push {r7} + LoopE1H: + mov r10, r1 + ldr r8, [r4, #4] // l + vmov.i32 q15, #0 + LoopE1L: + vld1.16 {d0[0]}, [r10], r12 + vld1.16 {d2}, [r11]! // load 4 * sizeof(int16_t) + vshll.s16 q0, d0, #16 // shift left long of each int16_t as float32 + vshll.s16 q1, d2, #16 + + vmla.f32 q15, q1, d0[0] + subs r8, r8, #1 + bne LoopE1L + cmp r6, #0 + beq StoreE1 + vld1.16 {d28}, [r7]! 
// load 4 * sizeof(int16_t) + vshll.s16 q14, d28, #16 // shift left long of each int16_t as float32 + vmla.f32 q15, q14, d6[1] + + PostTreatE1: + vmax.f32 q15, q15, q12 + vmin.f32 q15, q15, q13 + + StoreE1: + ldr r8, [r4, #20] + add r11, r11, r8 + ldr r8, [r4, #12] + + vshrn.i32 d30, q15, #16 // shift right 16bit of each float32 as int16_t + vst1.16 {d30}, [lr], r8 + subs r5, r5, #1 + bne LoopE1H + subs r3, r3, #1 + add r0, r0, #8 // move address of 4 * sizeof(int16_t) + add r1, r1, #2 // move address of 1 * sizeof(int16_t) + pop {r7} + bne LoopE1 +End: +pop {r4-r8, r10, r11, pc} + +#endif +#endif diff --git a/backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMul_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMul_BF16.S new file mode 100644 index 000000000..3b9ab3d48 --- /dev/null +++ b/backupcode/cpubackend/arm/arm32/bf16/MNNPackedMatMul_BF16.S @@ -0,0 +1,211 @@ +// +// NEON_MNNPackedMatMul_BF16.S +// MNN +// +// Created by MNN on 2021/02/24. +// Copyright © 2018-2021 Alibaba Group Holding Limited. +// + +#ifdef __arm__ +#ifndef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 +// 12 * 8 MatMul +asm_function NEON_MNNPackedMatMul_BF16 +// treate float pointer as int16_t* +//void NEON_MNNPackedMatMul_BF16(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); +// Auto: r0: C, r1:A, r2:B, r3:parameter +// Load from sp: r5: postParameters, r6:bias + +push {r4-r8, r10, r11, lr} // avoid to touch platform-register r-9 +ldr r5, [sp, #32] +ldr r6, [sp, #36] + +ldr r4, [r3, #8] // h +ldr r7, [r3, #4] // l +add r4, r4, #3 +ldr r8, [r3, #12]//cStride +ldr r3, [r3, #20]//bExtraStride +lsr r4, r4, #2 + +sub r8, r8, #96 // after segment "Store", total line stride is CStride, all vst. offset is 12 * 4 * size_t(int16_t) = 96byte + +vpush {q4-q7} +// q0, q1, q2: src +// q3: weight +// q4 - q15: dst + +LoopH: + subs r12, r7, #1 + mov r11, r1 + vld1.16 {d6}, [r2]! + vld1.16 {d0, d1}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q3, d6, #16 // shift left long of each int16_t as float32 + vshll.s16 q1, d1, #16 // !! caution: must shll d1 before d0 + vshll.s16 q0, d0, #16 + + vmul.f32 q4, q3, d0[0] + vmul.f32 q5, q3, d0[1] + vmul.f32 q6, q3, d1[0] + vld1.16 {d4}, [r11]! // load 4 * sizeof(int16_t) + vshll.s16 q2, d4, #16 + vmul.f32 q7, q3, d1[1] + + vmul.f32 q8, q3, d2[0] + vmul.f32 q9, q3, d2[1] + vmul.f32 q10, q3, d3[0] + vmul.f32 q11, q3, d3[1] + + vmul.f32 q12, q3, d4[0] + vmul.f32 q13, q3, d4[1] + vmul.f32 q14, q3, d5[0] + vmul.f32 q15, q3, d5[1] + beq LoopLEnd + LoopL: + vld1.16 {d6}, [r2]! + vld1.16 {d0, d1}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q3, d6, #16 // shift left long of each int16_t as float32 + vshll.s16 q1, d1, #16 // !! caution: must shll d1 before d0 + vshll.s16 q0, d0, #16 + + vmla.f32 q4, q3, d0[0] + vmla.f32 q5, q3, d0[1] + vmla.f32 q6, q3, d1[0] + vld1.16 {d4}, [r11]! + vshll.s16 q2, d4, #16 + + vmla.f32 q7, q3, d1[1] + + vmla.f32 q8, q3, d2[0] + vmla.f32 q9, q3, d2[1] + vmla.f32 q10, q3, d3[0] + vmla.f32 q11, q3, d3[1] + + vmla.f32 q12, q3, d4[0] + vmla.f32 q13, q3, d4[1] + vmla.f32 q14, q3, d5[0] + vmla.f32 q15, q3, d5[1] + + subs r12, r12, #1 + bne LoopL + LoopLEnd: + cmp r5, #0 + beq Store + vld1.32 {q0}, [r5] // parameter remains float + cmp r6, #0 + beq LoadOrigin + vld1.16 {d6}, [r6]! 
// load 4 * sizeof(int16_t) + vshll.s16 q3, d6, #16 // shift left long of each int16_t as int32_t + vmla.f32 q4, q3, d0[1] + vmla.f32 q5, q3, d0[1] + vmla.f32 q6, q3, d0[1] + vmla.f32 q7, q3, d0[1] + vmla.f32 q8, q3, d0[1] + vmla.f32 q9, q3, d0[1] + vmla.f32 q10, q3, d0[1] + vmla.f32 q11, q3, d0[1] + vmla.f32 q12, q3, d0[1] + vmla.f32 q13, q3, d0[1] + vmla.f32 q14, q3, d0[1] + vmla.f32 q15, q3, d0[1] + + b PostTreat + + LoadOrigin: + mov r11, r0 + vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t + vshll.s16 q1, d2, #16 + vmla.f32 q4, q1, d0[1] + vmla.f32 q5, q2, d0[1] + + vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t + vshll.s16 q1, d2, #16 + vmla.f32 q6, q1, d0[1] + vmla.f32 q7, q2, d0[1] + + vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t + vshll.s16 q1, d2, #16 + vmla.f32 q8, q1, d0[1] + vmla.f32 q9, q2, d0[1] + + vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t + vshll.s16 q1, d2, #16 + vmla.f32 q10, q1, d0[1] + vmla.f32 q11, q2, d0[1] + + vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t + vshll.s16 q1, d2, #16 + vmla.f32 q12, q1, d0[1] + vmla.f32 q13, q2, d0[1] + + vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) + vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t + vshll.s16 q1, d2, #16 + vmla.f32 q14, q1, d0[1] + vmla.f32 q15, q2, d0[1] + + PostTreat: + vdup.f32 q2, d1[0] // min + vdup.f32 q1, d1[1] // max + + vmax.f32 q4, q4, q2 + vmax.f32 q5, q5, q2 + vmax.f32 q6, q6, q2 + vmax.f32 q7, q7, q2 + vmax.f32 q8, q8, q2 + vmax.f32 q9, q9, q2 + vmax.f32 q10, q10, q2 + vmax.f32 q11, q11, q2 + vmax.f32 q12, q12, q2 + vmax.f32 q13, q13, q2 + vmax.f32 q14, q14, q2 + vmax.f32 q15, q15, q2 + + vmin.f32 q4, q4, q1 + vmin.f32 q5, q5, q1 + vmin.f32 q6, q6, q1 + vmin.f32 q7, q7, q1 + vmin.f32 q8, q8, q1 + vmin.f32 q9, q9, q1 + vmin.f32 q10, q10, q1 + vmin.f32 q11, q11, q1 + vmin.f32 q12, q12, q1 + vmin.f32 q13, q13, q1 + vmin.f32 q14, q14, q1 + vmin.f32 q15, q15, q1 + + Store: + vshrn.i32 d8, q4, #16 // !!caution: these instructions has relying, eg: d10 must be written after reading q5. 
shift right 16bit of each float32 as int16_t + vshrn.i32 d9, q5, #16 + vshrn.i32 d10, q6, #16 + vshrn.i32 d11, q7, #16 + vshrn.i32 d12, q8, #16 + vshrn.i32 d13, q9, #16 + vshrn.i32 d14, q10, #16 + vshrn.i32 d15, q11, #16 + vshrn.i32 d16, q12, #16 + vshrn.i32 d17, q13, #16 + vshrn.i32 d18, q14, #16 + vshrn.i32 d19, q15, #16 + + vstm r0!, {d8, d9, d10, d11, d12, d13, d14, d15, d16, d17, d18, d19} + + add r0, r0, r8 + add r2, r2, r3 + + subs r4, r4, #1 + bne LoopH + +vpop {q4-q7} +pop {r4-r8, r10, r11, pc} + +#endif +#endif diff --git a/source/backend/cpu/arm/arm32/bf16/MNNReluWithSlopeChannelBF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNReluWithSlopeChannelBF16.S similarity index 100% rename from source/backend/cpu/arm/arm32/bf16/MNNReluWithSlopeChannelBF16.S rename to backupcode/cpubackend/arm/arm32/bf16/MNNReluWithSlopeChannelBF16.S diff --git a/source/backend/cpu/arm/arm32/bf16/MNNUnPackC4_BF16.S b/backupcode/cpubackend/arm/arm32/bf16/MNNUnPackC4_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm32/bf16/MNNUnPackC4_BF16.S rename to backupcode/cpubackend/arm/arm32/bf16/MNNUnPackC4_BF16.S diff --git a/backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S new file mode 100644 index 000000000..2acfe6930 --- /dev/null +++ b/backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S @@ -0,0 +1,566 @@ +// +// ARMV86_MNNPackedMatMulRemain_BF16.S +// MNN +// +// Created by MNN on 2022/10/09. +// Copyright © 2018-2021 Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 +.macro SET_ZERO d0, d1, d2, d3 + movi \d0\().4s, #0 + movi \d1\().4s, #0 + movi \d2\().4s, #0 + movi \d3\().4s, #0 +.endm + +.macro Float32ToBf16 d0, d1, d2, d3 + shrn \d0\().4h, \d0\().4s, #16 + shrn \d1\().4h, \d1\().4s, #16 + shrn \d2\().4h, \d2\().4s, #16 + shrn \d3\().4h, \d3\().4s, #16 +.endm + +.macro FOURFMAX s, d0, d1, d2, d3 + fmax \d0\().4s, \d0\().4s, \s\().4s + fmax \d1\().4s, \d1\().4s, \s\().4s + fmax \d2\().4s, \d2\().4s, \s\().4s + fmax \d3\().4s, \d3\().4s, \s\().4s +.endm + +.macro FOURFMIN s, d0, d1, d2, d3 + fmin \d0\().4s, \d0\().4s, \s\().4s + fmin \d1\().4s, \d1\().4s, \s\().4s + fmin \d2\().4s, \d2\().4s, \s\().4s + fmin \d3\().4s, \d3\().4s, \s\().4s +.endm + +.macro SET_BIAS s, d0, d1, d2 + mov \d0\().16b, \s\().16b + mov \d1\().16b, \s\().16b + mov \d2\().16b, \s\().16b +.endm + +// 12 * 8 * 4 MatMul +asm_function ARMV86_MNNPackedMatMulRemain_BF16 +//void ARMV86_MNNPackedMatMulRemain_BF16(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias); +//Auto x0: C, x1:A, x2:B, x3:eSize, x4:parameter, x5:postParameters, x6:bias +sub sp, sp, #64 +str x19, [sp, #0] +str x20, [sp, #8] +str x21, [sp, #16] +str x22, [sp, #24] +ldr x11, [x4, #0] // aStride +ldr x9, [x4, #8] // l +ldr x10, [x4, #16] // h +lsl x11, x11, #2 // aStride * 4 +mov x22, #64 // B_stride = LP * HP = 4 * 8 * sizeof(int16_t) + +ldr x7, [x4, #24] // cStride +ldr x19, [x4, #40] // bExtraStride + +add x10, x10, #3 +lsr x10, x10, #2 +add x9, x9, #3 +lsr x9, x9, #2 + +cbz x5, Start +ld1 {v5.4s}, [x5] +dup v9.4s, v5.s[2] // Min Value +dup v10.4s, v5.s[3] // Max Value + +Start: + +E8: +cmp x3, #8 +blt E4 + +LoopE8: // e, TILE_BLOCK size is 8 + mov x20, x6 // bias + mov x8, x10 // updiv(h, 4) + mov x21, x0 // dest, C + mov x13, x2 // weight, B + + LH8: + cmp x8, #2 // h/4 > 2 + blt LH4 + sub x14, x7, #64 
// cStride - 64 + LoopH8x8: + mov x15, x1 // src, A + mov x12, x9 // l + cbz x5, NoBiasLH8 + ld1 {v0.4h, v1.4h}, [x20], #16 // 8 * sizeof(int16_t) + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + mov v2.16b, v0.16b + mov v3.16b, v1.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + uzp1 v24.2d, v1.2d, v3.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v25.2d, v1.2d, v3.2d // bias_2, bias_3, bias_2, bias_3 + SET_BIAS v16, v18, v20, v22 + SET_BIAS v17, v19, v21, v23 + SET_BIAS v24, v26, v28, v30 + SET_BIAS v25, v27, v29, v31 + b LoopL + NoBiasLH8: + SET_ZERO v16, v17, v18, v19 + SET_ZERO v20, v21, v22, v23 + SET_ZERO v24, v25, v26, v27 + SET_ZERO v28, v29, v30, v31 + LoopL: + // A [8, 4, bf16] : rn = 4 : v4 - v7 + // B [8, 4, bf16] : rn = 4 : v0 - v3 + // C [8, 8, fp32] : rn = 16 : v16 - v31 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x15], x11 // A: 8 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x13], x22 // B: 8 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + .inst 0x6e40ecb2 // bfmmla v18.4s, v5.8h, v0.8h + .inst 0x6e41ecb3 // bfmmla v19.4s, v5.8h, v1.8h + .inst 0x6e40ecd4 // bfmmla v20.4s, v6.8h, v0.8h + .inst 0x6e41ecd5 // bfmmla v21.4s, v6.8h, v1.8h + .inst 0x6e40ecf6 // bfmmla v22.4s, v7.8h, v0.8h + .inst 0x6e41ecf7 // bfmmla v23.4s, v7.8h, v1.8h + .inst 0x6e42ec98 // bfmmla v24.4s, v4.8h, v2.8h + .inst 0x6e43ec99 // bfmmla v25.4s, v4.8h, v3.8h + .inst 0x6e42ecba // bfmmla v26.4s, v5.8h, v2.8h + .inst 0x6e43ecbb // bfmmla v27.4s, v5.8h, v3.8h + .inst 0x6e42ecdc // bfmmla v28.4s, v6.8h, v2.8h + .inst 0x6e43ecdd // bfmmla v29.4s, v6.8h, v3.8h + .inst 0x6e42ecfe // bfmmla v30.4s, v7.8h, v2.8h + .inst 0x6e43ecff // bfmmla v31.4s, v7.8h, v3.8h + subs x12, x12, #1 + bgt LoopL + LoopLEnd: + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + uzp1 v19.2d, v20.2d, v21.2d + uzp2 v20.2d, v20.2d, v21.2d + uzp1 v21.2d, v22.2d, v23.2d + uzp2 v22.2d, v22.2d, v23.2d + uzp1 v23.2d, v24.2d, v25.2d + uzp2 v24.2d, v24.2d, v25.2d + uzp1 v25.2d, v26.2d, v27.2d + uzp2 v26.2d, v26.2d, v27.2d + uzp1 v27.2d, v28.2d, v29.2d + uzp2 v28.2d, v28.2d, v29.2d + uzp1 v29.2d, v30.2d, v31.2d + uzp2 v30.2d, v30.2d, v31.2d + cbz x5, StoreLH8 + PostTreatLH8: + FOURFMAX v9, v15, v16, v17, v18 + FOURFMAX v9, v19, v20, v21, v22 + FOURFMAX v9, v23, v24, v25, v26 + FOURFMAX v9, v27, v28, v29, v30 + FOURFMIN v10, v15, v16, v17, v18 + FOURFMIN v10, v19, v20, v21, v22 + FOURFMIN v10, v23, v24, v25, v26 + FOURFMIN v10, v27, v28, v29, v30 + StoreLH8: + Float32ToBf16 v15, v16, v17, v18 + Float32ToBf16 v19, v20, v21, v22 + Float32ToBf16 v23, v24, v25, v26 + Float32ToBf16 v27, v28, v29, v30 + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], #32 // 16 * sizeof(int16_t) + add x0, x0, x14 + st1 {v23.4h, v24.4h, v25.4h, v26.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v27.4h, v28.4h, v29.4h, v30.4h}, [x0], #32 // 16 * sizeof(int16_t) + add x0, x0, x14 + add x13, x13, x19 // weight stride + sub x8, x8, #2 + cmp x8, #2 + bge LoopH8x8 + LH4: + cbz x8, E8End + LoopHRemain: + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasHRemain + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + mov v2.16b, v0.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + SET_BIAS v16, v18, v20, v22 + 
SET_BIAS v17, v19, v21, v23 + b LoopLR + NoBiasHRemain: + SET_ZERO v16, v17, v18, v19 + SET_ZERO v20, v21, v22, v23 + LoopLR: + // A [8, 4, bf16] : rn = 4 : v4 - v7 + // B [4, 4, bf16] : rn = 2 : v0 - v1 + // C [8, 4, fp32] : rn = 8 : v16 - v23 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x15], x11 // A: 8 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h}, [x13], x22 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + .inst 0x6e40ecb2 // bfmmla v18.4s, v5.8h, v0.8h + .inst 0x6e41ecb3 // bfmmla v19.4s, v5.8h, v1.8h + .inst 0x6e40ecd4 // bfmmla v20.4s, v6.8h, v0.8h + .inst 0x6e41ecd5 // bfmmla v21.4s, v6.8h, v1.8h + .inst 0x6e40ecf6 // bfmmla v22.4s, v7.8h, v0.8h + .inst 0x6e41ecf7 // bfmmla v23.4s, v7.8h, v1.8h + subs x12, x12, #1 + bne LoopLR + LoopLREnd: + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + uzp1 v19.2d, v20.2d, v21.2d + uzp2 v20.2d, v20.2d, v21.2d + uzp1 v21.2d, v22.2d, v23.2d + uzp2 v22.2d, v22.2d, v23.2d + cbz x5, StoreLH8x4 + PostTreatLH8x4: + FOURFMAX v9, v15, v16, v17, v18 + FOURFMAX v9, v19, v20, v21, v22 + FOURFMIN v10, v15, v16, v17, v18 + FOURFMIN v10, v19, v20, v21, v22 + StoreLH8x4: + Float32ToBf16 v15, v16, v17, v18 + Float32ToBf16 v19, v20, v21, v22 + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], #32 // 16 * sizeof(int16_t) + E8End: + sub x3, x3, #8 + cmp x3, #8 + add x0, x21, #64 // move dest address of 8 * 4 * sizeof(int16_t) + add x1, x1, #64 // move A matrix address of 8 * 4 * sizeof(int16_t) + bge LoopE8 + +E4: +cmp x3, #4 +mov x20, x6 +blt E2 + +mov x8, x10 +mov x21, x0 +mov x13, x2 + +cmp x8, #2 +blt E4LH4 +E4LH8: + E4LoopH8: + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasE4 + ld1 {v0.4h, v1.4h}, [x20], #16 // 8 * sizeof(int16_t) + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + mov v2.16b, v0.16b + mov v3.16b, v1.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + uzp1 v20.2d, v1.2d, v3.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v21.2d, v1.2d, v3.2d // bias_2, bias_3, bias_2, bias_3 + mov v18.16b, v16.16b + mov v19.16b, v17.16b + mov v22.16b, v20.16b + mov v23.16b, v21.16b + b E4LoopL + NoBiasE4: + SET_ZERO v16, v17, v18, v19 + SET_ZERO v20, v21, v22, v23 + E4LoopL: + // A [4, 4, bf16] : rn = 4 : v4 - v5 + // B [8, 4, bf16] : rn = 4 : v0 - v3 + // C [4, 8, fp32] : rn = 8 : v16 - v23 + ld1 {v4.8h, v5.8h}, [x15], x11 // A: 4 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x13], x22 // B: 8 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + .inst 0x6e40ecb2 // bfmmla v18.4s, v5.8h, v0.8h + .inst 0x6e41ecb3 // bfmmla v19.4s, v5.8h, v1.8h + .inst 0x6e42ec94 // bfmmla v20.4s, v4.8h, v2.8h + .inst 0x6e43ec95 // bfmmla v21.4s, v4.8h, v3.8h + .inst 0x6e42ecb6 // bfmmla v22.4s, v5.8h, v2.8h + .inst 0x6e43ecb7 // bfmmla v23.4s, v5.8h, v3.8h + subs x12, x12, #1 + bgt E4LoopL + E4LoopLEnd: + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + uzp1 v19.2d, v20.2d, v21.2d + uzp2 v20.2d, v20.2d, v21.2d + uzp1 v21.2d, v22.2d, v23.2d + uzp2 v22.2d, v22.2d, v23.2d + cbz x5, StoreLH4x8 + PostTreatLH4x8: + FOURFMAX v9, v15, v16, v17, v18 + FOURFMAX v9, v19, v20, v21, v22 + FOURFMIN v10, v15, v16, v17, v18 + FOURFMIN v10, v19, v20, v21, v22 + 
StoreLH4x8: + Float32ToBf16 v15, v16, v17, v18 + Float32ToBf16 v19, v20, v21, v22 + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], x7 // 16 * sizeof(int16_t) + st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], x7 // 16 * sizeof(int16_t) + add x13, x13, x19 // weight stride + sub x8, x8, #2 + cmp x8, #2 + bge E4LoopH8 + E4LH4: + cbz x8, E4End + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasE4R + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + mov v2.16b, v0.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + mov v18.16b, v16.16b + mov v19.16b, v17.16b + b E4LoopLR + NoBiasE4R: + SET_ZERO v16, v17, v18, v19 + E4LoopLR: + // A [4, 4, bf16] : rn = 4 : v4 - v5 + // B [4, 4, bf16] : rn = 4 : v0 - v1 + // C [4, 4, fp32] : rn = 4 : v16 - v19 + ld1 {v4.8h, v5.8h}, [x15], x11 // A: 4 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h}, [x13], x22 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + .inst 0x6e40ecb2 // bfmmla v18.4s, v5.8h, v0.8h + .inst 0x6e41ecb3 // bfmmla v19.4s, v5.8h, v1.8h + subs x12, x12, #1 + bgt E4LoopLR + E4LoopLREnd: + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + cbz x5, StoreLH4x4 + PostTreatLH4x4: + FOURFMAX v9, v15, v16, v17, v18 + FOURFMIN v10, v19, v20, v21, v22 + StoreLH4x4: + Float32ToBf16 v15, v16, v17, v18 + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0] // 16 * sizeof(int16_t) + E4End: + sub x3, x3, #4 + add x0, x21, #32 // move dest address of 4 * 4 * sizeof(int16_t) + add x1, x1, #32 // move dest address of 4 * 4 * sizeof(int16_t) + +E2: +cmp x3, #2 +mov x20, x6 +blt E1 + +mov x8, x10 +mov x21, x0 +mov x13, x2 + +cmp x8, #2 +blt E2LH4 +E2LH8: + E2LoopH8: + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasE2 + ld1 {v0.4h, v1.4h}, [x20], #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + mov v2.16b, v0.16b + mov v3.16b, v1.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + uzp1 v18.2d, v1.2d, v3.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v19.2d, v1.2d, v3.2d // bias_2, bias_3, bias_2, bias_3 + b E2LoopL + NoBiasE2: + SET_ZERO v16, v17, v18, v19 + E2LoopL: + // A [2, 4, bf16] : rn = 1 : v4 + // B [8, 4, bf16] : rn = 2 : v0 - v3 + // C [2, 8, fp32] : rn = 4 : v16 - v19 + ld1 {v4.8h}, [x15], x11 // A: 2 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x13], x22 // B: 8 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + .inst 0x6e42ec92 // bfmmla v18.4s, v4.8h, v2.8h + .inst 0x6e43ec93 // bfmmla v19.4s, v4.8h, v3.8h + subs x12, x12, #1 + bgt E2LoopL + E2LoopLEnd: + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + cbz x5, StoreLH2x8 + PostTreatLH2x8: + FOURFMAX v9, v15, v16, v17, v18 + FOURFMIN v10, v15, v16, v17, v18 + StoreLH2x8: + Float32ToBf16 v15, v16, v17, v18 + st1 {v15.4h, v16.4h}, [x0], x7 // 8 * sizeof(int16_t) + st1 {v17.4h, v18.4h}, [x0], x7 // 8 * sizeof(int16_t) + add x13, x13, x19 // weight stride + sub x8, x8, #2 + cmp x8, #2 + bge E2LoopH8 + E2LH4: + cbz x8, E2End + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasE2R + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + mov v2.16b, v0.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + b E2LoopLR + NoBiasE2R: 
+ movi v16.4s, #0 + movi v17.4s, #0 + E2LoopLR: + // A [2, 4, bf16] : rn = 1 : v4 + // B [4, 4, bf16] : rn = 2 : v0 - v1 + // C [2, 4, fp32] : rn = 2 : v16 - v17 + ld1 {v4.8h}, [x15], x11 // A: 2 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h}, [x13], x22 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + subs x12, x12, #1 + bgt E2LoopLR + E2LoopLREnd: + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + cbz x5, StoreLH2x4 + PostTreatLH2x4: + fmax v15.4s, v15.4s, v9.4s + fmax v16.4s, v16.4s, v9.4s + fmin v15.4s, v15.4s, v10.4s + fmin v16.4s, v16.4s, v10.4s + StoreLH2x4: + shrn v15.4h, v15.4s, #16 + shrn v16.4h, v16.4s, #16 + st1 {v15.4h, v16.4h}, [x0] // 8 * sizeof(int16_t) + E2End: + sub x3, x3, #2 + add x0, x21, #16 // move dest address of 2 * 4 * sizeof(int16_t) + add x1, x1, #16 // move dest address of 2 * 4 * sizeof(int16_t) + +E1: +cmp x3, #0 +beq End + +LoopE1: + mov x20, x6 + mov x8, x10 + mov x21, x0 + mov x13, x2 + + cmp x8, #2 + blt E1LH4 + + E1LH8: + E1LoopH8: + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasE1 + ld1 {v0.4h, v1.4h}, [x20], #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + mov v2.16b, v0.16b + mov v3.16b, v1.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + uzp1 v18.2d, v1.2d, v3.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v19.2d, v1.2d, v3.2d // bias_2, bias_3, bias_2, bias_3 + b E1LoopL + NoBiasE1: + SET_ZERO v16, v17, v18, v19 + E1LoopL: + // A [1, 4, bf16] : rn = 1 : v4 + // B [8, 4, bf16] : rn = 4 : v0 - v3 + // C [1, 8, fp32] : rn = 4 : v16 - v19 + ld1 {v4.4h}, [x15], x11 // A: 1 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x13], x22 // B: 8 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + .inst 0x6e42ec92 // bfmmla v18.4s, v4.8h, v2.8h + .inst 0x6e43ec93 // bfmmla v19.4s, v4.8h, v3.8h + subs x12, x12, #1 + bgt E1LoopL + E1LoopLEnd: + // v16-v19: [r0, r1, 0, 0] + uzp1 v15.2d, v16.2d, v17.2d + uzp1 v16.2d, v18.2d, v19.2d + cbz x5, StoreLH1x8 + PostTreatLH1x8: + fmax v15.4s, v15.4s, v9.4s + fmax v16.4s, v16.4s, v9.4s + fmin v15.4s, v15.4s, v10.4s + fmin v16.4s, v16.4s, v10.4s + StoreLH1x8: + shrn v15.4h, v15.4s, #16 + shrn v16.4h, v16.4s, #16 + st1 {v15.4h}, [x0], x7 + st1 {v16.4h}, [x0], x7 + add x13, x13, x19 + sub x8, x8, #2 + cmp x8, #2 + bge E1LoopH8 + + E1LH4: + cbz x8, E1End + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasE1R + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + mov v2.16b, v0.16b + uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + b E1LoopLR + NoBiasE1R: + movi v16.4s, #0 + movi v17.4s, #0 + E1LoopLR: + // A [1, 4, bf16] : rn = 1 : v4 + // B [4, 4, bf16] : rn = 2 : v0 - v1 + // C [1, 8, fp32] : rn = 4 : v16 - v17 + ld1 {v4.4h}, [x15], x11 // A: 1 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h}, [x13], x22 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec90 // bfmmla v16.4s, v4.8h, v0.8h + .inst 0x6e41ec91 // bfmmla v17.4s, v4.8h, v1.8h + subs x12, x12, #1 + bgt E1LoopLR + E1LoopLREnd: + uzp1 v15.2d, v16.2d, v17.2d + cbz x5, StoreLH1x4 + PostTreatLH1x4: + fmax v15.4s, v15.4s, v9.4s + fmin v15.4s, v15.4s, v10.4s + StoreLH1x4: + shrn v15.4h, v15.4s, #16 + st1 {v15.4h}, [x0] + E1End: + subs x3, x3, #1 + add x0, x21, #8 + add x1, x1, #8 + bne LoopE1 +End: +ldr x19, [sp, #0] +ldr x20, [sp, #8] +ldr x21, [sp, #16] +ldr x22, [sp, #24] 
+add sp, sp, #64 + +ret +#endif diff --git a/backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S new file mode 100644 index 000000000..7d3282969 --- /dev/null +++ b/backupcode/cpubackend/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S @@ -0,0 +1,286 @@ +// +// ARMV86_MNNPackedMatMul_BF16.S +// MNN +// +// Created by MNN on 2022/10/09. +// Copyright © 2018-2021 Alibaba Group Holding Limited +// +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SET_ZERO d0, d1, d2, d3 + movi \d0\().4s, #0 + movi \d1\().4s, #0 + movi \d2\().4s, #0 + movi \d3\().4s, #0 +.endm + +.macro Float32ToBf16 d0, d1, d2, d3 + shrn \d0\().4h, \d0\().4s, #16 + shrn \d1\().4h, \d1\().4s, #16 + shrn \d2\().4h, \d2\().4s, #16 + shrn \d3\().4h, \d3\().4s, #16 +.endm + +.macro FOURFMAX s, d0, d1, d2, d3 + fmax \d0\().4s, \d0\().4s, \s\().4s + fmax \d1\().4s, \d1\().4s, \s\().4s + fmax \d2\().4s, \d2\().4s, \s\().4s + fmax \d3\().4s, \d3\().4s, \s\().4s +.endm + +.macro FOURFMIN s, d0, d1, d2, d3 + fmin \d0\().4s, \d0\().4s, \s\().4s + fmin \d1\().4s, \d1\().4s, \s\().4s + fmin \d2\().4s, \d2\().4s, \s\().4s + fmin \d3\().4s, \d3\().4s, \s\().4s +.endm + +.macro SET_BIAS s, d0, d1, d2, d3 + mov \d0\().16b, \s\().16b + mov \d1\().16b, \s\().16b + mov \d2\().16b, \s\().16b + mov \d3\().16b, \s\().16b +.endm + +// 12 * 8 * 4 MatMul +asm_function ARMV86_MNNPackedMatMul_BF16 +//void ARMV86_MNNPackedMatMul_BF16(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); +// x0: C, x1:A, x2:B, x3:parameter, x4: postParameters, x5:bias +stp d14, d15, [sp, #-80]! +stp d12, d13, [sp, #16] +stp d10, d11, [sp, #32] +stp d8, d9, [sp, #48] +stp x19, x21, [sp, #64] + +//ldr x8, [x3, #0] // deprecated +ldr x9, [x3, #8] // l +ldr x10, [x3, #16] // h +mov x11, #64 // B_stride = LP * HP = 4 * 8 * sizeof(int16_t) + +ldr x13, [x3, #24] // cStride +ldr x7, [x3, #40] // bExtraStride + +add x10, x10, #3 +lsr x10, x10, #2 +add x9, x9, #3 +lsr x9, x9, #2 + +cbz x4, Start +ld1 {v5.4s}, [x4] +mov w19, v5.s[2] // min value +mov w20, v5.s[3] // max value + +Start: + cmp x10, #2 + blt LH4 +LH8: + sub x14, x13, #96 // cStride - 96 +LoopH: + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasH8 + ld1 {v0.4h, v1.4h}, [x5], #16 // 8 * sizeof(int16_t) + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + mov v2.16b, v0.16b + mov v3.16b, v1.16b + uzp1 v18.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v19.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + uzp1 v30.2d, v1.2d, v3.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v31.2d, v1.2d, v3.2d // bias_2, bias_3, bias_2, bias_3 + SET_BIAS v18, v8, v10, v12, v14 + mov v16.16b, v18.16b + SET_BIAS v19, v9, v11, v13, v15 + mov v17.16b, v19.16b + SET_BIAS v30, v20, v22, v24, v26 + mov v28.16b, v30.16b + SET_BIAS v31, v21, v23, v25, v27 + mov v29.16b, v31.16b + b LoopL + NoBiasH8: + SET_ZERO v8, v9, v10, v11 + SET_ZERO v12, v13, v14, v15 + SET_ZERO v16, v17, v18, v19 + SET_ZERO v20, v21, v22, v23 + SET_ZERO v24, v25, v26, v27 + SET_ZERO v28, v29, v30, v31 + LoopL: + // A [12, 4, bf16] : rn = 6 : v2 - v7 + // B [ 8, 4, bf16] : rn = 2 : v0 - v1 + // C [12, 8, fp32] : rn = 24 : v8 - v31 + ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x15], #64 // A: 8 * 4 * sizeof(int16_t) + ld1 {v6.8h, v7.8h}, [x15], #32 // A: 4 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h}, [x2], #32 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec48 // bfmmla v8.4s, v2.8h, v0.8h + .inst 0x6e41ec49 // bfmmla 
v9.4s, v2.8h, v1.8h + .inst 0x6e40ec6a // bfmmla v10.4s, v3.8h, v0.8h + .inst 0x6e41ec6b // bfmmla v11.4s, v3.8h, v1.8h + .inst 0x6e40ec8c // bfmmla v12.4s, v4.8h, v0.8h + .inst 0x6e41ec8d // bfmmla v13.4s, v4.8h, v1.8h + .inst 0x6e40ecae // bfmmla v14.4s, v5.8h, v0.8h + .inst 0x6e41ecaf // bfmmla v15.4s, v5.8h, v1.8h + .inst 0x6e40ecd0 // bfmmla v16.4s, v6.8h, v0.8h + .inst 0x6e41ecd1 // bfmmla v17.4s, v6.8h, v1.8h + .inst 0x6e40ecf2 // bfmmla v18.4s, v7.8h, v0.8h + .inst 0x6e41ecf3 // bfmmla v19.4s, v7.8h, v1.8h + ld1 {v0.8h, v1.8h}, [x2], #32 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec54 // bfmmla v20.4s, v2.8h, v0.8h + .inst 0x6e41ec55 // bfmmla v21.4s, v2.8h, v1.8h + .inst 0x6e40ec76 // bfmmla v22.4s, v3.8h, v0.8h + .inst 0x6e41ec77 // bfmmla v23.4s, v3.8h, v1.8h + .inst 0x6e40ec98 // bfmmla v24.4s, v4.8h, v0.8h + .inst 0x6e41ec99 // bfmmla v25.4s, v4.8h, v1.8h + .inst 0x6e40ecba // bfmmla v26.4s, v5.8h, v0.8h + .inst 0x6e41ecbb // bfmmla v27.4s, v5.8h, v1.8h + .inst 0x6e40ecdc // bfmmla v28.4s, v6.8h, v0.8h + .inst 0x6e41ecdd // bfmmla v29.4s, v6.8h, v1.8h + .inst 0x6e40ecfe // bfmmla v30.4s, v7.8h, v0.8h + .inst 0x6e41ecff // bfmmla v31.4s, v7.8h, v1.8h + subs x12, x12, #1 + bgt LoopL + LoopLEnd: + uzp1 v7.2d, v8.2d, v9.2d + uzp2 v8.2d, v8.2d, v9.2d + uzp1 v9.2d, v10.2d, v11.2d + uzp2 v10.2d, v10.2d, v11.2d + uzp1 v11.2d, v12.2d, v13.2d + uzp2 v12.2d, v12.2d, v13.2d + uzp1 v13.2d, v14.2d, v15.2d + uzp2 v14.2d, v14.2d, v15.2d + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + uzp1 v19.2d, v20.2d, v21.2d + uzp2 v20.2d, v20.2d, v21.2d + uzp1 v21.2d, v22.2d, v23.2d + uzp2 v22.2d, v22.2d, v23.2d + uzp1 v23.2d, v24.2d, v25.2d + uzp2 v24.2d, v24.2d, v25.2d + uzp1 v25.2d, v26.2d, v27.2d + uzp2 v26.2d, v26.2d, v27.2d + uzp1 v27.2d, v28.2d, v29.2d + uzp2 v28.2d, v28.2d, v29.2d + uzp1 v29.2d, v30.2d, v31.2d + uzp2 v30.2d, v30.2d, v31.2d + cbz x4, StoreLH8 + PostTreatLH8: + dup v5.4s, w19 + dup v6.4s, w20 + FOURFMAX v5, v7, v8, v9, v10 + FOURFMAX v5, v11, v12, v13, v14 + FOURFMAX v5, v15, v16, v17, v18 + FOURFMAX v5, v19, v20, v21, v22 + FOURFMAX v5, v23, v24, v25, v26 + FOURFMAX v5, v27, v28, v29, v30 + FOURFMIN v6, v7, v8, v9, v10 + FOURFMIN v6, v11, v12, v13, v14 + FOURFMIN v6, v15, v16, v17, v18 + FOURFMIN v6, v19, v20, v21, v22 + FOURFMIN v6, v23, v24, v25, v26 + FOURFMIN v6, v27, v28, v29, v30 + StoreLH8: + Float32ToBf16 v7, v8, v9, v10 + Float32ToBf16 v11, v12, v13, v14 + Float32ToBf16 v15, v16, v17, v18 + Float32ToBf16 v19, v20, v21, v22 + Float32ToBf16 v23, v24, v25, v26 + Float32ToBf16 v27, v28, v29, v30 + st1 {v7.4h, v8.4h, v9.4h, v10.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v11.4h, v12.4h, v13.4h, v14.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) + add x0, x0, x14 + st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v23.4h, v24.4h, v25.4h, v26.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v27.4h, v28.4h, v29.4h, v30.4h}, [x0], #32 // 16 * sizeof(int16_t) + add x0, x0, x14 + add x2, x2, x7 // weight stride + sub x10, x10, #2 + cmp x10, #2 + bge LoopH +LH4: +cbz x10, End +LoopHR: + mov x15, x1 + mov x12, x9 + cbz x5, NoBiasH4 + ld1 {v0.4h}, [x5], #8 // 8 * sizeof(int16_t) + shll v0.4s, v0.4h, #16 + mov v2.16b, v0.16b + uzp1 v18.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 + uzp2 v19.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 + SET_BIAS v18, v8, v10, v12, v14 + mov v16.16b, v18.16b + SET_BIAS v19, v9, 
v11, v13, v15 + mov v17.16b, v19.16b + b LoopLR + NoBiasH4: + SET_ZERO v8, v9, v10, v11 + SET_ZERO v12, v13, v14, v15 + SET_ZERO v16, v17, v18, v19 + LoopLR: + // A [12, 4, bf16] : rn = 6 : v2 - v7 + // B [ 4, 4, bf16] : rn = 2 : v0 - v1 + // C [12, 4, fp32] : rn = 12 : v8 - v19 + ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [x15], #64 // A: 8 * 4 * sizeof(int16_t) + ld1 {v6.8h, v7.8h}, [x15], #32 // A: 4 * 4 * sizeof(int16_t) + ld1 {v0.8h, v1.8h}, [x2], x11 // B: 4 * 4 * sizeof(int16_t) + .inst 0x6e40ec48 // bfmmla v8.4s, v2.8h, v0.8h + .inst 0x6e41ec49 // bfmmla v9.4s, v2.8h, v1.8h + .inst 0x6e40ec6a // bfmmla v10.4s, v3.8h, v0.8h + .inst 0x6e41ec6b // bfmmla v11.4s, v3.8h, v1.8h + .inst 0x6e40ec8c // bfmmla v12.4s, v4.8h, v0.8h + .inst 0x6e41ec8d // bfmmla v13.4s, v4.8h, v1.8h + .inst 0x6e40ecae // bfmmla v14.4s, v5.8h, v0.8h + .inst 0x6e41ecaf // bfmmla v15.4s, v5.8h, v1.8h + .inst 0x6e40ecd0 // bfmmla v16.4s, v6.8h, v0.8h + .inst 0x6e41ecd1 // bfmmla v17.4s, v6.8h, v1.8h + .inst 0x6e40ecf2 // bfmmla v18.4s, v7.8h, v0.8h + .inst 0x6e41ecf3 // bfmmla v19.4s, v7.8h, v1.8h + subs x12, x12, #1 + bgt LoopLR + LoopLREnd: + add x2, x2, x7 // weight stride + uzp1 v7.2d, v8.2d, v9.2d + uzp2 v8.2d, v8.2d, v9.2d + uzp1 v9.2d, v10.2d, v11.2d + uzp2 v10.2d, v10.2d, v11.2d + uzp1 v11.2d, v12.2d, v13.2d + uzp2 v12.2d, v12.2d, v13.2d + uzp1 v13.2d, v14.2d, v15.2d + uzp2 v14.2d, v14.2d, v15.2d + uzp1 v15.2d, v16.2d, v17.2d + uzp2 v16.2d, v16.2d, v17.2d + uzp1 v17.2d, v18.2d, v19.2d + uzp2 v18.2d, v18.2d, v19.2d + cbz x4, StoreLH4 + PostTreatLH4: + dup v5.4s, w19 + dup v6.4s, w20 + FOURFMAX v5, v7, v8, v9, v10 + FOURFMAX v5, v11, v12, v13, v14 + FOURFMAX v5, v15, v16, v17, v18 + FOURFMIN v6, v7, v8, v9, v10 + FOURFMIN v6, v11, v12, v13, v14 + FOURFMIN v6, v15, v16, v17, v18 + StoreLH4: + Float32ToBf16 v7, v8, v9, v10 + Float32ToBf16 v11, v12, v13, v14 + Float32ToBf16 v15, v16, v17, v18 + st1 {v7.4h, v8.4h, v9.4h, v10.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v11.4h, v12.4h, v13.4h, v14.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) +End: +ldp x19, x21, [sp, #64] +ldp d8, d9, [sp, #48] +ldp d10, d11, [sp, #32] +ldp d12, d13, [sp, #16] +ldp d14, d15, [sp], #80 +ret + +#endif diff --git a/source/backend/cpu/arm/arm64/bf16/MNNAxByClampBroadcastC4_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNAxByClampBroadcastC4_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNAxByClampBroadcastC4_BF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNAxByClampBroadcastC4_BF16.S diff --git a/source/backend/cpu/arm/arm64/bf16/MNNConvRunForLineDepthwise_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNConvRunForLineDepthwise_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNConvRunForLineDepthwise_BF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNConvRunForLineDepthwise_BF16.S diff --git a/source/backend/cpu/arm/arm64/bf16/MNNConvRunForUnitDepthWise_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNConvRunForUnitDepthWise_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNConvRunForUnitDepthWise_BF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNConvRunForUnitDepthWise_BF16.S diff --git a/source/backend/cpu/arm/arm64/bf16/MNNGelu_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNGelu_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNGelu_BF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNGelu_BF16.S diff --git 
a/backupcode/cpubackend/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S new file mode 100644 index 000000000..faa7d31a1 --- /dev/null +++ b/backupcode/cpubackend/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S @@ -0,0 +1,260 @@ + +// +// NEON_MNNPackC4ForMatMul_A_BF16.S +// MNN +// +// Created by MNN on 2021/02/26. +// Copyright © 2018-2021 Alibaba Group Holding Limited +// +#ifdef __aarch64__ +#include "MNNAsmGlobal.h" + +.macro transpose_4x4 x0, x1, x2, x3, x5, x6 // transpose 4x4 of sizeof(int16_t), only low half simd vector is valid. + trn1 \x5\().4h, \x0\().4h, \x1\().4h + trn2 \x1\().4h, \x0\().4h, \x1\().4h + trn1 \x6\().4h, \x2\().4h, \x3\().4h + trn2 \x3\().4h, \x2\().4h, \x3\().4h + trn1 \x0\().2s, \x5\().2s, \x6\().2s + trn2 \x2\().2s, \x5\().2s, \x6\().2s + trn1 \x6\().2s, \x1\().2s, \x3\().2s + trn2 \x3\().2s, \x1\().2s, \x3\().2s + mov \x1\().8b, \x6\().8b +.endm + +.text +.align 5 +asm_function NEON_MNNPackC4ForMatMul_A_BF16 +// treate float pointer as int16_t* +//void NEON_MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) +//Auto: x0: dest, x1:sourceGroup, x2: info, x3:el +ldr w10, [x2, #0] // number +mov x4, #0 +mov x11, #0 +mov x6, #0 +ldr w4, [x2, #4] // eReal +ldr w11, [x2, #8] // eDest +ldr w6, [x2, #12] // xOffset +// xOffset -> xOffset * 4 * sizeof(int16_t) +// eReal -> eReal * 4 * sizeof(int16_t) +// eDest -> eDest * sizeof(int16_t) +mov x12, #2 // sizeof(int16_t). kept as a const +mov x9, #8 +mul x4, x9, x4 +mul x11, x12, x11 +mul x6, x9, x6 + +LoopNumber: +mov x2, #0 +mov x5, #0 +mov x8, #0 +mov x7, #0 +ldr w5, [x3, #4] // l +ldr w8, [x3, #8] // eOffset +ldr w7, [x3, #12] // lOffset + +mov x13, x0 +mov x14, x1 +ldr x1, [x1, #0] + +// Compute dest ptr: x0 = x0 + eOffset * sizeof(int16_t) + lOffset * eDest * sizeof(int16_t) +mul x7, x11, x7 +mul x8, x12, x8 +add x0, x0, x7 +add x0, x0, x8 + +ldr w2, [x3, #0] // e + +Body: +cmp w2, #12 // original eDest +bne Right + cmp w5, #4 + blt LoopEL3 + LoopL4: + mov x2, x1 +.macro MAIN_TRANSPOSE + ld1 {v0.4h}, [x1], x6 // load size: 4 * sizeof(int16_t), jump one stride line as x6 + ld1 {v3.4h}, [x1], x6 + ld1 {v6.4h}, [x1], x6 + ld1 {v17.4h}, [x1], x6 + ld1 {v1.4h}, [x1], x6 + ld1 {v4.4h}, [x1], x6 + ld1 {v7.4h}, [x1], x6 + ld1 {v18.4h}, [x1], x6 + ld1 {v2.4h}, [x1], x6 + ld1 {v5.4h}, [x1], x6 + ld1 {v16.4h}, [x1], x6 + ld1 {v19.4h}, [x1], x6 + + transpose_4x4 v0, v3, v6, v17, v23, v24 + transpose_4x4 v1, v4, v7, v18, v25, v26 + transpose_4x4 v2, v5, v16, v19, v27, v28 +.endm + MAIN_TRANSPOSE + + stp d0, d1, [x0] // store size: 2 * 4 * sizeof(int16_t) + stp d2, d3, [x0, #(16 * 1)] + stp d4, d5, [x0, #(16 * 2)] + stp d6, d7, [x0, #(16 * 3)] + stp d16, d17, [x0, #(16 * 4)] + stp d18, d19, [x0, #(16 * 5)] + add x0, x0, #(16 * 6) + + // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) + // st1 {v1.4h}, [x0], #8 + // st1 {v2.4h}, [x0], #8 + // st1 {v3.4h}, [x0], #8 + // st1 {v4.4h}, [x0], #8 + // st1 {v5.4h}, [x0], #8 + // st1 {v6.4h}, [x0], #8 + // st1 {v7.4h}, [x0], #8 + // st1 {v16.4h}, [x0], #8 + // st1 {v17.4h}, [x0], #8 + // st1 {v18.4h}, [x0], #8 + // st1 {v19.4h}, [x0], #8 + + // st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 + // st1 {v4.4h, v5.4h, v6.4h, v7.4h}, [x0], #32 + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], #32 + + add x1, x2, x4 + sub x5, x5, #4 + cmp w5, #4 + bge LoopL4 + + LoopEL3: + cmp w5, #3 + blt LoopEL2 + MAIN_TRANSPOSE + + stp d0, d1, [x0] // store size: 2 * 4 * sizeof(int16_t) 
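+        // l-remainder == 3 path: of the transposed 12 x 4 tile, only 12 * 3 halfwords are kept by the stores here (d0-d7 and d16).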
+ stp d2, d3, [x0, #(16 * 1)] + stp d4, d5, [x0, #(16 * 2)] + stp d6, d7, [x0, #(16 * 3)] + str d16, [x0, #(16 * 4)] + add x0, x0, #(16 * 4 + 8) + + // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) + // st1 {v1.4h}, [x0], #8 + // st1 {v2.4h}, [x0], #8 + // st1 {v3.4h}, [x0], #8 + // st1 {v4.4h}, [x0], #8 + // st1 {v5.4h}, [x0], #8 + // st1 {v6.4h}, [x0], #8 + // st1 {v7.4h}, [x0], #8 + // st1 {v16.4h}, [x0], #8 + + // st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 + // st1 {v4.4h, v5.4h, v6.4h, v7.4h}, [x0], #32 + // st1 {v16.4h}, [x0], #8 + + b LoopEEnd + + LoopEL2: + cmp w5, #2 + blt LoopEL1 + MAIN_TRANSPOSE + stp d0, d1, [x0] // store size: 2 * 4 * sizeof(int16_t) + stp d2, d3, [x0, #(16 * 1)] + stp d4, d5, [x0, #(16 * 2)] + add x0, x0, #(16 * 3) + + // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) + // st1 {v1.4h}, [x0], #8 + // st1 {v2.4h}, [x0], #8 + // st1 {v3.4h}, [x0], #8 + // st1 {v4.4h}, [x0], #8 + // st1 {v5.4h}, [x0], #8 + + // st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 + // st1 {v4.4h, v5.4h}, [x0], #16 + + b LoopEEnd + + LoopEL1: + cmp w5, #1 + blt LoopEEnd + MAIN_TRANSPOSE + stp d0, d1, [x0] + str d2, [x0, #16] + add x0, x0, #(16 + 8) + + // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) + // st1 {v1.4h}, [x0], #8 + // st1 {v2.4h}, [x0], #8 + + // st1 {v0.4h, v1.4h, v2.4h}, [x0], #24 + + LoopEEnd: + +b End + + +Right: + +LoopE1: + mov w9, w5 + mov x7, x1 + mov x8, x0 + cmp w5, #4 + blt LoopE1L3 + LoopE1L4: + ld1 {v0.4h}, [x1], x4 + st1 {v0.h}[0], [x0], x11 + st1 {v0.h}[1], [x0], x11 + st1 {v0.h}[2], [x0], x11 + st1 {v0.h}[3], [x0], x11 + sub w5, w5, #4 + cmp w5, #4 + bge LoopE1L4 + + LoopE1L3: + cmp w5, #3 + blt LoopE1L2 + ld1 {v0.4h}, [x1], x4 + st1 {v0.h}[0], [x0], x11 + st1 {v0.h}[1], [x0], x11 + st1 {v0.h}[2], [x0], x11 + + sub w5, w5, #3 + + LoopE1L2: + cmp w5, #2 + blt LoopE1L1 + ld1 {v0.4h}, [x1], x4 + st1 {v0.h}[0], [x0], x11 + st1 {v0.h}[1], [x0], x11 + sub w5, w5, #2 + + LoopE1L1: + cmp w5, #1 + blt LoopE1End + ld1 {v0.h}[0], [x1], x4 + st1 {v0.h}[0], [x0], x11 + + LoopE1End: + + subs w2, w2, #1 + add x0, x8, x12 // !!!! caution : sizeof(int16_t) + add x1, x7, x6 + mov w5, w9 + bne LoopE1 + +End: + +mov x0, x13 +mov x1, x14 +subs w10, w10, #1 + +// x3 is (const int32_t* el), this array size of 4. as a result for next struct element, +// address added by 4 * sizeof(int32_t) +add x3, x3, #16 + +// x1 is (const int16_t** sourceGroup), even though data content is int16_t, +// the element in sourceGroup in 'int16_t*', as a result for next struct element, +// value added by sizeof(void*) +add x1, x1, #8 +bne LoopNumber + +ret + +#endif diff --git a/source/backend/cpu/arm/arm64/bf16/MNNPackC4_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNPackC4_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNPackC4_BF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNPackC4_BF16.S diff --git a/backupcode/cpubackend/arm/arm64/bf16/MNNPackC8_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNPackC8_BF16.S new file mode 100644 index 000000000..87503e839 --- /dev/null +++ b/backupcode/cpubackend/arm/arm64/bf16/MNNPackC8_BF16.S @@ -0,0 +1,126 @@ +// +// MNNPackC8_BF16.S +// MNN +// +// Created by MNN on 2021/02/20. +// Copyright © 2018-2021 Alibaba Group Holding Limited. 
+// +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + + +.text +.align 5 +asm_function MNNPackC8_BF16 +// treate float pointer as int16_t* +//void MNNPackC8_BF16(float* dest, const float* source, size_t l, size_t h); +// h, l -> hC8, l, 8 +// Auto: x0:dest, x1:source, x2: l, x3: h +// x4: lC8, x5:hC8, x6: sourceStride, x7: destStride + +lsr x4, x2, #3 +lsr x5, x3, #3 +mov x12, #2 // sizeof(int16_t) +mov x13, #16 // 8 * sizeof(int16_t) +mul x6, x12, x2 +mul x7, x13, x2 +mov x12, #16 // 8 * sizeof(int16_t) +mul x15, x12, x2 + +.macro transpose_4x4 x0, x1, x2, x3, x5, x6 + trn1 \x5\().4s, \x0\().4s, \x1\().4s + trn2 \x1\().4s, \x0\().4s, \x1\().4s + trn1 \x6\().4s, \x2\().4s, \x3\().4s + trn2 \x3\().4s, \x2\().4s, \x3\().4s + trn1 \x0\().2d, \x5\().2d, \x6\().2d + trn2 \x2\().2d, \x5\().2d, \x6\().2d + trn1 \x6\().2d, \x1\().2d, \x3\().2d + trn2 \x3\().2d, \x1\().2d, \x3\().2d + mov \x1\().16b, \x6\().16b +.endm + +LoopH: +mov x8, x0 +mov x9, x1 +mov x12, x4 + +LoopL: +mov x10, x9 +ld1 {v16.4h, v17.4h}, [x9], x6 +ld1 {v18.4h, v19.4h}, [x9], x6 +ld1 {v20.4h, v21.4h}, [x9], x6 +ld1 {v22.4h, v23.4h}, [x9], x6 + +ld1 {v24.4h, v25.4h}, [x9], x6 +ld1 {v26.4h, v27.4h}, [x9], x6 +ld1 {v28.4h, v29.4h}, [x9], x6 +ld1 {v30.4h, v31.4h}, [x9], x6 + +shll v16.4s, v16.4h, #16 +shll v17.4s, v17.4h, #16 +shll v18.4s, v18.4h, #16 +shll v19.4s, v19.4h, #16 +shll v20.4s, v20.4h, #16 +shll v21.4s, v21.4h, #16 +shll v22.4s, v22.4h, #16 +shll v23.4s, v23.4h, #16 +shll v24.4s, v24.4h, #16 +shll v25.4s, v25.4h, #16 +shll v26.4s, v26.4h, #16 +shll v27.4s, v27.4h, #16 +shll v28.4s, v28.4h, #16 +shll v29.4s, v29.4h, #16 +shll v30.4s, v30.4h, #16 +shll v31.4s, v31.4h, #16 + + +transpose_4x4 v16, v18, v20, v22, v0, v1 +transpose_4x4 v17, v19, v21, v23, v2, v3 +transpose_4x4 v24, v26, v28, v30, v4, v5 +transpose_4x4 v25, v27, v29, v31, v6, v7 + + +shrn v16.4h, v16.4s, #16 +shrn v17.4h, v17.4s, #16 +shrn v18.4h, v18.4s, #16 +shrn v19.4h, v19.4s, #16 +shrn v20.4h, v20.4s, #16 +shrn v21.4h, v21.4s, #16 +shrn v22.4h, v22.4s, #16 +shrn v23.4h, v23.4s, #16 +shrn v24.4h, v24.4s, #16 +shrn v25.4h, v25.4s, #16 +shrn v26.4h, v26.4s, #16 +shrn v27.4h, v27.4s, #16 +shrn v28.4h, v28.4s, #16 +shrn v29.4h, v29.4s, #16 +shrn v30.4h, v30.4s, #16 +shrn v31.4h, v31.4s, #16 + + +stp d16, d24, [x8], #16 +stp d18, d26, [x8], #16 +stp d20, d28, [x8], #16 +stp d22, d30, [x8], #16 + +stp d17, d25, [x8], #16 +stp d19, d27, [x8], #16 +stp d21, d29, [x8], #16 +stp d23, d31, [x8], #16 + +add x9, x10, #16 // 8 * sizeof(int16_t) + +subs x12, x12, #1 +bne LoopL + + +subs x5, x5, #1 +add x0, x0, x7 +add x1, x1, x15 +bne LoopH + + +ret + +#endif diff --git a/backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S new file mode 100644 index 000000000..a65140adc --- /dev/null +++ b/backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S @@ -0,0 +1,672 @@ +// +// MNNPackedMatMulRemain_BF16.S +// MNN +// +// Created by MNN on 2021/02/21. 
+// Copyright © 2018-2021 Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 +// 12 * 8 MatMul +asm_function NEON_MNNPackedMatMulRemain_BF16 +//void NEON_MNNPackedMatMulRemain_BF16(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias); +//Auto x0: C, x1:A, x2:B, x3:eSize, x4:parameter, x5:postParameters, x6:bias +sub sp, sp, #32 +str x19, [sp, #0] +str x20, [sp, #8] +str x21, [sp, #16] +ldr x11, [x4, #0] // aStride +ldr x9, [x4, #8] // l +ldr x10, [x4, #16] // h + +ldr x7, [x4, #24] // cStride +ldr x19, [x4, #40] // bExtraStride + +add x10, x10, #3 +lsr x10, x10, #2 + +cbz x5, Start +ld1 {v5.4s}, [x5] +dup v6.4s, v5.s[2] // Min Value +dup v7.4s, v5.s[3] // Max Value + +Start: + +E8: +cmp x3, #8 +blt E4 + +LoopE8: // e, TILE_BLOCK size is 8 + mov x20, x6 // bias + mov x8, x10 // updiv(h, 4) + mov x21, x0 // dest, C + mov x13, x2 // weight, B + + LH8: + cmp x8, #2 // h/4 > 2 + blt LH4 + // sub x14, x7, #32 // in "StoreLH8", total 2 lines stride is x14, first line is 4 * 4 * size_t(int16_t) = 32byte + LoopH8x8: + mov x15, x1 // src, A + subs x12, x9, #1 // l + ld1 {v3.4h, v4.4h}, [x13], #16 // 2 * 4 * sizeof(int16_t) + ld1 {v0.4h, v1.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + + fmul v16.4s, v3.4s, v0.s[0] + fmul v17.4s, v3.4s, v0.s[1] + fmul v18.4s, v3.4s, v0.s[2] + fmul v19.4s, v3.4s, v0.s[3] + + fmul v20.4s, v4.4s, v0.s[0] + fmul v21.4s, v4.4s, v0.s[1] + fmul v22.4s, v4.4s, v0.s[2] + fmul v23.4s, v4.4s, v0.s[3] + + fmul v24.4s, v3.4s, v1.s[0] + fmul v25.4s, v3.4s, v1.s[1] + fmul v26.4s, v3.4s, v1.s[2] + fmul v27.4s, v3.4s, v1.s[3] + + fmul v28.4s, v4.4s, v1.s[0] + fmul v29.4s, v4.4s, v1.s[1] + fmul v30.4s, v4.4s, v1.s[2] + fmul v31.4s, v4.4s, v1.s[3] + beq LoopLEnd + + LoopL: + ld1 {v3.4h, v4.4h}, [x13], #16 // 2 * 4 * sizeof(int16_t) + ld1 {v0.4h, v1.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + fmla v17.4s, v3.4s, v0.s[1] + fmla v18.4s, v3.4s, v0.s[2] + fmla v19.4s, v3.4s, v0.s[3] + + fmla v20.4s, v4.4s, v0.s[0] + fmla v21.4s, v4.4s, v0.s[1] + fmla v22.4s, v4.4s, v0.s[2] + fmla v23.4s, v4.4s, v0.s[3] + + fmla v24.4s, v3.4s, v1.s[0] + fmla v25.4s, v3.4s, v1.s[1] + fmla v26.4s, v3.4s, v1.s[2] + fmla v27.4s, v3.4s, v1.s[3] + + fmla v28.4s, v4.4s, v1.s[0] + fmla v29.4s, v4.4s, v1.s[1] + fmla v30.4s, v4.4s, v1.s[2] + fmla v31.4s, v4.4s, v1.s[3] + + subs x12, x12, #1 + bne LoopL + + LoopLEnd: + + add x13, x13, x19 + sub x8, x8, #2 + cmp x8, #2 + + cbz x5, StoreLH8 + AddBiasLH8: + ld1 {v0.4h, v1.4h}, [x20], #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + + fmla v16.4s, v0.4s, v5.s[1] + fmla v17.4s, v0.4s, v5.s[1] + fmla v18.4s, v0.4s, v5.s[1] + fmla v19.4s, v0.4s, v5.s[1] + + fmla v20.4s, v1.4s, v5.s[1] + fmla v21.4s, v1.4s, v5.s[1] + fmla v22.4s, v1.4s, v5.s[1] + fmla v23.4s, v1.4s, v5.s[1] + + fmla v24.4s, v0.4s, v5.s[1] + fmla v25.4s, v0.4s, v5.s[1] + fmla v26.4s, v0.4s, v5.s[1] + fmla v27.4s, v0.4s, v5.s[1] + + fmla v28.4s, v1.4s, v5.s[1] + fmla v29.4s, v1.4s, v5.s[1] + fmla v30.4s, v1.4s, v5.s[1] + fmla v31.4s, v1.4s, v5.s[1] + + PostTreatLH8: + fmax v16.4s, v16.4s, v6.4s + fmax v17.4s, v17.4s, v6.4s + fmax v18.4s, v18.4s, v6.4s + fmax v19.4s, v19.4s, v6.4s + fmax v20.4s, v20.4s, v6.4s + fmax v21.4s, v21.4s, v6.4s + fmax v22.4s, v22.4s, v6.4s + fmax v23.4s, v23.4s, v6.4s + fmax 
v24.4s, v24.4s, v6.4s + fmax v25.4s, v25.4s, v6.4s + fmax v26.4s, v26.4s, v6.4s + fmax v27.4s, v27.4s, v6.4s + fmax v28.4s, v28.4s, v6.4s + fmax v29.4s, v29.4s, v6.4s + fmax v30.4s, v30.4s, v6.4s + fmax v31.4s, v31.4s, v6.4s + + fmin v16.4s, v16.4s, v7.4s + fmin v17.4s, v17.4s, v7.4s + fmin v18.4s, v18.4s, v7.4s + fmin v19.4s, v19.4s, v7.4s + fmin v20.4s, v20.4s, v7.4s + fmin v21.4s, v21.4s, v7.4s + fmin v22.4s, v22.4s, v7.4s + fmin v23.4s, v23.4s, v7.4s + fmin v24.4s, v24.4s, v7.4s + fmin v25.4s, v25.4s, v7.4s + fmin v26.4s, v26.4s, v7.4s + fmin v27.4s, v27.4s, v7.4s + fmin v28.4s, v28.4s, v7.4s + fmin v29.4s, v29.4s, v7.4s + fmin v30.4s, v30.4s, v7.4s + fmin v31.4s, v31.4s, v7.4s + + StoreLH8: + shrn v16.4h, v16.4s, #16 + shrn v17.4h, v17.4s, #16 + shrn v18.4h, v18.4s, #16 + shrn v19.4h, v19.4s, #16 + shrn v20.4h, v20.4s, #16 + shrn v21.4h, v21.4s, #16 + shrn v22.4h, v22.4s, #16 + shrn v23.4h, v23.4s, #16 + shrn v24.4h, v24.4s, #16 + shrn v25.4h, v25.4s, #16 + shrn v26.4h, v26.4s, #16 + shrn v27.4h, v27.4s, #16 + shrn v28.4h, v28.4s, #16 + shrn v29.4h, v29.4s, #16 + shrn v30.4h, v30.4s, #16 + shrn v31.4h, v31.4s, #16 + + stp d16, d17, [x0] + stp d18, d19, [x0, #(16 * 1)] + stp d24, d25, [x0, #(16 * 2)] + stp d26, d27, [x0, #(16 * 3)] + add x0, x0, x7 // stp donot support post-index offset in register + + stp d20, d21, [x0] + stp d22, d23, [x0, #(16 * 1)] + stp d28, d29, [x0, #(16 * 2)] + stp d30, d31, [x0, #(16 * 3)] + add x0, x0, x7 // stp donot support post-index offset in register + + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], #32 // 4 * 4 * sizeof(int16_t) + // st1 {v24.4h, v25.4h, v26.4h, v27.4h}, [x0], x14 + // st1 {v20.4h, v21.4h, v22.4h, v23.4h}, [x0], #32 + // st1 {v28.4h, v29.4h, v30.4h, v31.4h}, [x0], x14 + + bge LoopH8x8 + + LH4: + cbz x8, E8End + LoopHRemain: + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h}, [x13] + ld1 {v0.4h}, [x15], #8 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmul v16.4s, v3.4s, v0.s[0] + fmul v17.4s, v3.4s, v0.s[1] + add x13, x13, #16 // weight + ld1 {v1.4h}, [x15] + shll v1.4s, v1.4h, #16 + + fmul v18.4s, v3.4s, v0.s[2] + sub x15, x15, #8 + fmul v19.4s, v3.4s, v0.s[3] + add x15, x15, x11 + fmul v20.4s, v3.4s, v1.s[0] + fmul v21.4s, v3.4s, v1.s[1] + fmul v22.4s, v3.4s, v1.s[2] + fmul v23.4s, v3.4s, v1.s[3] + beq LoopLREnd + + LoopLR: + ld1 {v3.4h}, [x13] + ld1 {v0.4h}, [x15], #8 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + fmla v17.4s, v3.4s, v0.s[1] + add x13, x13, #16 // weight + ld1 {v1.4h}, [x15] + shll v1.4s, v1.4h, #16 + + fmla v18.4s, v3.4s, v0.s[2] + sub x15, x15, #8 + fmla v19.4s, v3.4s, v0.s[3] + add x15, x15, x11 + + fmla v20.4s, v3.4s, v1.s[0] + fmla v21.4s, v3.4s, v1.s[1] + fmla v22.4s, v3.4s, v1.s[2] + fmla v23.4s, v3.4s, v1.s[3] + + subs x12, x12, #1 + bne LoopLR + LoopLREnd: + + cbz x5, StoreLH8x4 + AddBiasLH8x4: + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v0.4s, v5.s[1] + fmla v17.4s, v0.4s, v5.s[1] + fmla v18.4s, v0.4s, v5.s[1] + fmla v19.4s, v0.4s, v5.s[1] + + fmla v20.4s, v0.4s, v5.s[1] + fmla v21.4s, v0.4s, v5.s[1] + fmla v22.4s, v0.4s, v5.s[1] + fmla v23.4s, v0.4s, v5.s[1] + + PostTreatLH8x4: + fmax v16.4s, v16.4s, v6.4s + fmax v17.4s, v17.4s, v6.4s + fmax v18.4s, v18.4s, v6.4s + fmax v19.4s, v19.4s, v6.4s + fmax v20.4s, v20.4s, v6.4s + fmax v21.4s, v21.4s, v6.4s + fmax v22.4s, v22.4s, v6.4s + fmax v23.4s, v23.4s, v6.4s + + fmin v16.4s, v16.4s, v7.4s + fmin v17.4s, v17.4s, v7.4s + fmin v18.4s, v18.4s, v7.4s + fmin v19.4s, v19.4s, v7.4s + fmin v20.4s, v20.4s, 
v7.4s + fmin v21.4s, v21.4s, v7.4s + fmin v22.4s, v22.4s, v7.4s + fmin v23.4s, v23.4s, v7.4s + + StoreLH8x4: + shrn v16.4h, v16.4s, #16 + shrn v17.4h, v17.4s, #16 + shrn v18.4h, v18.4s, #16 + shrn v19.4h, v19.4s, #16 + shrn v20.4h, v20.4s, #16 + shrn v21.4h, v21.4s, #16 + shrn v22.4h, v22.4s, #16 + shrn v23.4h, v23.4s, #16 + + stp d16, d17, [x0] + stp d18, d19, [x0, #(16 * 1)] + stp d20, d21, [x0, #(16 * 2)] + stp d22, d23, [x0, #(16 * 3)] + add x0, x0, #(16 * 4) + + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], #32 + // st1 {v20.4h, v21.4h, v22.4h, v23.4h}, [x0], #32 + + E8End: + + sub x3, x3, #8 + cmp x3, #8 + add x0, x21, #64 // move dest address of 8 * 4 * sizeof(int16_t) + add x1, x1, #16 // move A matrix address of 8 * sizeof(int16_t) + bge LoopE8 + +E4: +cmp x3, #4 +mov x20, x6 +blt E1 + mov x8, x10 + mov x21, x0 + mov x13, x2 + + cmp x8, #2 + blt E4LH4 + + E4LH8: + E4LoopH8: + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h, v4.4h}, [x13], #16 + ld1 {v0.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + + fmul v16.4s, v3.4s, v0.s[0] + fmul v17.4s, v3.4s, v0.s[1] + fmul v18.4s, v3.4s, v0.s[2] + fmul v19.4s, v3.4s, v0.s[3] + + fmul v20.4s, v4.4s, v0.s[0] + fmul v21.4s, v4.4s, v0.s[1] + fmul v22.4s, v4.4s, v0.s[2] + fmul v23.4s, v4.4s, v0.s[3] + + beq E4LoopLEnd + + subs x12, x12, #1 + ld1 {v3.4h, v4.4h}, [x13], #16 + ld1 {v0.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + fmla v17.4s, v3.4s, v0.s[1] + + beq E4LoopLComputeEnd + + E4LoopL: + fmla v18.4s, v3.4s, v0.s[2] + fmla v19.4s, v3.4s, v0.s[3] + + fmla v20.4s, v4.4s, v0.s[0] + fmla v21.4s, v4.4s, v0.s[1] + fmla v22.4s, v4.4s, v0.s[2] + fmla v23.4s, v4.4s, v0.s[3] + + ld1 {v3.4h, v4.4h}, [x13], #16 + ld1 {v0.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + fmla v17.4s, v3.4s, v0.s[1] + + subs x12, x12, #1 + bne E4LoopL + E4LoopLComputeEnd: + fmla v18.4s, v3.4s, v0.s[2] + fmla v19.4s, v3.4s, v0.s[3] + + fmla v20.4s, v4.4s, v0.s[0] + fmla v21.4s, v4.4s, v0.s[1] + fmla v22.4s, v4.4s, v0.s[2] + fmla v23.4s, v4.4s, v0.s[3] + + E4LoopLEnd: + add x13, x13, x19 + sub x8, x8, #2 + cmp x8, #2 + + cbz x5, StoreLH4x8 + + AddBiasLH4x8: + ld1 {v0.4h, v1.4h}, [x20], #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + + fmla v16.4s, v0.4s, v5.s[1] + fmla v17.4s, v0.4s, v5.s[1] + fmla v18.4s, v0.4s, v5.s[1] + fmla v19.4s, v0.4s, v5.s[1] + + fmla v20.4s, v1.4s, v5.s[1] + fmla v21.4s, v1.4s, v5.s[1] + fmla v22.4s, v1.4s, v5.s[1] + fmla v23.4s, v1.4s, v5.s[1] + + PostTreatLH4x8: + fmax v16.4s, v16.4s, v6.4s + fmax v17.4s, v17.4s, v6.4s + fmax v18.4s, v18.4s, v6.4s + fmax v19.4s, v19.4s, v6.4s + fmax v20.4s, v20.4s, v6.4s + fmax v21.4s, v21.4s, v6.4s + fmax v22.4s, v22.4s, v6.4s + fmax v23.4s, v23.4s, v6.4s + + fmin v16.4s, v16.4s, v7.4s + fmin v17.4s, v17.4s, v7.4s + fmin v18.4s, v18.4s, v7.4s + fmin v19.4s, v19.4s, v7.4s + fmin v20.4s, v20.4s, v7.4s + fmin v21.4s, v21.4s, v7.4s + fmin v22.4s, v22.4s, v7.4s + fmin v23.4s, v23.4s, v7.4s + + StoreLH4x8: + shrn v16.4h, v16.4s, #16 + shrn v17.4h, v17.4s, #16 + shrn v18.4h, v18.4s, #16 + shrn v19.4h, v19.4s, #16 + shrn v20.4h, v20.4s, #16 + shrn v21.4h, v21.4s, #16 + shrn v22.4h, v22.4s, #16 + shrn v23.4h, v23.4s, #16 + + + stp d16, d17, [x0] + stp d18, d19, [x0, #16] + add x0, x0, x7 + stp d20, d21, [x0] + stp d22, d23, [x0, #16] + add x0, x0, x7 + + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], x7 + // st1 
{v20.4h, v21.4h, v22.4h, v23.4h}, [x0], x7 + + bge E4LoopH8 + + E4LH4: + cbz x8, E4End + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h}, [x13] + ld1 {v0.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmul v16.4s, v3.4s, v0.s[0] + fmul v17.4s, v3.4s, v0.s[1] + fmul v18.4s, v3.4s, v0.s[2] + fmul v19.4s, v3.4s, v0.s[3] + add x13, x13, #16 // weight + + beq E4LoopLREnd + + E4LoopLR: + ld1 {v3.4h}, [x13] + ld1 {v0.4h}, [x15], x11 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + fmla v17.4s, v3.4s, v0.s[1] + fmla v18.4s, v3.4s, v0.s[2] + fmla v19.4s, v3.4s, v0.s[3] + add x13, x13, #16 // weight + + subs x12, x12, #1 + bne E4LoopLR + E4LoopLREnd: + + cbz x5, StoreLH4x4 + AddBiasLH4x4: + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v0.4s, v5.s[1] + fmla v17.4s, v0.4s, v5.s[1] + fmla v18.4s, v0.4s, v5.s[1] + fmla v19.4s, v0.4s, v5.s[1] + + + PostTreatLH4x4: + fmax v16.4s, v16.4s, v6.4s + fmax v17.4s, v17.4s, v6.4s + fmax v18.4s, v18.4s, v6.4s + fmax v19.4s, v19.4s, v6.4s + + fmin v16.4s, v16.4s, v7.4s + fmin v17.4s, v17.4s, v7.4s + fmin v18.4s, v18.4s, v7.4s + fmin v19.4s, v19.4s, v7.4s + + StoreLH4x4: + + shrn v16.4h, v16.4s, #16 + shrn v17.4h, v17.4s, #16 + shrn v18.4h, v18.4s, #16 + shrn v19.4h, v19.4s, #16 + + stp d16, d17, [x0] + stp d18, d19, [x0, #16] + + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0] + + E4End: + + sub x3, x3, #4 + add x0, x21, #32 // move dest address of 4 * 4 * sizeof(int16_t) + add x1, x1, #8 // move dest address of 4 * sizeof(int16_t) + +E1: +cmp x3, #0 +beq End + +LoopE1: + mov x20, x6 + mov x8, x10 + mov x21, x0 + mov x13, x2 + + cmp x8, #2 + blt E1LH4 + + E1LH8: + E1LoopH8: + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h, v4.4h}, [x13], #16 // + ld1 {v0.h}[0], [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + + fmul v16.4s, v3.4s, v0.s[0] + fmul v20.4s, v4.4s, v0.s[0] + + beq E1LoopLEnd + + E1LoopL: + ld1 {v3.4h, v4.4h}, [x13], #16 // + ld1 {v0.h}[0], [x15], x11 + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + fmla v20.4s, v4.4s, v0.s[0] + + subs x12, x12, #1 + bne E1LoopL + + E1LoopLEnd: + + add x13, x13, x19 + sub x8, x8, #2 + cmp x8, #2 + + cbz x5, StoreLH1x8 + AddBiasLH1x8: + ld1 {v0.4h, v1.4h}, [x20], #16 + shll v1.4s, v1.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v0.4s, v5.s[1] + fmla v20.4s, v1.4s, v5.s[1] + + PostTreatLH1x8: + fmax v16.4s, v16.4s, v6.4s + fmax v20.4s, v20.4s, v6.4s + fmin v16.4s, v16.4s, v7.4s + fmin v20.4s, v20.4s, v7.4s + + StoreLH1x8: + shrn v16.4h, v16.4s, #16 + shrn v20.4h, v20.4s, #16 + st1 {v16.4h}, [x0], x7 + st1 {v20.4h}, [x0], x7 + + bge E1LoopH8 + + E1LH4: + cbz x8, E1End + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h}, [x13] + ld1 {v0.h}[0], [x15], x11 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmul v16.4s, v3.4s, v0.s[0] + add x13, x13, #16 // weight + + beq E1LoopLREnd + + E1LoopLR: + ld1 {v3.4h}, [x13] + ld1 {v0.h}[0], [x15], x11 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v3.4s, v0.s[0] + add x13, x13, #16 // weight + + subs x12, x12, #1 + bne E1LoopLR + E1LoopLREnd: + + cbz x5, StoreLH1x4 + AddBiasLH1x4: + ld1 {v0.4h}, [x20] + shll v0.4s, v0.4h, #16 + + fmla v16.4s, v0.4s, v5.s[1] + + PostTreatLH1x4: + fmax v16.4s, v16.4s, v6.4s + fmin v16.4s, v16.4s, v7.4s + + StoreLH1x4: + shrn v16.4h, v16.4s, #16 + st1 {v16.4h}, [x0] + + E1End: + + subs x3, x3, #1 + add x0, x21, #8 + add x1, x1, #2 + bne LoopE1 + + +End: +ldr x19, [sp, 
#0] +ldr x20, [sp, #8] +ldr x21, [sp, #16] +add sp, sp, #32 + +ret + +#endif diff --git a/backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMul_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMul_BF16.S new file mode 100644 index 000000000..22c2c24ca --- /dev/null +++ b/backupcode/cpubackend/arm/arm64/bf16/MNNPackedMatMul_BF16.S @@ -0,0 +1,501 @@ +// +// MNNPackedMatMul_BF16.S +// MNN +// +// Created by MNN on 2021/02/21. +// Copyright © 2018-2021 Alibaba Group Holding Limited +// +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + + +.text +.align 5 +// 12 * 8 MatMul +asm_function NEON_MNNPackedMatMul_BF16 +//void NEON_MNNPackedMatMul_BF16(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); +// x0: C, x1:A, x2:B, x3:parameter, x4: postParameters, x5:bias +stp d14, d15, [sp, #-64]! +stp d12, d13, [sp, #16] +stp d10, d11, [sp, #32] +stp d8, d9, [sp, #48] + +//ldr x8, [x3, #0] // deprecated +ldr x9, [x3, #8] // l +ldr x10, [x3, #16] // h + +ldr x13, [x3, #24] // cStride +ldr x7, [x3, #40] // bExtraStride + +// v0, v1, v2: A +// v3, v4: B +// v8 - v31: C +add x10, x10, #3 +lsr x10, x10, #2 + +cbz x4, Start +ld1 {v5.4s}, [x4] +dup v6.4s, v5.s[2] // Min Value +dup v7.4s, v5.s[3] // Max Value + +Start: + +cmp x10, #2 +blt LH4 + +LH8: +// sub x14, x13, #80 // in "StoreLH8", total 3 lines Cstride is x13, first 5 line stp is 5 * 8 * sizeof(int16_t) = 64byte + // stp should add at last +LoopH: + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h, v4.4h}, [x2], #16 // 8 * sizeof(int16_t) + ld1 {v0.4h, v1.4h, v2.4h}, [x15], #24 // 12 * sizeof(int16_t) + + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + shll v2.4s, v2.4h, #16 + + fmul v8.4s, v3.4s, v0.s[0] + fmul v9.4s, v3.4s, v0.s[1] + fmul v10.4s, v3.4s, v0.s[2] + fmul v11.4s, v3.4s, v0.s[3] + fmul v12.4s, v3.4s, v1.s[0] + fmul v13.4s, v3.4s, v1.s[1] + fmul v14.4s, v3.4s, v1.s[2] + fmul v15.4s, v3.4s, v1.s[3] + fmul v16.4s, v3.4s, v2.s[0] + fmul v17.4s, v3.4s, v2.s[1] + fmul v18.4s, v3.4s, v2.s[2] + fmul v19.4s, v3.4s, v2.s[3] + + fmul v20.4s, v4.4s, v0.s[0] + fmul v21.4s, v4.4s, v0.s[1] + fmul v22.4s, v4.4s, v0.s[2] + fmul v23.4s, v4.4s, v0.s[3] + + fmul v24.4s, v4.4s, v1.s[0] + fmul v25.4s, v4.4s, v1.s[1] + fmul v26.4s, v4.4s, v1.s[2] + fmul v27.4s, v4.4s, v1.s[3] + + fmul v28.4s, v4.4s, v2.s[0] + fmul v29.4s, v4.4s, v2.s[1] + fmul v30.4s, v4.4s, v2.s[2] + fmul v31.4s, v4.4s, v2.s[3] + + beq LoopLEnd + + cmp x12, #2 + blt L1 + LoopL2: + ld1 {v3.4h, v4.4h}, [x2], #16 // 8 * sizeof(int16_t) + ld1 {v0.4h, v1.4h, v2.4h}, [x15], #24 // 12 * sizeof(int16_t) // * sizeof(int16_t) + + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + shll v2.4s, v2.4h, #16 + + fmla v8.4s, v3.4s, v0.s[0] + fmla v9.4s, v3.4s, v0.s[1] + fmla v10.4s, v3.4s, v0.s[2] + fmla v11.4s, v3.4s, v0.s[3] + fmla v12.4s, v3.4s, v1.s[0] + fmla v13.4s, v3.4s, v1.s[1] + fmla v14.4s, v3.4s, v1.s[2] + fmla v15.4s, v3.4s, v1.s[3] + fmla v16.4s, v3.4s, v2.s[0] + fmla v17.4s, v3.4s, v2.s[1] + fmla v18.4s, v3.4s, v2.s[2] + fmla v19.4s, v3.4s, v2.s[3] + + fmla v20.4s, v4.4s, v0.s[0] + fmla v21.4s, v4.4s, v0.s[1] + fmla v22.4s, v4.4s, v0.s[2] + fmla v23.4s, v4.4s, v0.s[3] + + fmla v24.4s, v4.4s, v1.s[0] + fmla v25.4s, v4.4s, v1.s[1] + fmla v26.4s, v4.4s, v1.s[2] + fmla v27.4s, v4.4s, v1.s[3] + + fmla v28.4s, v4.4s, v2.s[0] + fmla v29.4s, v4.4s, v2.s[1] + fmla v30.4s, v4.4s, v2.s[2] + fmla v31.4s, v4.4s, v2.s[3] + + ld1 {v3.4h, v4.4h}, 
[x2], #16 // 8 * sizeof(int16_t) + ld1 {v0.4h, v1.4h, v2.4h}, [x15], #24 // 12 * sizeof(int16_t) // * sizeof(int16_t) + + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + shll v2.4s, v2.4h, #16 + + fmla v8.4s, v3.4s, v0.s[0] + fmla v9.4s, v3.4s, v0.s[1] + fmla v10.4s, v3.4s, v0.s[2] + fmla v11.4s, v3.4s, v0.s[3] + fmla v12.4s, v3.4s, v1.s[0] + fmla v13.4s, v3.4s, v1.s[1] + fmla v14.4s, v3.4s, v1.s[2] + fmla v15.4s, v3.4s, v1.s[3] + fmla v16.4s, v3.4s, v2.s[0] + fmla v17.4s, v3.4s, v2.s[1] + fmla v18.4s, v3.4s, v2.s[2] + fmla v19.4s, v3.4s, v2.s[3] + + fmla v20.4s, v4.4s, v0.s[0] + fmla v21.4s, v4.4s, v0.s[1] + fmla v22.4s, v4.4s, v0.s[2] + fmla v23.4s, v4.4s, v0.s[3] + + fmla v24.4s, v4.4s, v1.s[0] + fmla v25.4s, v4.4s, v1.s[1] + fmla v26.4s, v4.4s, v1.s[2] + fmla v27.4s, v4.4s, v1.s[3] + + fmla v28.4s, v4.4s, v2.s[0] + fmla v29.4s, v4.4s, v2.s[1] + fmla v30.4s, v4.4s, v2.s[2] + fmla v31.4s, v4.4s, v2.s[3] + sub x12, x12, #2 + cmp x12, #2 + bge LoopL2 + + cbz x12, LoopLEnd + + L1: + ld1 {v3.4h, v4.4h}, [x2], #16 // 8 * sizeof(int16_t) + ld1 {v0.4h, v1.4h, v2.4h}, [x15], #24 // 12 * sizeof(int16_t) // * sizeof(int16_t) + + shll v3.4s, v3.4h, #16 + shll v4.4s, v4.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + shll v2.4s, v2.4h, #16 + + fmla v8.4s, v3.4s, v0.s[0] + fmla v9.4s, v3.4s, v0.s[1] + fmla v10.4s, v3.4s, v0.s[2] + fmla v11.4s, v3.4s, v0.s[3] + fmla v12.4s, v3.4s, v1.s[0] + fmla v13.4s, v3.4s, v1.s[1] + fmla v14.4s, v3.4s, v1.s[2] + fmla v15.4s, v3.4s, v1.s[3] + fmla v16.4s, v3.4s, v2.s[0] + fmla v17.4s, v3.4s, v2.s[1] + fmla v18.4s, v3.4s, v2.s[2] + fmla v19.4s, v3.4s, v2.s[3] + + fmla v20.4s, v4.4s, v0.s[0] + fmla v21.4s, v4.4s, v0.s[1] + fmla v22.4s, v4.4s, v0.s[2] + fmla v23.4s, v4.4s, v0.s[3] + + fmla v24.4s, v4.4s, v1.s[0] + fmla v25.4s, v4.4s, v1.s[1] + fmla v26.4s, v4.4s, v1.s[2] + fmla v27.4s, v4.4s, v1.s[3] + + fmla v28.4s, v4.4s, v2.s[0] + fmla v29.4s, v4.4s, v2.s[1] + fmla v30.4s, v4.4s, v2.s[2] + fmla v31.4s, v4.4s, v2.s[3] + + LoopLEnd: + + add x2, x2, x7 // weight stride + sub x10, x10, #2 + cmp x10, #2 + + cbz x4, StoreLH8 + + AddBiasLH8: + ld1 {v0.4h, v1.4h}, [x5], #16 // 8 * sizeof(int16_t) + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + + fmla v8.4s, v0.4s, v5.s[1] + fmla v9.4s, v0.4s, v5.s[1] + fmla v10.4s, v0.4s, v5.s[1] + fmla v11.4s, v0.4s, v5.s[1] + + fmla v12.4s, v0.4s, v5.s[1] + fmla v13.4s, v0.4s, v5.s[1] + fmla v14.4s, v0.4s, v5.s[1] + fmla v15.4s, v0.4s, v5.s[1] + + fmla v16.4s, v0.4s, v5.s[1] + fmla v17.4s, v0.4s, v5.s[1] + fmla v18.4s, v0.4s, v5.s[1] + fmla v19.4s, v0.4s, v5.s[1] + + fmla v20.4s, v1.4s, v5.s[1] + fmla v21.4s, v1.4s, v5.s[1] + fmla v22.4s, v1.4s, v5.s[1] + fmla v23.4s, v1.4s, v5.s[1] + + fmla v24.4s, v1.4s, v5.s[1] + fmla v25.4s, v1.4s, v5.s[1] + fmla v26.4s, v1.4s, v5.s[1] + fmla v27.4s, v1.4s, v5.s[1] + + fmla v28.4s, v1.4s, v5.s[1] + fmla v29.4s, v1.4s, v5.s[1] + fmla v30.4s, v1.4s, v5.s[1] + fmla v31.4s, v1.4s, v5.s[1] + + PostTreatLH8: + fmax v8.4s, v8.4s, v6.4s + fmax v9.4s, v9.4s, v6.4s + fmax v10.4s, v10.4s, v6.4s + fmax v11.4s, v11.4s, v6.4s + fmax v12.4s, v12.4s, v6.4s + fmax v13.4s, v13.4s, v6.4s + fmax v14.4s, v14.4s, v6.4s + fmax v15.4s, v15.4s, v6.4s + fmax v16.4s, v16.4s, v6.4s + fmax v17.4s, v17.4s, v6.4s + fmax v18.4s, v18.4s, v6.4s + fmax v19.4s, v19.4s, v6.4s + fmax v20.4s, v20.4s, v6.4s + fmax v21.4s, v21.4s, v6.4s + fmax v22.4s, v22.4s, v6.4s + fmax v23.4s, v23.4s, v6.4s + fmax v24.4s, v24.4s, v6.4s + fmax v25.4s, v25.4s, v6.4s + fmax v26.4s, v26.4s, 
v6.4s + fmax v27.4s, v27.4s, v6.4s + fmax v28.4s, v28.4s, v6.4s + fmax v29.4s, v29.4s, v6.4s + fmax v30.4s, v30.4s, v6.4s + fmax v31.4s, v31.4s, v6.4s + + fmin v8.4s, v8.4s, v7.4s + fmin v9.4s, v9.4s, v7.4s + fmin v10.4s, v10.4s, v7.4s + fmin v11.4s, v11.4s, v7.4s + fmin v12.4s, v12.4s, v7.4s + fmin v13.4s, v13.4s, v7.4s + fmin v14.4s, v14.4s, v7.4s + fmin v15.4s, v15.4s, v7.4s + fmin v16.4s, v16.4s, v7.4s + fmin v17.4s, v17.4s, v7.4s + fmin v18.4s, v18.4s, v7.4s + fmin v19.4s, v19.4s, v7.4s + fmin v20.4s, v20.4s, v7.4s + fmin v21.4s, v21.4s, v7.4s + fmin v22.4s, v22.4s, v7.4s + fmin v23.4s, v23.4s, v7.4s + fmin v24.4s, v24.4s, v7.4s + fmin v25.4s, v25.4s, v7.4s + fmin v26.4s, v26.4s, v7.4s + fmin v27.4s, v27.4s, v7.4s + fmin v28.4s, v28.4s, v7.4s + fmin v29.4s, v29.4s, v7.4s + fmin v30.4s, v30.4s, v7.4s + fmin v31.4s, v31.4s, v7.4s + + StoreLH8: + + shrn v8.4h, v8.4s, #16 + shrn v9.4h, v9.4s, #16 + shrn v10.4h, v10.4s, #16 + shrn v11.4h, v11.4s, #16 + shrn v12.4h, v12.4s, #16 + shrn v13.4h, v13.4s, #16 + shrn v14.4h, v14.4s, #16 + shrn v15.4h, v15.4s, #16 + shrn v16.4h, v16.4s, #16 + shrn v17.4h, v17.4s, #16 + shrn v18.4h, v18.4s, #16 + shrn v19.4h, v19.4s, #16 + shrn v20.4h, v20.4s, #16 + shrn v21.4h, v21.4s, #16 + shrn v22.4h, v22.4s, #16 + shrn v23.4h, v23.4s, #16 + shrn v24.4h, v24.4s, #16 + shrn v25.4h, v25.4s, #16 + shrn v26.4h, v26.4s, #16 + shrn v27.4h, v27.4s, #16 + shrn v28.4h, v28.4s, #16 + shrn v29.4h, v29.4s, #16 + shrn v30.4h, v30.4s, #16 + shrn v31.4h, v31.4s, #16 + + stp d8, d9, [x0] + stp d10, d11, [x0, #(16 * 1)] // 2 * 4 * sizeof(int16_t) + stp d12, d13, [x0, #(16 * 2)] + stp d14, d15, [x0, #(16 * 3)] + stp d16, d17, [x0, #(16 * 4)] + stp d18, d19, [x0, #(16 * 5)] + add x0, x0, x13 // stp donot support post-index offset in register + stp d20, d21, [x0] + stp d22, d23, [x0, #(16 * 1)] + stp d24, d25, [x0, #(16 * 2)] + stp d26, d27, [x0, #(16 * 3)] + stp d28, d29, [x0, #(16 * 4)] + stp d30, d31, [x0, #(16 * 5)] + add x0, x0, x13 + + // st1 {v8.4h, v9.4h, v10.4h, v11.4h}, [x0], #32 // 16 * sizeof(int16_t) + // st1 {v12.4h, v13.4h, v14.4h, v15.4h}, [x0], #32 // 16 * sizeof(int16_t) + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], x14 + // st1 {v20.4h, v21.4h, v22.4h, v23.4h}, [x0], #32 // 16 * sizeof(int16_t) + // st1 {v24.4h, v25.4h, v26.4h, v27.4h}, [x0], #32 // 16 * sizeof(int16_t) + // st1 {v28.4h, v29.4h, v30.4h, v31.4h}, [x0], x14 + + bge LoopH + +LH4: +cbz x10, End +LoopHRemain: + mov x15, x1 + subs x12, x9, #1 + ld1 {v3.4h}, [x2] + ld1 {v0.4h}, [x15], #8 + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + + fmul v8.4s, v3.4s, v0.s[0] + fmul v9.4s, v3.4s, v0.s[1] + add x2, x2, #16 // + ld1 {v1.4h}, [x15], #8 + shll v1.4s, v1.4h, #16 + + fmul v10.4s, v3.4s, v0.s[2] + fmul v11.4s, v3.4s, v0.s[3] + fmul v12.4s, v3.4s, v1.s[0] + + ld1 {v2.4h}, [x15], #8 + shll v2.4s, v2.4h, #16 + + fmul v13.4s, v3.4s, v1.s[1] + fmul v14.4s, v3.4s, v1.s[2] + fmul v15.4s, v3.4s, v1.s[3] + fmul v16.4s, v3.4s, v2.s[0] + fmul v17.4s, v3.4s, v2.s[1] + fmul v18.4s, v3.4s, v2.s[2] + fmul v19.4s, v3.4s, v2.s[3] + + beq LoopLREnd + + LoopLR: + ld1 {v3.4h}, [x2] + ld1 {v0.4h, v1.4h, v2.4h}, [x15], #24 // 12 * sizeof(int16_t) + shll v3.4s, v3.4h, #16 + shll v0.4s, v0.4h, #16 + shll v1.4s, v1.4h, #16 + shll v2.4s, v2.4h, #16 + + fmla v8.4s, v3.4s, v0.s[0] + fmla v9.4s, v3.4s, v0.s[1] + fmla v10.4s, v3.4s, v0.s[2] + fmla v11.4s, v3.4s, v0.s[3] + add x2, x2, #16 // + fmla v12.4s, v3.4s, v1.s[0] + fmla v13.4s, v3.4s, v1.s[1] + fmla v14.4s, v3.4s, v1.s[2] + fmla v15.4s, v3.4s, v1.s[3] + fmla v16.4s, 
v3.4s, v2.s[0] + fmla v17.4s, v3.4s, v2.s[1] + fmla v18.4s, v3.4s, v2.s[2] + fmla v19.4s, v3.4s, v2.s[3] + + subs x12, x12, #1 + bne LoopLR + LoopLREnd: + + cbz x4, StoreLH4 + AddBiasLH4: + ld1 {v0.4h}, [x5], #8 + shll v0.4s, v0.4h, #16 + + fmla v8.4s, v0.4s, v5.s[1] + fmla v9.4s, v0.4s, v5.s[1] + fmla v10.4s, v0.4s, v5.s[1] + fmla v11.4s, v0.4s, v5.s[1] + + fmla v12.4s, v0.4s, v5.s[1] + fmla v13.4s, v0.4s, v5.s[1] + fmla v14.4s, v0.4s, v5.s[1] + fmla v15.4s, v0.4s, v5.s[1] + + fmla v16.4s, v0.4s, v5.s[1] + fmla v17.4s, v0.4s, v5.s[1] + fmla v18.4s, v0.4s, v5.s[1] + fmla v19.4s, v0.4s, v5.s[1] + + PostTreatLH4: + fmax v8.4s, v8.4s, v6.4s + fmax v9.4s, v9.4s, v6.4s + fmax v10.4s, v10.4s, v6.4s + fmax v11.4s, v11.4s, v6.4s + fmax v12.4s, v12.4s, v6.4s + fmax v13.4s, v13.4s, v6.4s + fmax v14.4s, v14.4s, v6.4s + fmax v15.4s, v15.4s, v6.4s + fmax v16.4s, v16.4s, v6.4s + fmax v17.4s, v17.4s, v6.4s + fmax v18.4s, v18.4s, v6.4s + fmax v19.4s, v19.4s, v6.4s + + fmin v8.4s, v8.4s, v7.4s + fmin v9.4s, v9.4s, v7.4s + fmin v10.4s, v10.4s, v7.4s + fmin v11.4s, v11.4s, v7.4s + fmin v12.4s, v12.4s, v7.4s + fmin v13.4s, v13.4s, v7.4s + fmin v14.4s, v14.4s, v7.4s + fmin v15.4s, v15.4s, v7.4s + fmin v16.4s, v16.4s, v7.4s + fmin v17.4s, v17.4s, v7.4s + fmin v18.4s, v18.4s, v7.4s + fmin v19.4s, v19.4s, v7.4s + + StoreLH4: + + shrn v8.4h, v8.4s, #16 + shrn v9.4h, v9.4s, #16 + shrn v10.4h, v10.4s, #16 + shrn v11.4h, v11.4s, #16 + shrn v12.4h, v12.4s, #16 + shrn v13.4h, v13.4s, #16 + shrn v14.4h, v14.4s, #16 + shrn v15.4h, v15.4s, #16 + shrn v16.4h, v16.4s, #16 + shrn v17.4h, v17.4s, #16 + shrn v18.4h, v18.4s, #16 + shrn v19.4h, v19.4s, #16 + + stp d8, d9, [x0] + stp d10, d11, [x0, #(16 * 1)] + stp d12, d13, [x0, #(16 * 2)] + stp d14, d15, [x0, #(16 * 3)] + stp d16, d17, [x0, #(16 * 4)] + stp d18, d19, [x0, #(16 * 5)] + + // st1 {v8.4h, v9.4h, v10.4h, v11.4h}, [x0], #32 + // st1 {v12.4h, v13.4h, v14.4h, v15.4h}, [x0], #32 + // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0] + + sub x10, x10, #1 + + +End: +ldp d8, d9, [sp, #48] +ldp d10, d11, [sp, #32] +ldp d12, d13, [sp, #16] +ldp d14, d15, [sp], #64 + + +ret + +#endif diff --git a/source/backend/cpu/arm/arm64/bf16/MNNReluWithSlopeChannelBF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNReluWithSlopeChannelBF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNReluWithSlopeChannelBF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNReluWithSlopeChannelBF16.S diff --git a/source/backend/cpu/arm/arm64/bf16/MNNUnPackC4_BF16.S b/backupcode/cpubackend/arm/arm64/bf16/MNNUnPackC4_BF16.S similarity index 100% rename from source/backend/cpu/arm/arm64/bf16/MNNUnPackC4_BF16.S rename to backupcode/cpubackend/arm/arm64/bf16/MNNUnPackC4_BF16.S diff --git a/source/backend/cpu/bf16/BF16Backend.cpp b/backupcode/cpubackend/bf16/BF16Backend.cpp similarity index 100% rename from source/backend/cpu/bf16/BF16Backend.cpp rename to backupcode/cpubackend/bf16/BF16Backend.cpp diff --git a/source/backend/cpu/bf16/BF16Backend.hpp b/backupcode/cpubackend/bf16/BF16Backend.hpp similarity index 100% rename from source/backend/cpu/bf16/BF16Backend.hpp rename to backupcode/cpubackend/bf16/BF16Backend.hpp diff --git a/source/backend/cpu/bf16/BF16Binary.cpp b/backupcode/cpubackend/bf16/BF16Binary.cpp similarity index 100% rename from source/backend/cpu/bf16/BF16Binary.cpp rename to backupcode/cpubackend/bf16/BF16Binary.cpp diff --git a/source/backend/cpu/bf16/BF16Binary.hpp b/backupcode/cpubackend/bf16/BF16Binary.hpp similarity index 100% rename from 
source/backend/cpu/bf16/BF16Binary.hpp rename to backupcode/cpubackend/bf16/BF16Binary.hpp diff --git a/backupcode/cpubackend/bf16/BF16Functions.cpp b/backupcode/cpubackend/bf16/BF16Functions.cpp new file mode 100644 index 000000000..3f792a3ce --- /dev/null +++ b/backupcode/cpubackend/bf16/BF16Functions.cpp @@ -0,0 +1,918 @@ +#ifdef MNN_USE_SSE +#include "../x86_x64/sse/FunctionSummary.hpp" +#include "../x86_x64/avx/FunctionSummary.hpp" +#include "../x86_x64/avxfma/FunctionSummary.hpp" +#include "../x86_x64/avx512/FunctionSummary.hpp" +#include "../x86_x64/cpu_id.h" +#endif +#include "core/Macro.h" +#if defined(MNN_USE_NEON) +#include "../arm/FunctionSummary.hpp" +#endif + +#include "BF16Functions.hpp" +#include "WinogradOptFunctionHalf.hpp" +#include "../compute/CommonOptFunction.h" +#include "../CPUPool.hpp" +#include "../CPURuntime.hpp" +#include "VecHalf.hpp" +#include "math/Vec.hpp" +#include "BF16Binary.hpp" +#include "BF16Unary.hpp" +using BFVec4 = MNN::Math::VecHalf<4>; +using Vec4 = MNN::Math::Vec; +extern "C" { +void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* slopeO, size_t sizeQuad, size_t depthQuad); +} +namespace MNN { +// just for reference BF16 converting of c++ code, not for arm or sse. +inline int16_t MNNFP32ToBF16(float fp32Value) { + int32_t* s32Value = (int32_t*)(&fp32Value); + return (int16_t)((*s32Value) >> 16); +} +inline float MNNLowpToFp32(int16_t s16Value) { + int32_t s32Value = ((int32_t)s16Value) << 16; + float* fp32Value = (float*)(&s32Value); + return *fp32Value; +} + +static void _MNNFp32ToLowp(const float* src, int16_t* dst, size_t size) { + int sizeC4 = size / 4; + for (int i = 0; i < sizeC4; ++i) { + auto srcV = Vec4::load(src); + auto dstV = BFVec4(std::move(srcV.value)); + BFVec4::save(dst, dstV); + src+=4; + dst+=4; + } + int sizeRemain = size % 4; + if (sizeRemain > 0) { + float srcTemp[4]; + int64_t dstTemp[1]; + ::memcpy(srcTemp, src, sizeRemain * sizeof(float)); + auto srcV = Vec4::load(srcTemp); + auto dstV = BFVec4(std::move(srcV.value)); + BFVec4::save((int16_t*)dstTemp, dstV); + ::memcpy(dst, dstTemp, sizeRemain * sizeof(int16_t)); + } +} +static void _MNNLowpToFp32(const int16_t* src, float* dst, size_t size) { + int sizeC4 = size / 4; + for (int i = 0; i < sizeC4; ++i) { + auto srcV = BFVec4::load(src); + auto dstV = Vec4(std::move(srcV.value)); + Vec4::save(dst, dstV); + src+=4; + dst+=4; + } + int sizeRemain = size % 4; + if (sizeRemain > 0) { + int64_t srcTemp[2]; + float dstTemp[4]; + ::memcpy(srcTemp, src, sizeRemain * sizeof(int16_t)); + auto srcV = BFVec4::load((int16_t*)srcTemp); + auto dstV = Vec4(std::move(srcV.value)); + Vec4::save(dstTemp, dstV); + ::memcpy(dst, dstTemp, sizeRemain * sizeof(float)); + } +} +static void MNNConvRunForUnitDepthWiseBF16(float* dst, const float* src, const float* weight, size_t fw, size_t fh, + size_t weight_y_step, size_t dilateX_step, size_t dilateY_step) { + int fx, fy; + BFVec4 dstValue(0.0f); + const int16_t* src_z = (const int16_t*)src; + const int16_t* weight_z = (const int16_t*)weight; + for (fy = 0; fy < fh; ++fy) { + const auto src_y = src_z + fy * dilateY_step; + const auto weight_y = weight_z + fy * weight_y_step; + for (fx = 0; fx < fw; ++fx) { + const auto weight_x = weight_y + 4 * fx; + const auto src_x = src_y + fx * dilateX_step; + dstValue = dstValue + BFVec4::load(src_x) * BFVec4::load(weight_x); + } + } + BFVec4::save((int16_t*)dst, dstValue); +} + +static void MNNConvRunForLineDepthwiseBF16(float* dstO, const float* srcO, const float* weightO, size_t 
width, size_t src_w_setup, + size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height, + size_t srcHStep, size_t dstHStep) { + int dx, fx, fy; + auto dst = (int16_t*)dstO; + auto src = (const int16_t*)srcO; + auto weight = (const int16_t*)weightO; + for (int y = 0; y < height; ++y) { + auto srcY = src + y * srcHStep; + auto dstY = dst + y * dstHStep; + for (dx = 0; dx < width; ++dx) { + auto dst_x = dstY + dx * 4; + BFVec4 dstValue(0.0f); + const auto src_z = srcY + src_w_setup * dx; + const auto weight_z = weight; + for (fy = 0; fy < fh; ++fy) { + const auto src_y = src_z + fy * dilateY_step; + const auto weight_y = weight_z + fy * fw * 4; + for (fx = 0; fx < fw; ++fx) { + const auto weight_x = weight_y + 4 * fx; + const auto src_x = src_y + fx * dilateX_step; + dstValue = dstValue + BFVec4::load(src_x) * BFVec4::load(weight_x); + } + } + BFVec4::save(dst_x, dstValue); + } + } +} +void MNNAxByClampBroadcastUnitBF16(float* CF, const float* AF, const float* BF, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters) { + auto C = (int16_t*)CF; + auto A = (const int16_t*)AF; + auto B = (const int16_t*)BF; + auto minF = BFVec4(parameters[2]); + auto maxF = BFVec4(parameters[3]); + auto beta = BFVec4(parameters[1]); + for (int y = 0; y < height; ++y) { + auto a = A + aStride * y; + auto b = B + 4 * y; + auto bv = BFVec4::load(b); + auto c = C + cStride * y; + for (int x = 0; x < width; ++x) { + auto av = BFVec4::load(a + 4 * x); + auto cv = av + bv * beta; + cv = BFVec4::min(cv, maxF); + cv = BFVec4::max(cv, minF); + BFVec4::save(c + 4 * x, cv); + } + } +} +#ifndef MNN_USE_NEON +void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* slopeO, size_t sizeQuad, size_t depthQuad) { + auto slope = (const int16_t*)slopeO; + auto dst = (int16_t*)dstO; + auto src = (const int16_t*)srcO; + auto zero = BFVec4(0.0f); + for (int j = 0; j < depthQuad; j++) { + auto slopeZ = BFVec4::load(slope + 4 * j); + auto srcZ = src + 4 * j * sizeQuad; + auto dstZ = dst + 4 * j * sizeQuad; + for (int i = 0; i < sizeQuad; i++) { + auto srcValue = BFVec4::load(srcZ + 4 * i); + std::array dstV; + for (int c = 0; c < 4; c++) { + if (srcValue[c] < 0) { + dstV[c] = srcValue[c] * slopeZ[c]; + } else { + dstV[c] = srcValue[c]; + } + } + auto dstValue = BFVec4(std::move(Vec4::load(dstV.data()).value)); + BFVec4::save(dstZ + 4 * i, dstValue); + } + } +} +#endif + +#if !defined(MNN_USE_SSE) && !defined(MNN_USE_NEON) +void MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) { + MNNPackC4ForMatMul_A(destOrigin, sourceGroup, info, el); + return; +} + +void MNNPackForMatMul_B_BF16(float* dest, const float* source, size_t h, size_t l, bool transpose) { + auto hP = h / 4; + auto hR = hP * 4; + if (hR != h) { + ::memset(dest, 0, UP_DIV(h, 4)*4*l*sizeof(int16_t)); + } + if (!transpose) { + for (int y=0; y 0) { + auto destY = dest + hP * 4 * l; + auto sourceY = source + hP * 4; + for (int x=0; x().max(); + float maxValue = std::numeric_limits().max(); + if (nullptr != postParameters) { + minValue = postParameters[2]; + maxValue = postParameters[3]; + alpha = postParameters[0]; + beta = postParameters[1]; + } + + for (int x = 0; x < eSize; ++x) { + auto dst = C + 4 * x; + auto src = + A + x; // input data is packed as tileCount x l x 16, is only one tiled block here, indexed as A[z * 16 + x] + for (int ry = 0; ry < h; ++ry) { + auto y = ry / 4; + auto yRemain = ry % 4; + auto bY = B + y * bStride; + auto dstY 
= dst + y * cStride; // convert NCHW to NC4HW4 ie 1·(y/4)·X·4 + int wdy = ry / 6; + int wdyRemain = ry % 6; + auto weight = + B + wdy * bStride + + wdyRemain; // weight is packed as (h/6) x l x 6, indexed as B[(ry / 6) * Bstride +z*6 + (ry % 6)] + float summer = 0.0f; + for (int z = 0; z < l; ++z) { + auto aZ = src + z * 16; + auto wZ = weight + z * 6; + summer += MNNLowpToFp32(wZ[0]) * MNNLowpToFp32(aZ[0]); + } + float originValue = MNNLowpToFp32(dstY[yRemain]); + if (nullptr != bias) { + originValue = MNNLowpToFp32(bias[ry]); + } + auto dstValue = originValue * beta + alpha * summer; + dstValue = std::min(dstValue, maxValue); + dstValue = std::max(dstValue, minValue); + dstY[yRemain] = MNNFP32ToBF16(dstValue); + } + } +} + +void MNNPackedMatMul_BF16(float* C, const float* A, const float* B, const size_t* parameter, float* cache, + const float* postParameters, const float* bias, const float* k, const float* b) { + return MNNPackedMatMulRemain_BF16(C, A, B, 16, parameter, cache, postParameters, bias, nullptr, nullptr); + // return _AVX_MNNPackedMatMulFMA(C, A, B, parameter, cache); +} + + +static void _MNNConvDwF23MulTransUnit(float **cacheLine, const float *weigth, float *dest, size_t ow); + +static void _MNNMultiAndDestTransformCommon23(float **cacheLine, const float *weigthF, float *destF, int cacheLineSize, int ow, const float* bias, const float* parameters) { + auto weigth = (const int16_t*)weigthF; + auto dest = (int16_t*)destF; + int unit = ow / 2; + auto biasF = BFVec4::load((const int16_t*)bias); + auto minV = BFVec4(parameters[2]); + auto maxV = BFVec4(parameters[3]); + MNN_ASSERT(cacheLineSize >= 1); + for (int x = 0; x < unit; ++x) { + auto offset = 4 * 4 * x; + int i = 0; + BFVec4 m0 = BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); + BFVec4 m1 = BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); + BFVec4 m2 = BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); + BFVec4 m3 = BFVec4::load(weigth + i * 16 + 4 * 3) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 3); + + for (i = 1; i < cacheLineSize; ++i) { + m0 = m0 + BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); + m1 = m1 + BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); + m2 = m2 + BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); + m3 = m3 + BFVec4::load(weigth + i * 16 + 4 * 3) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 3); + } + + auto o0 = m0 + m1 + m2 + biasF; + auto o1 = m1 - m2 + m3 + biasF; + o0 = BFVec4::min(o0, maxV); + o1 = BFVec4::min(o1, maxV); + o0 = BFVec4::max(o0, minV); + o1 = BFVec4::max(o1, minV); + BFVec4::save(dest + 8 * x + 0 * 4, o0); + BFVec4::save(dest + 8 * x + 1 * 4, o1); + } + if (unit * 2 < ow) { + auto offset = 4 * 4 * unit; + int i = 0; + BFVec4 m0 = BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); + BFVec4 m1 = BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); + BFVec4 m2 = BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); + + for (i = 1; i < cacheLineSize; ++i) { + m0 = m0 + BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); + m1 = m1 + BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); + 
m2 = m2 + BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); + } + + auto o0 = m0 + m1 + m2 + biasF; + o0 = BFVec4::min(o0, maxV); + o0 = BFVec4::max(o0, minV); + BFVec4::save(dest + 8 * unit + 0 * 4, o0); + } +} +static void _MNNConvDwF23SourceTransUnit(const int16_t *source, int16_t *dest, size_t unit); +static void _MNNSourceTransformCommonF23(const float *sourceF, float *destF, int unit, int iw, int pad, int su, int eu) { + auto source = (const int16_t*)sourceF; + auto dest = (int16_t*)destF; + for (int x = 0; x < su; ++x) { + auto dstX = dest + 4 * 4 * x; + auto sx = x * 2 - (int)pad; + auto ex = sx + 4; + + auto clampSx = std::max(sx, 0); + auto clampEx = std::min(ex, (int)iw); + + BFVec4 v[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + for (int i = clampSx; i < clampEx; ++i) { + v[i - sx] = BFVec4::load(source + 4 * i); + } + auto m0 = v[0] - v[2]; + auto m1 = v[1] + v[2]; + auto m2 = v[2] - v[1]; + auto m3 = v[3] - v[1]; + + BFVec4::save(dstX + 4 * 0, m0); + BFVec4::save(dstX + 4 * 1, m1); + BFVec4::save(dstX + 4 * 2, m2); + BFVec4::save(dstX + 4 * 3, m3); + } + _MNNConvDwF23SourceTransUnit(source + 4 * (su * 2 - pad), dest + 4 * 4 * su, eu - su); + + for (int x = eu; x < unit; ++x) { + auto dstX = dest + 4 * 4 * x; + auto sx = x * 2 - (int)pad; + auto ex = sx + 4; + + auto clampSx = std::max(sx, 0); + auto clampEx = std::min(ex, (int)iw); + + BFVec4 v[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + for (int i = clampSx; i < clampEx; ++i) { + v[i - sx] = BFVec4::load(source + 4 * i); + } + auto m0 = v[0] - v[2]; + auto m1 = v[1] + v[2]; + auto m2 = v[2] - v[1]; + auto m3 = v[3] - v[1]; + + BFVec4::save(dstX + 4 * 0, m0); + BFVec4::save(dstX + 4 * 1, m1); + BFVec4::save(dstX + 4 * 2, m2); + BFVec4::save(dstX + 4 * 3, m3); + } +} + +static void _MNNConvDwF23MulTransUnit(float **cacheLine, const float *weigthF, float *destF, size_t ow, const float* bias, const float* parameters) { + int unit = ow / 2; + auto weigth = (const int16_t*)weigthF; + auto dest = (int16_t*)destF; + + auto w00 = BFVec4::load(weigth + 0 * 16 + 4 * 0); + auto w01 = BFVec4::load(weigth + 0 * 16 + 4 * 1); + auto w02 = BFVec4::load(weigth + 0 * 16 + 4 * 2); + auto w03 = BFVec4::load(weigth + 0 * 16 + 4 * 3); + auto w10 = BFVec4::load(weigth + 1 * 16 + 4 * 0); + auto w11 = BFVec4::load(weigth + 1 * 16 + 4 * 1); + auto w12 = BFVec4::load(weigth + 1 * 16 + 4 * 2); + auto w13 = BFVec4::load(weigth + 1 * 16 + 4 * 3); + auto w20 = BFVec4::load(weigth + 2 * 16 + 4 * 0); + auto w21 = BFVec4::load(weigth + 2 * 16 + 4 * 1); + auto w22 = BFVec4::load(weigth + 2 * 16 + 4 * 2); + auto w23 = BFVec4::load(weigth + 2 * 16 + 4 * 3); + + auto biasF = BFVec4::load((const int16_t*)bias); + auto minV = BFVec4(parameters[2]); + auto maxV = BFVec4(parameters[3]); + for (int x = 0; x < unit; ++x) { + auto offset = 4 * 4 * x; + int i = 0; + BFVec4 m0 = w00 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 0); + BFVec4 m1 = w01 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 1); + BFVec4 m2 = w02 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 2); + BFVec4 m3 = w03 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 3); + + m0 = m0 + w10 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 0); + m1 = m1 + w11 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 1); + m2 = m2 + w12 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 2); + m3 = m3 + w13 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 3); + + m0 = m0 + w20 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 0); + m1 = m1 + w21 * 
BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 1); + m2 = m2 + w22 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 2); + m3 = m3 + w23 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 3); + + auto o0 = m0 + m1 + m2 + biasF; + auto o1 = m1 - m2 + m3 + biasF; + o0 = BFVec4::min(o0, maxV); + o1 = BFVec4::min(o1, maxV); + o0 = BFVec4::max(o0, minV); + o1 = BFVec4::max(o1, minV); + BFVec4::save(dest + 8 * x + 0 * 4, o0); + BFVec4::save(dest + 8 * x + 1 * 4, o1); + } + if (unit * 2 < ow) { + auto offset = 4 * 4 * unit; + BFVec4 m0 = w00 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 0); + BFVec4 m1 = w01 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 1); + BFVec4 m2 = w02 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 2); + + m0 = m0 + w10 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 0); + m1 = m1 + w11 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 1); + m2 = m2 + w12 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 2); + + m0 = m0 + w20 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 0); + m1 = m1 + w21 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 1); + m2 = m2 + w22 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 2); + auto o0 = m0 + m1 + m2 + biasF; + o0 = BFVec4::min(o0, maxV); + o0 = BFVec4::max(o0, minV); + BFVec4::save(dest + 8 * unit + 0 * 4, o0); + } +} +static void _MNNConvDwF23SourceTransUnit(const int16_t *source, int16_t *dest, size_t unit) { + if (unit <= 0) { + return; + } + BFVec4 v0 = BFVec4::load(source + 4 * 0); + BFVec4 v1 = BFVec4::load(source + 4 * 1); + BFVec4 v2; + BFVec4 v3; + source += 8; + + for (int x = 0; x < unit; ++x) { + v2 = BFVec4::load(source + 0 * 4); + v3 = BFVec4::load(source + 1 * 4); + auto m0 = v0 - v2; + auto m1 = v1 + v2; + auto m2 = v2 - v1; + auto m3 = v3 - v1; + + BFVec4::save(dest + 4 * 0, m0); + BFVec4::save(dest + 4 * 1, m1); + BFVec4::save(dest + 4 * 2, m2); + BFVec4::save(dest + 4 * 3, m3); + + source += 8; + dest += 16; + + v0 = v2; + v1 = v3; + } +} + +static void _MNNMatrixSub(float* CF, const float* AF, const float* BF, size_t widthC4, size_t cStride, size_t aStride, + size_t bStride, size_t height) { + auto A = (int16_t*)AF; + auto B = (int16_t*)BF; + auto C = (int16_t*)CF; + for (int y = 0; y < height; ++y) { + auto a = A + aStride * y; + auto b = B + bStride * y; + auto c = C + cStride * y; + for (int x = 0; x < widthC4; ++x) { + BFVec4::save(c + 4 * x, BFVec4::load(a + 4 * x) - BFVec4::load(b + 4 * x)); + } + } +} +static void _MNNMatrixAdd(float* CF, const float* AF, const float* BF, size_t widthC4, size_t cStride, size_t aStride, + size_t bStride, size_t height) { + auto A = (int16_t*)AF; + auto B = (int16_t*)BF; + auto C = (int16_t*)CF; + for (int y = 0; y < height; ++y) { + auto a = A + aStride * y; + auto b = B + bStride * y; + auto c = C + cStride * y; + for (int x = 0; x < widthC4; ++x) { + BFVec4::save(c + 4 * x, BFVec4::load(a + 4 * x) + BFVec4::load(b + 4 * x)); + } + } +} + +static void _MNNStrassenMergeCFunction(float* c11F, float* c12F, float* c21F, float* c22F, float* xAddrF, size_t cStride, + size_t eSub, size_t hSub) { + auto c11 = (int16_t*)c11F; + auto c12 = (int16_t*)c12F; + auto c21 = (int16_t*)c21F; + auto c22 = (int16_t*)c22F; + auto xAddr = (int16_t*)xAddrF; + for (int y=0; y= height || w < 0 || w >= width) { + return -1; + } + } else { + // Clearly, CLAMP is the right way to go for GridSamplePaddingMode_BORDER + // For GridSamplePaddingMode_REFLECTION, since we have reflected the values into (-1, 1), + // the leftover reflections degrade to 
GridSamplePaddingMode_BORDER + h = h < 0 ? 0 : ( h > (height - 1) ? (height - 1) : h); + w = w < 0 ? 0 : ( w > (width - 1) ? (width - 1) : w); + } + return h * width * 4 + w * 4; +} + +void _MNNGridSampleInterp(float* output, const float* input, const float* cord, size_t inH, size_t inW, size_t outW, size_t channelCUnit, size_t inOffset, size_t outOffset, bool sampleMode, bool padMode) { + int16_t* outputPtr = (int16_t*)output; + const int16_t* inputPtr = (const int16_t*)input; + const int16_t* cordPtr = (const int16_t*)cord; + + for (auto ow = 0; ow < outW; ++ow) { + auto w = MNNLowpToFp32(cordPtr[2 * ow + 0]); + auto h = MNNLowpToFp32(cordPtr[2 * ow + 1]); + BFVec4 interp; + + if (sampleMode == true) { //sampleMode == SampleMode_NEAREST + int nh = ::floor(h + 0.5f); + int nw = ::floor(w + 0.5f); + size_t ns = _MNNGridSampleComputeOffset(nh, nw, inH, inW, padMode); + for (int k = 0; k < channelCUnit; ++k) { + interp = ns == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + ns); + BFVec4::save(outputPtr + k * outOffset + 4 * ow, interp); + } + } else { //sampleMode == GridSampleMode_BILINEAR + int w0_h = ::floor(h); + int w0_w = ::floor(w); + int w1_h = ::ceil(h); + int w1_w = ::ceil(w); + auto oneV = BFVec4(1.0f); + + auto f0 = BFVec4((float)w1_w - w); + auto f1 = oneV - f0; + auto h0 = BFVec4((float)w1_h - h); + auto h1 = oneV - h0; + + size_t s00 = _MNNGridSampleComputeOffset(w0_h, w0_w, inH, inW, padMode); + size_t s01 = _MNNGridSampleComputeOffset(w0_h, w1_w, inH, inW, padMode); + size_t s10 = _MNNGridSampleComputeOffset(w1_h, w0_w, inH, inW, padMode); + size_t s11 = _MNNGridSampleComputeOffset(w1_h, w1_w, inH, inW, padMode); + + for (int k = 0; k < channelCUnit; ++k) { + BFVec4 i00 = s00 == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s00); + BFVec4 i01 = s01 == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s01); + BFVec4 i10 = s10 == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s10); + BFVec4 i11 = s11 == -1 ? 
BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s11); + + BFVec4 i0 = i00 * f0 + i01 * f1; + BFVec4 i1 = i10 * f0 + i11 * f1; + + interp = i0 * h0 + i1 * h1; + BFVec4::save(outputPtr + k * outOffset + 4 * ow, interp); + } + } + } +} + + +static void _MNNAddC4WithStride(const float* sourceF, float* destF, size_t srcStride, size_t dstStride, size_t count) { + auto source = (const int16_t*)sourceF; + auto dest = (int16_t*)destF; + for (int i = 0; i < count; ++i) { + auto s = source + i * srcStride; + auto d = dest + i * dstStride; + BFVec4::save(d, BFVec4::load(d) + BFVec4::load(s)); + } +} +static void _MNNDeconvRunForUnitDepthWise(const int16_t* dst, int16_t* src, const int16_t* weight, size_t fw, size_t fh, + size_t weight_y_step, size_t dilateX_step, size_t dilateY_step) { + int fx, fy; + auto src_z = src; + auto weight_z = weight; + BFVec4 dstV = BFVec4::load(dst); + for (fy = 0; fy < fh; ++fy) { + auto src_y = src_z + fy * dilateY_step; + auto weight_y = weight_z + fy * weight_y_step; + for (fx = 0; fx < fw; ++fx) { + BFVec4 weight_x = BFVec4::load(weight_y + 4 * fx); + BFVec4 src_x = BFVec4::load(src_y + fx * dilateX_step); + BFVec4::save(src_y + fx * dilateX_step, src_x + weight_x * dstV); + } + } +} +static void _MNNDeconvRunForLineDepthwise(const int16_t* dst, int16_t* src, const int16_t* weight, size_t width, size_t src_w_setup, + size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step) { + int dx; + for (dx = 0; dx < width; ++dx) { + auto dst_x = dst + dx * 4; + auto src_dx = src + src_w_setup * dx; + _MNNDeconvRunForUnitDepthWise(dst_x, src_dx, weight, fw, fh, fw * 4, dilateX_step, dilateY_step); + } +} + +static void _MNNComputeMatMulForH_1_BF16(const float* AF, const float* BF, float* CF, const float* biasPtrF, const MatMulParam* param, size_t tId) { + auto A = (const int16_t*)AF; + auto B = (const int16_t*)BF; + auto C = (int16_t*)CF; + auto biasPtr = (const int16_t*)biasPtrF; + int e = param->e; + int l = param->l; + int numberThread = param->numberThread; + float biasValue = 0.0f; + auto bf = BF16Functions::get(); + if (nullptr != biasPtr) { + bf->MNNLowpToFp32(biasPtr, &biasValue, 1); + } + if (param->ATranspose) { + auto eC4 = e / 4; + auto eR = e % 4; + for (int y=tId; y 0) { + BFVec4 sumValue = BFVec4(biasValue); + auto srcY = A + eC4 * 4; + int16_t AR[4]; + for (int x=0; x 0) { + int16_t AR[4] = {0, 0, 0, 0}; + int16_t BR[4] = {0, 0, 0, 0}; + ::memcpy(AR, srcY + lC4 * 4, lR * sizeof(int16_t)); + ::memcpy(BR, B + 4 * lC4, lR * sizeof(int16_t)); + sumValue = sumValue + BFVec4::load(AR) * BFVec4::load(BR); + } + float sumSingle = sumValue[0] + sumValue[1] + sumValue[2] + sumValue[3]; + bf->MNNFp32ToLowp(&sumSingle, C + y, 1); + } +} + +static void _MNNComputeMatMulForE_1_BF16(const float* AF, const float* BF, float* CF, const float* biasPtrF, const MatMulParam* param, size_t tId) { + auto l = param->l; + auto h = param->h; + auto numberThread = param->numberThread; + auto lC4 = l / 4; + auto lR = l % 4; + auto A = (const int16_t*)AF; + auto B = (const int16_t*)BF; + auto C = (int16_t*)CF; + auto biasPtr = (const int16_t*)biasPtrF; + auto bf16 = BF16Functions::get(); + if (param->BTranspose) { + for (int y=tId; y 0) { + int16_t AR[4] = {0, 0, 0, 0}; + int16_t BR[4] = {0, 0, 0, 0}; + ::memcpy(AR, A + lC4 * 4, lR * sizeof(int16_t)); + ::memcpy(BR, by + 4 * lC4, lR * sizeof(int16_t)); + sumValue = sumValue + BFVec4::load(AR) * BFVec4::load(BR); + } + float sumRemain = sumValue[0] + sumValue[1] + sumValue[2] + sumValue[3]; + if (nullptr != biasPtr) { + sumRemain += 
BFVec4::broadcast(biasPtr[y])[0]; + } + bf16->MNNFp32ToLowp(&sumRemain, C + y, 1); + } + } else { + auto hC4 = h / 4; + auto hR = h % 4; + for (int y=tId; y 0) { + auto bs = B + 4 * hC4; + BFVec4 sumValue = BFVec4(0.0f); + if (biasPtr != nullptr) { + int16_t biasTemp[4]; + ::memcpy(biasTemp, biasPtr + 4 * hC4, hR * sizeof(int16_t)); + sumValue = BFVec4::load(biasTemp); + } + auto srcY = A + 4 * hC4 * l; + int16_t bTemp[4]; + for (int x=0; xMNNConvRunForLineDepthwise = MNNConvRunForLineDepthwiseBF16; + gInstance->MNNConvRunForUnitDepthWise = MNNConvRunForUnitDepthWiseBF16; + gInstance->MNNAxByClampBroadcastUnit = MNNAxByClampBroadcastUnitBF16; + gInstance->MNNFp32ToLowp = _MNNFp32ToLowp; + gInstance->MNNLowpToFp32 = _MNNLowpToFp32; + gInstance->bytes = 2; + gInstance->pack = 4; + gInstance->MNNPackCUnit = (decltype(gInstance->MNNPackCUnit))MNNPackC4Int16; + gInstance->MNNUnpackCUnit = (decltype(gInstance->MNNUnpackCUnit))MNNUnpackC4Int16; + gInstance->MNNUnpackCUnitTranspose = (decltype(gInstance->MNNUnpackCUnitTranspose))MNNPackTransposeInt16; + gInstance->MNNPackCUnitTranspose = (decltype(gInstance->MNNPackCUnitTranspose))MNNUnpackTransposeInt16; + gInstance->MNNConvDwF23MulTransUnit = _MNNConvDwF23MulTransUnit; + gInstance->MNNSourceTransformCommonF23 = _MNNSourceTransformCommonF23; + gInstance->MNNMultiAndDestTransformCommon23 = _MNNMultiAndDestTransformCommon23; + gInstance->MNNMatrixAdd = _MNNMatrixAdd; + gInstance->MNNMatrixSub = _MNNMatrixSub; + gInstance->MNNStrassenMergeCFunction = _MNNStrassenMergeCFunction; + gInstance->penalty = 10.0f; + gInstance->MNNScaleAndAddBias = _MNNScaleAndAddBias; + gInstance->MNNGridSampleComputeCord = _MNNGridSampleComputeCord; + gInstance->MNNGridSampleInterp = _MNNGridSampleInterp; + gInstance->MNNCopyC4WithStride = MNNCopyC4Int16WithStride; + gInstance->MNNAddC4WithStride = _MNNAddC4WithStride; + gInstance->chooseWinoSourceTransformPack = (decltype(gInstance->chooseWinoSourceTransformPack))(WinogradFunctionHalf::chooseWinoSourceTransformPack); + gInstance->chooseWinoSourceUnrollTransform = (decltype(gInstance->chooseWinoSourceUnrollTransform))(WinogradFunctionHalf::chooseSourceUnrollTransform); + gInstance->chooseWinoDestUnrollTransform = (decltype(gInstance->chooseWinoDestUnrollTransform))(WinogradFunctionHalf::chooseWinoDestUnrollTransform); + gInstance->MNNDeconvRunForLineDepthwise = (decltype(gInstance->MNNDeconvRunForLineDepthwise))_MNNDeconvRunForLineDepthwise; + gInstance->MNNDeconvRunForUnitDepthWise = (decltype(gInstance->MNNDeconvRunForUnitDepthWise))_MNNDeconvRunForUnitDepthWise; + gInstance->MNNSelectBinaryFunctionForFloat = BF16BinaryFloatSelect; + gInstance->MNNSelectUnaryFunctionForFloat = BF16UnaryFloatSelect; + gInstance->MNNReluWithSlopeChannel = MNNReluWithSlopeChannelBF16;// TODO: Optimize it + +#if !defined(MNN_USE_SSE) && !defined(MNN_USE_NEON) + gInstance->penalty = 1.5f; + gInstance->MNNPackForMatMul_B = MNNPackForMatMul_B_BF16; // common function MNNPackForMatMul_B_BF16 is needed even with out sse or arm neon. 
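+    // Descriptive note (added comment, not in the original source): this
+    // #if !defined(MNN_USE_SSE) && !defined(MNN_USE_NEON) block is only compiled when
+    // neither SSE nor NEON is available, and installs the portable C++ pack/matmul
+    // kernels defined earlier in this file into the CoreFunctions table. When
+    // MNN_USE_SSE or MNN_USE_NEON is enabled, the same pointers are instead set in the
+    // architecture-specific branches further below.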
+ gInstance->MNNPackC4ForMatMul_A = MNNPackC4ForMatMul_A_BF16;// + gInstance->MNNPackedMatMul = (decltype(gInstance->MNNPackedMatMul))MNNPackedMatMul_BF16; + gInstance->MNNPackedMatMulRemain = (decltype(gInstance->MNNPackedMatMulRemain))MNNPackedMatMulRemain_BF16; +#endif + gInstance->MNNComputeMatMulForH_1 = _MNNComputeMatMulForH_1_BF16; + gInstance->MNNComputeMatMulForE_1 = _MNNComputeMatMulForE_1_BF16; + gInstance->MNNPoolingAvg = (decltype(gInstance->MNNPoolingAvg))(poolingAvg); + gInstance->MNNPoolingMax = (decltype(gInstance->MNNPoolingMax))(poolingMax); + gInstance->MNNPoolingMaxWithRedice = (decltype(gInstance->MNNPoolingMaxWithRedice))(poolingMaxWithRedice); + +#if defined(MNN_USE_SSE) + gInstance->MNNPackForMatMul_B = _SSE_MNNPackForMatMul_B_BF16; + auto cpuFlags = libyuv::InitCpuFlags(); + if (!(cpuFlags & libyuv::kCpuHasF16C)) { + delete gInstance; + gInstance = nullptr; + return false; + } + if (cpuFlags & libyuv::kCpuHasAVX2) { + gInstance->MNNPackForMatMul_B = _AVX_MNNPackForMatMul_B_BF16; + gInstance->MNNGetMatMulPackMode = _AVX_MNNGetMatMulPackMode_BF16; + gInstance->MNNPackC4ForMatMul_A = _AVX_MNNPackC4ForMatMul_A_BF16; + gInstance->MNNPackedMatMul = _AVX_MNNPackedMatMulFMA_BF16; + gInstance->MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemainFMA_BF16; + return true; + } +#elif defined(MNN_USE_NEON) + gInstance->MNNPackForMatMul_B = NEON_MNNPackForMatMul_B_BF16; + gInstance->MNNGetMatMulPackMode = NEON_MNNGetMatMulPackMode_BF16; + gInstance->MNNPackC4ForMatMul_A = NEON_MNNPackC4ForMatMul_A_BF16; + gInstance->MNNPackedMatMul = NEON_MNNPackedMatMul_BF16; + gInstance->MNNPackedMatMulRemain = NEON_MNNPackedMatMulRemain_BF16; + gInstance->MNNConvRunForLineDepthwise = NEON_MNNConvRunForLineDepthwise_BF16; + gInstance->MNNConvRunForUnitDepthWise = NEON_MNNConvRunForUnitDepthWise_BF16; + gInstance->MNNAxByClampBroadcastUnit = NEON_MNNAxByClampBroadcastC4_BF16; +#ifdef __aarch64__ + cpuinfo_arm_isa gCPUInfo; + cpuinfo_arm_init(&gCPUInfo); + gInstance->supportFp16arith = gCPUInfo.fp16arith; + gInstance->supportSDot = gCPUInfo.dot; + gInstance->supportI8mm = gCPUInfo.i8mm; + if (gInstance->supportI8mm) { + gInstance->MNNPackForMatMul_B = ARMV86_MNNPackForMatMul_B_BF16; + gInstance->MNNPackC4ForMatMul_A = ARMV86_MNNPackC4ForMatMul_A_BF16; + gInstance->MNNGetMatMulPackMode = ARMV86_MNNGetMatMulPackMode_BF16; + gInstance->MNNPackedMatMul = ARMV86_MNNPackedMatMul_BF16; + gInstance->MNNPackedMatMulRemain = ARMV86_MNNPackedMatMulRemain_BF16; + } +#endif + return true; +#endif + // TODO: raw cpu version of bf16 + return true; +} + +CoreFunctions* BF16Functions::get() { + return gInstance; +} +}; diff --git a/backupcode/cpubackend/bf16/BF16Functions.hpp b/backupcode/cpubackend/bf16/BF16Functions.hpp new file mode 100644 index 000000000..e6b29a0f6 --- /dev/null +++ b/backupcode/cpubackend/bf16/BF16Functions.hpp @@ -0,0 +1,16 @@ +#ifndef BF16Functions_hpp +#define BF16Functions_hpp +#include +#include +#include +#include "core/Macro.h" +#include "../compute/CommonOptFunction.h" +namespace MNN { +class BF16Functions { +public: + static bool init(); + static CoreFunctions* get(); +}; +}; + +#endif diff --git a/source/backend/cpu/bf16/BF16Unary.cpp b/backupcode/cpubackend/bf16/BF16Unary.cpp similarity index 100% rename from source/backend/cpu/bf16/BF16Unary.cpp rename to backupcode/cpubackend/bf16/BF16Unary.cpp diff --git a/source/backend/cpu/bf16/BF16Unary.hpp b/backupcode/cpubackend/bf16/BF16Unary.hpp similarity index 100% rename from source/backend/cpu/bf16/BF16Unary.hpp rename to 
backupcode/cpubackend/bf16/BF16Unary.hpp diff --git a/backupcode/cpubackend/bf16/CMakeLists.txt b/backupcode/cpubackend/bf16/CMakeLists.txt new file mode 100644 index 000000000..b533bec6f --- /dev/null +++ b/backupcode/cpubackend/bf16/CMakeLists.txt @@ -0,0 +1,19 @@ + +file(GLOB MNN_BF16_SRCS "${CMAKE_CURRENT_LIST_DIR}/*") + +file(GLOB MNN_BF16_SRCS_ASM "${CMAKE_CURRENT_LIST_DIR}/asm/*") + +add_library( + MNN_BF16 + OBJECT + ${MNN_BF16_SRCS} + ) +target_compile_options(MNN_BF16 PRIVATE -DMNN_SUPPORT_BF16) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)") + if (MNN_USE_SSE) + target_compile_options(MNN_BF16 PRIVATE -DMNN_USE_SSE) + if (MNN_SSE_USE_FP16_INSTEAD) + target_compile_options(MNN_BF16 PRIVATE -DMNN_SSE_USE_FP16_INSTEAD -mf16c) + endif() + endif() +endif() diff --git a/backupcode/cpubackend/bf16/VecHalf.hpp b/backupcode/cpubackend/bf16/VecHalf.hpp new file mode 100644 index 000000000..d5fe3a69f --- /dev/null +++ b/backupcode/cpubackend/bf16/VecHalf.hpp @@ -0,0 +1,517 @@ +// +// VecHalf.hpp +// MNN +// +// Created by MNN on 2021/01/26. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef VecHalf_hpp +#define VecHalf_hpp +#include "core/Macro.h" +#include +#include +#include // supply std::max and std::min + +#ifdef MNN_USE_NEON +#include +#endif +#ifdef MNN_USE_SSE +#if defined(_MSC_VER) +#include +#else +#include +#endif +#endif + +namespace MNN { +namespace Math { + +template +struct VecHalf { + using VecType = VecHalf; + std::array value; + VecType operator+(const VecType& lr) const { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = value[i] + lr.value[i]; + } + return dst; + } + VecType operator-(const VecType& lr) const { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = value[i] - lr.value[i]; + } + return dst; + } + VecType operator*(const VecType& lr) const { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = value[i] * lr.value[i]; + } + return dst; + } + VecType operator+=(const VecType& lr) { + for (int i = 0; i < N; ++i) { + value[i] = value[i] + lr.value[i]; + } + return *this; + } + VecType operator-=(const VecType& lr) { + for (int i = 0; i < N; ++i) { + value[i] = value[i] - lr.value[i]; + } + return *this; + } + VecType operator*(float lr) const { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = value[i] * lr; + } + return dst; + } + + VecType& operator=(const VecType& lr) { + for (int i = 0; i < N; ++i) { + value[i] = lr.value[i]; + } + return *this; + } + VecType operator-() { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = -value[i]; + } + return dst; + } + VecHalf() { + } + VecHalf(const float v) { + for (int i = 0; i < N; ++i) { + value[i] = v; + } + } + + VecHalf(float v0, float v1, float v2, float v3) { + value[0] = v0; + value[1] = v1; + value[2] = v2; + value[3] = v3; + } + VecHalf(std::array&& v) { + value = std::move(v); + } + VecHalf(const VecType& lr) { + for (int i = 0; i < N; ++i) { + value[i] = lr.value[i]; + } + } + float operator[](size_t i) { + return value[i]; + } + static VecType broadcast(int16_t val) { + VecType v; + auto tempV = (int32_t*)v.value.data(); + for (int i = 0; i < N; ++i) { + tempV[i] = val << 16; + } + return v; + } + static VecType broadcast(int16_t* val) { + VecType v; + auto tempV = (int32_t*)v.value.data(); + tempV[0] = (*val) << 16; + for (int i = 1; i < N; ++i) { + tempV[i] = tempV[0]; + } + return v; + } + static VecType load(const int16_t* addr) { + VecType v; + auto tempV = 
(int32_t*)v.value.data(); + for (int i = 0; i < N; ++i) { + tempV[i] = addr[i] << 16; + } + return v; + } + static void save(int16_t* addr, const VecType& v) { + auto tempV = (int32_t*)v.value.data(); + for (int i = 0; i < N; ++i) { + addr[i] = tempV[i] >> 16; + } + } + static VecType max(const VecType& v1, const VecType& v2) { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = std::max(v1.value[i], v2.value[i]); + } + return dst; + } + static VecType min(const VecType& v1, const VecType& v2) { + VecType dst; + for (int i = 0; i < N; ++i) { + dst.value[i] = std::min(v1.value[i], v2.value[i]); + } + return dst; + } + static VecType fma(const VecType& v1, const VecType& v2, const VecType& v3) { + return v1 + v2 * v3; + } + static VecType fms(const VecType& v1, const VecType& v2, const VecType& v3) { + return v1 - v2 * v3; + } + static inline void transpose4(VecType& vec0, VecType& vec1, VecType& vec2, VecType& vec3) { + VecType source[4] = {vec0, vec1, vec2, vec3}; + for (int i = 0; i < N; ++i) { + vec0.value[i] = source[i % 4].value[i >> 2]; + vec1.value[i] = source[i % 4].value[(i + N)>> 2]; + vec2.value[i] = source[i % 4].value[(i + 2 * N)>> 2]; + vec3.value[i] = source[i % 4].value[(i + 3 * N)>> 2]; + } + } + + static inline void transpose12(int16_t* srcPtr, const size_t packCUnit) { + + MNN_ASSERT(false); + } +}; + +#if defined(MNN_USE_SSE) + +template<> +struct VecHalf<4> { + using VecType = VecHalf<4>; + __m128 value; + VecType operator+(const VecType& lr) const { + VecType dst = { _mm_add_ps(value, lr.value) }; + return dst; + } + VecType operator-(const VecType& lr) const { + VecType dst = { _mm_sub_ps(value, lr.value) }; + return dst; + } + VecType operator*(const VecType& lr) const { + VecType dst = { _mm_mul_ps(value, lr.value) }; + return dst; + } + VecType operator+=(const VecType& lr) { + value = _mm_add_ps(value, lr.value); + return *this; + } + VecType operator-=(const VecType& lr) { + value = _mm_sub_ps(value, lr.value); + return *this; + } + VecType operator*(float lr) const { + VecType dst = { _mm_mul_ps(value, _mm_set1_ps(lr)) }; + return dst; + } + + VecType& operator=(const VecType& lr) { + value = lr.value; + return *this; + } + VecType operator-() { + VecType dst; +#if defined(_MSC_VER) + dst.value = _mm_xor_ps(value, _mm_set1_ps(-0.f)); // Using unary operation to SSE vec is GCC extension. We can not do this directly in MSVC. +#else + dst.value = -value; +#endif + return dst; + } + VecHalf() { + } + VecHalf(const float v) { + value = _mm_set1_ps(v); + } + VecHalf(const float f0, const float f1, const float f2, const float f3) { + value = _mm_set_ps(f0, f1, f2, f3); + } + VecHalf(__m128& v) { + value = v; + } + VecHalf(__m128&& v) { + value = std::move(v); + } + VecHalf(const VecType& lr) { + value = lr.value; + } + VecHalf(VecType&& lr) { + value = std::move(lr.value); + } + float operator[](size_t i) { +#if defined(_MSC_VER) // X64 native only mandatory support SSE and SSE2 extension, and we can not find intrinsic function to extract element directly by index in SSE and SSE2 extension. 
+ float temp[4]; + _mm_storeu_ps(temp, value); + return temp[i]; +#else + return value[i]; +#endif + } + static VecType broadcast(int16_t val) { + auto temp = _mm_set1_epi16(val); +#ifndef MNN_SSE_USE_FP16_INSTEAD + auto zero = _mm_xor_si128(temp, temp); + auto res = _mm_castsi128_ps(_mm_unpacklo_epi16(zero, temp)); +#else + auto res = _mm_cvtph_ps(temp); +#endif + VecType v = { std::move(res) }; + return v; + } + static VecType broadcast(int16_t* val) { + return broadcast(*val); + } + static VecType load(const int16_t* addr) { + auto temp = _mm_loadl_epi64((__m128i*)addr); +#ifndef MNN_SSE_USE_FP16_INSTEAD + auto zero = _mm_xor_si128(temp, temp); + auto res = _mm_castsi128_ps(_mm_unpacklo_epi16(zero, temp)); +#else + auto res = _mm_cvtph_ps(temp); +#endif + VecType v = { std::move(res) }; + return v; + } + static void save(int16_t* addr, const VecType& v) { +#ifndef MNN_SSE_USE_FP16_INSTEAD + auto temp = _mm_castps_si128(v.value); + temp = _mm_srai_epi32(temp, 16); + temp = _mm_packs_epi32(temp, temp); +#else + static __m128 gMinValue = _mm_set1_ps(-32768); + static __m128 gMaxValue = _mm_set1_ps(32767); + auto t = _mm_max_ps(v.value, gMinValue); + t = _mm_min_ps(t, gMaxValue); + auto temp = _mm_cvtps_ph(t, 0x8); +#endif + _mm_storel_epi64((__m128i*)addr, temp); + } + static VecType max(const VecType& v1, const VecType& v2) { + VecType dst = { _mm_max_ps(v1.value, v2.value) }; + return dst; + } + static VecType min(const VecType& v1, const VecType& v2) { + VecType dst = { _mm_min_ps(v1.value, v2.value) }; + return dst; + } + static VecType fma(const VecType& v1, const VecType& v2, const VecType& v3) { + return v1 + v2 * v3; + } + static VecType fms(const VecType& v1, const VecType& v2, const VecType& v3) { + return v1 - v2 * v3; + } + static inline void transpose4(VecType& vec0, VecType& vec1, VecType& vec2, VecType& vec3) { + __m128 tmp3, tmp2, tmp1, tmp0; + tmp0 = _mm_unpacklo_ps((vec0.value), (vec1.value)); + tmp2 = _mm_unpacklo_ps((vec2.value), (vec3.value)); + tmp1 = _mm_unpackhi_ps((vec0.value), (vec1.value)); + tmp3 = _mm_unpackhi_ps((vec2.value), (vec3.value)); + vec0.value = _mm_movelh_ps(tmp0, tmp2); + vec1.value = _mm_movehl_ps(tmp2, tmp0); + vec2.value = _mm_movelh_ps(tmp1, tmp3); + vec3.value = _mm_movehl_ps(tmp3, tmp1); + } + + // x86 VecHalf transpose12 unused in any case + static inline void transpose12(int16_t* srcPtr, const size_t packCUnit) { + MNN_ASSERT(false); + } +}; +#endif + +#if defined(MNN_USE_NEON) + +template<> +struct VecHalf<4> { + using VecType = VecHalf<4>; + float32x4_t value; + VecType operator+(const VecType& lr) const { + VecType dst = { vaddq_f32(value, lr.value) }; + return dst; + } + VecType operator-(const VecType& lr) const { + VecType dst = { vsubq_f32(value, lr.value) }; + return dst; + } + VecType operator*(const VecType& lr) const { + VecType dst = { vmulq_f32(value, lr.value) }; + return dst; + } + VecType operator*(const float lr) const { + VecType dst = { vmulq_f32(value, vdupq_n_f32(lr)) }; + return dst; + } + VecType operator+=(const VecType& lr) { + value = vaddq_f32(value, lr.value); + return *this; + } + VecType operator-=(const VecType& lr) { + value = vsubq_f32(value, lr.value); + return *this; + } + + VecType& operator=(const VecType& lr) { + value = lr.value; + return *this; + } + VecType operator-() { + VecType dst = { vnegq_f32(value) }; + return dst; + } + VecHalf() { + } + VecHalf(const float v) { + value = vdupq_n_f32(v); + } + VecHalf(const float f0, const float f1, const float f2, const float f3) { + vsetq_lane_f32(f0, 
value, 0); + vsetq_lane_f32(f1, value, 1); + vsetq_lane_f32(f2, value, 2); + vsetq_lane_f32(f3, value, 3); + } + VecHalf(float32x4_t& v) { + value = v; + } + VecHalf(float32x4_t&& v) { + value = std::move(v); + } + VecHalf(const VecType& lr) { + value = lr.value; + } + VecHalf(VecType&& lr) { + value = std::move(lr.value); + } + float operator[](const int i) { + // vgetq_lane_f32(value, i) does NOT work, i must be const number such as 0, 2, + return value[i]; + } + static VecType broadcast(int16_t* valPtr) { + VecType dst = { vreinterpretq_f32_s32(vshll_n_s16(vld1_dup_s16(valPtr), 16)) }; + return dst; + } + static VecType broadcast(int16_t val) { + VecType dst = { vreinterpretq_f32_s32(vshll_n_s16(vdup_n_s16(val), 16)) }; + return dst; + } + static VecType load(const int16_t* addr) { + + // equivalent to this: + // int16x4_t vec4s16 = vld1_s16(addr); // load bf16 data as fixed point data of 16-bit. + // int32x4_t vec4s32 =vshll_n_s16(vec4s16, 16); // shift left 16bit as 32-bit data. + // float32x4_t vec4f32 = vreinterpretq_f32_s32(vec4s32);// treat 32-bit fix point result as float32 data + // VecType dest = { vec4f32 }; // construct a struct of VecType + + VecType dst = { vreinterpretq_f32_s32(vshll_n_s16(vld1_s16(addr), 16)) }; + return dst; + } + static void save(int16_t* addr, const VecType& v) { + vst1_s16(addr, vshrn_n_s32(vreinterpretq_s32_f32(v.value), 16)); + return; + } + static VecType max(const VecType& v1, const VecType& v2) { + VecType dst = { vmaxq_f32(v1.value, v2.value) }; + return dst; + } + static VecType min(const VecType& v1, const VecType& v2) { + VecType dst = { vminq_f32(v1.value, v2.value) }; + return dst; + } + static VecType fma(const VecType& v1, const VecType& v2, const VecType& v3) { + VecType dst = {vmlaq_f32(v1.value, v2.value, v3.value)}; + return dst; + } + static VecType fms(const VecType& v1, const VecType& v2, const VecType& v3) { + VecType dst = {vmlsq_f32(v1.value, v2.value, v3.value)}; + return dst; + } + static inline void transpose4(VecType& vec0, VecType& vec1, VecType& vec2, VecType& vec3) { +#ifdef __aarch64__ + auto m0 = vtrn1q_s32(reinterpret_cast(vec0.value), reinterpret_cast(vec1.value)); + auto m1 = vtrn2q_s32(reinterpret_cast(vec0.value), reinterpret_cast(vec1.value)); + auto m2 = vtrn1q_s32(reinterpret_cast(vec2.value), reinterpret_cast(vec3.value)); + auto m3 = vtrn2q_s32(reinterpret_cast(vec2.value), reinterpret_cast(vec3.value)); + vec0.value = reinterpret_cast(vtrn1q_s64(reinterpret_cast(m0), reinterpret_cast(m2))); + vec1.value = reinterpret_cast(vtrn1q_s64(reinterpret_cast(m1), reinterpret_cast(m3))); + vec2.value = reinterpret_cast(vtrn2q_s64(reinterpret_cast(m0), reinterpret_cast(m2))); + vec3.value = reinterpret_cast(vtrn2q_s64(reinterpret_cast(m1), reinterpret_cast(m3))); +#else + + auto m0m1 = vtrnq_s32(reinterpret_cast(vec0.value), reinterpret_cast(vec1.value)); + auto m2m3 = vtrnq_s32(reinterpret_cast(vec2.value), reinterpret_cast(vec3.value)); + vec0.value = reinterpret_cast(m0m1.val[0]); + vec1.value = reinterpret_cast(m0m1.val[1]); + vec2.value = reinterpret_cast(m2m3.val[0]); + vec3.value = reinterpret_cast(m2m3.val[1]); + vec0.value = reinterpret_cast(vsetq_lane_s64(vgetq_lane_s64(reinterpret_cast(m2m3.val[0]), 0), reinterpret_cast(vec0.value), 1)); + vec1.value = reinterpret_cast(vsetq_lane_s64(vgetq_lane_s64(reinterpret_cast(m2m3.val[1]), 0), reinterpret_cast(vec1.value), 1)); + vec2.value = reinterpret_cast(vsetq_lane_s64(vgetq_lane_s64(reinterpret_cast(m0m1.val[0]), 1), reinterpret_cast(vec2.value), 0)); + vec3.value 
= reinterpret_cast(vsetq_lane_s64(vgetq_lane_s64(reinterpret_cast(m0m1.val[1]), 1), reinterpret_cast(vec3.value), 0)); + /* + generated arm32 assembly code is almost the same as: + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vtrn.32 d4, d6 + vtrn.32 d5, d7 + vswp d1, d4 + vswp d3, d6 + */ + +#endif + } + static inline void transpose4(int16x4_t& vec0, int16x4_t& vec1, int16x4_t& vec2, int16x4_t& vec3) { + auto trans0 = vtrn_s16(vec0, vec1); + auto m0 = trans0.val[0]; + auto m1 = trans0.val[1]; + auto trans1 = vtrn_s16(vec2, vec3); + auto m2 = trans1.val[0]; + auto m3 = trans1.val[1]; + auto trans2 = vtrn_s32(reinterpret_cast(m0), reinterpret_cast(m2)); + vec0 = reinterpret_cast(trans2.val[0]); + vec2 = reinterpret_cast(trans2.val[1]); + auto trans3 = vtrn_s32(reinterpret_cast(m1), reinterpret_cast(m3)); + vec1 = reinterpret_cast(trans3.val[0]); + vec3 = reinterpret_cast(trans3.val[1]); + + } + static inline void transpose12(int16_t* srcPtr, const size_t packCUnit) { + auto s0 = vld1_s16(srcPtr + 0 * packCUnit); + auto s3 = vld1_s16(srcPtr + 1 * packCUnit); + auto s6 = vld1_s16(srcPtr + 2 * packCUnit); + auto s9 = vld1_s16(srcPtr + 3 * packCUnit); + auto s1 = vld1_s16(srcPtr + 4 * packCUnit); + auto s4 = vld1_s16(srcPtr + 5 * packCUnit); + auto s7 = vld1_s16(srcPtr + 6 * packCUnit); + auto s10 = vld1_s16(srcPtr + 7 * packCUnit); + auto s2 = vld1_s16(srcPtr + 8 * packCUnit); + auto s5 = vld1_s16(srcPtr + 9 * packCUnit); + auto s8 = vld1_s16(srcPtr + 10 * packCUnit); + auto s11 = vld1_s16(srcPtr + 11 * packCUnit); + + transpose4(s0, s3, s6, s9); + transpose4(s1, s4, s7, s10); + transpose4(s2, s5, s8, s11); + + vst1_s16(srcPtr + 0 * packCUnit, s0); + vst1_s16(srcPtr + 1 * packCUnit, s1); + vst1_s16(srcPtr + 2 * packCUnit, s2); + vst1_s16(srcPtr + 3 * packCUnit, s3); + vst1_s16(srcPtr + 4 * packCUnit, s4); + vst1_s16(srcPtr + 5 * packCUnit, s5); + vst1_s16(srcPtr + 6 * packCUnit, s6); + vst1_s16(srcPtr + 7 * packCUnit, s7); + vst1_s16(srcPtr + 8 * packCUnit, s8); + vst1_s16(srcPtr + 9 * packCUnit, s9); + vst1_s16(srcPtr + 10 * packCUnit, s10); + vst1_s16(srcPtr + 11 * packCUnit, s11); + + } +}; +#endif + +} + +} +#endif diff --git a/source/backend/cpu/bf16/WinogradOptFunctionHalf.cpp b/backupcode/cpubackend/bf16/WinogradOptFunctionHalf.cpp similarity index 100% rename from source/backend/cpu/bf16/WinogradOptFunctionHalf.cpp rename to backupcode/cpubackend/bf16/WinogradOptFunctionHalf.cpp diff --git a/source/backend/cpu/bf16/WinogradOptFunctionHalf.hpp b/backupcode/cpubackend/bf16/WinogradOptFunctionHalf.hpp similarity index 100% rename from source/backend/cpu/bf16/WinogradOptFunctionHalf.hpp rename to backupcode/cpubackend/bf16/WinogradOptFunctionHalf.hpp diff --git a/source/backend/cpu/bf16/register.py b/backupcode/cpubackend/bf16/register.py similarity index 100% rename from source/backend/cpu/bf16/register.py rename to backupcode/cpubackend/bf16/register.py diff --git a/docs/contribute/op.md b/docs/contribute/op.md index 8591c6338..059a84d25 100644 --- a/docs/contribute/op.md +++ b/docs/contribute/op.md @@ -12,9 +12,10 @@ MNN 的算子转换与实现如下图, 3. 添加几何计算实现(可选,如果实现几何计算,无须后续在各后端添加算子实现) 4. 
Add per-backend operator implementations (optional; implement only the backends you need)
-![image.png](https://cdn.nlark.com/yuque/0/2021/png/405896/1618994794052-575a79b9-d291-4d1b-a630-79dd705bc977.png#clientId=u1c902b2d-d8e6-4&from=paste&height=701&id=ue223d8c2&margin=%5Bobject%20Object%5D&name=image.png&originHeight=1402&originWidth=3394&originalType=binary&ratio=1&size=256977&status=done&style=none&taskId=u4663d0eb-adcf-435b-b540-f61d2617cd4&width=1697)
+![image.png](pic1.png)
### Workflow for adding an operator
-![image.png](https://cdn.nlark.com/yuque/0/2021/png/405896/1618995111237-321c5ca8-ed99-4cfc-9d91-04deaa2e29eb.png#clientId=u1c902b2d-d8e6-4&from=paste&height=597&id=u518a1fda&margin=%5Bobject%20Object%5D&name=image.png&originHeight=1194&originWidth=2714&originalType=binary&ratio=1&size=222438&status=done&style=none&taskId=u9c8f2ef4-7bf3-4b18-9560-794c3344f01&width=1357)
+
+![image.png](pic2.png)
Simply put: prefer conversion first, then composition, then geometry computation, and finally the per-backend implementations.
## Adding the Schema description
@@ -254,25 +255,31 @@ REGISTER_CPU_OP_CREATOR(CPUMyCustomOpCreator, OpType_MyCustomOp);
```
### Adding a Metal implementation
-1. Add the shader
-Add `MetalMyCustomOp.metal` under the `source/backend/Metal` directory and add it to the Xcode project. The existing .metal files in that directory can serve as references.
-2. Declare the implementation class
-Add `MetalMyCustomOp.hpp` and `MetalMyCustomOp.cpp` under the `source/backend/Metal` directory and add them to the Xcode project:
+- Declare the implementation class
+
+Add `MetalMyCustomOp.hpp` and `MetalMyCustomOp.cpp` under the `source/backend/metal` directory
```cpp
class MetalMyCustomOp : public Execution {
public:
    virtual ErrorCode onResize(const std::vector<Tensor *> &inputs,
                               const std::vector<Tensor *> &outputs) override;
-    virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs,
-                                const std::vector<Tensor *> &outputs) override;
+    virtual void onEncode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs, id<MTLComputeCommandEncoder> encoder) override;
};
```
-3. Implement `onResize` and `onExecute`
-Unlike CPU tensors, which keep their data behind the host pointer, Metal data pointers live in `deviceId`, and `deviceId` stores an `id<MTLBuffer>`:
+- Implement `onResize` and `onEncode`
+
+Put memory allocation and group-size computation into the onResize function whenever possible.
+
+In onEncode, schedule the compute work with the encoder that is passed in; do not create your own command buffer or encoder.
+
+- Memory usage
+
+Unlike CPU tensors, which keep their data behind the host pointer, Metal data pointers live in `deviceId`, and `deviceId` stores an `id<MTLBuffer>`. Because of the memory-reuse mechanism, several tensors may share the same buffer and are distinguished by an offset:
```objectivec
auto buffer = (__bridge id<MTLBuffer>)(void *)tensor->deviceId();
+auto offset = TensorUtils::getDescribe(tensor)->extra.offset;
```
Op-specific parameters of a Metal op can be stored in an `id<MTLBuffer>`. The buffer's data type may differ from the tensor's, and a buffer may even mix several data types, as long as the correct length is specified when it is created. For example:
@@ -297,20 +304,11 @@ auto buffer = [context newDeviceBuffer:2 * sizeof(int) + 2 * sizeof(__fp16) acce
In general, heap is only used together with **CPUTransparent**. _heap only takes effect on iOS 10+; on iOS 9 and below it falls back to device._
-When using Metal, **do not create your own device and library unless there is a special reason**. Loading a library and compiling functions are time-consuming; **MNNMetalContext** already does the necessary caching. An example of executing Metal through the context:
-```cpp
-auto context = (__bridge MNNMetalContext *)backend->context();
-auto kernel = /* metal kernel name NSString */;
-auto encoder = [context encoder];
-auto bandwidth = [context load:kernel encoder:encoder];
-/* encoder set buffer(s)/sampler(s) */
-[context dispatchEncoder:encoder
-            threads:{x, y, z}
-    maxThreadsPerGroup:maxThreadsPerThreadgroup]; // recommended way to dispatch
-[encoder endEncoding];
-```
+The Metal memory layout is the same as CPU-FP32-Neon: when a Tensor's dimensionFormat is NC4HW4, the C4NHW4 arrangement is used; otherwise the default linear layout applies.
+
+
+- Register the implementation class
-4. Register the implementation class
```cpp
class MetalMyCustomOpCreator : public MetalBackend::Creator {
public:
@@ -322,7 +320,11 @@ public:
REGISTER_METAL_OP_CREATOR(MetalMyCustomOpCreator, OpType_MyCustomOp);
```
-After adding the registration code, re-run CMake; the registration file is updated automatically
+- Project update
+
+Enter the source/backend/metal directory and run [ python3 MetalCodeGen.py . ] to update the self-registration files
+
+Re-run CMake, or manually add the new files to the Xcode project
### Adding a Vulkan implementation
1.
Add the shader
diff --git a/docs/contribute/pic1.png b/docs/contribute/pic1.png
new file mode 100644
index 0000000000000000000000000000000000000000..5cfd6fbc6282cfe9b5f4381816aa0bca8f7f386c
GIT binary patch
literal 714878
(base85-encoded PNG data omitted)
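The bf16 kernels and the `VecHalf` helpers added earlier in this patch all share one convention: a bf16 value is stored in an `int16_t` holding the upper 16 bits of an IEEE-754 float32, so a load shifts left by 16 and a store truncates by shifting right by 16 (see `VecHalf::load` / `VecHalf::save` and the `MNNLowpToFp32` / `MNNFP32ToBF16` calls above). The standalone sketch below is added purely for illustration and is not part of the patch; the function names are hypothetical.

```cpp
#include <cstdint>
#include <cstring>

// bf16 stored as int16_t: the top 16 bits of a float32 (truncating conversion,
// matching the generic VecHalf::save above, which shifts right by 16 without rounding).
static inline float bf16_to_fp32(int16_t v) {
    uint32_t bits = static_cast<uint32_t>(static_cast<uint16_t>(v)) << 16;
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

static inline int16_t fp32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return static_cast<int16_t>(bits >> 16);
}

// Example: 1.5f (0x3FC00000) survives the round trip exactly because its low 16 bits
// are zero; values that need the low 16 bits lose precision under truncation.
// float x = bf16_to_fp32(fp32_to_bf16(1.5f));   // x == 1.5f
```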
z{PUO!51|2MUtOk~iKC%GAxPn7EoC}{fdz~<+2mdK-uow9CZyeFn{63}Nk7?7$Tyy{ z|Ni^)$$QkxoC`~914z92(=-N6PwvlKQ3%Cja+)EsywgrQe*EK~FrwsSqzt}WI)23l zV#4Rlp7r}%Z{?{L!c3hqm0XY)m95J+@N55tfaDP_pnOQ8JbsBfDaYuyMo77+@TGHk zuk+w!!jgbaNw_a{SJOCJX>vMcDy~Vd45a>v2z>w(@ih)HkVi;H5;anZTrkeB&>MVW zbE^N*{xecZbV^TSoDn02TkPo0=G+jS6=!{l2e?LL0A6oORfwPpb0YUQ+YJ4%E)Q{5&V~mlqVm`aBnvo zLF~BGPSRLDLgkJB5}p*@*pEw4#e@DSF30Q1kgw-bV=uehvOEV;2Wjv?mXL!%EPR=i zmSYGraJ^=5UKcx>1eW4bsj=Ki-4gsODo?^?8Y=2}?2B^FIp+);HhivDg@>>zU&$0^ zpl+IKHEwr#?=D)o=dNs&Z^+LzTd5s$%zt3~Z~-T50sJr+67>w!*QKC`fR^#dBfrj} zSg`?;fAUSI)d`O{?6AW^A-cDJ@f(0)pXRPeKiH}Coy#<4Lsh=$Cp^=+Y(2RQ zt?1Tp?$jb!Wt?2gprKL)_oa5>Dw)XS$t317&X!2lq{FD`LgMwoS7X(Fgz*qs;fu_q zVa4;s54@1S;0N<|R^`4gez3@Dc>0;AH+siAXfqg}(+6U8fP~BSATYwW{zJ?pgF+;T z^eg?KF71%QtN3A;6CZmFBjy{6R(mgF{?%4njg2AjuE7KK4kKxBk`-25aSBh52Xz0_ zW}97gDq47-UE&V&-y9(>+d28EyY^?R{P22TlCWrT3KvZx`>kE zivBABq`B%gUnWr+oE!nRwUuK!;#1-=DIF-8AH{>;A(4pt=L$l0mmvwmv)Hu)>^e&i4Q1@NzsKk(;UzDYl- zA0)fwUjY0NwGF@0f0dQKmj%DpldyCj`ss+TK$U^o753$&PeyO@*T25;?6c3N1;XRr zv-8gDtg|*whQnfx*a<@;&NNw!)#=JnaQrb9!Al?zFi>}=Gq?#G1~i<{J@-5_jNn5^ z5d#+2+1OrS|xaxk5b!b#1!qr9d=x>n1VSH zxQjwG7CH}*T;HGr?TxJ`an4;(R%aD!b(=54eh{MXk?k{LTtw~lgLH5Gs~8&m%D+0{ zCpr2r-y%!s-?s%{7yrxsH)YyNp||4OqTyEggS6Pcv#R3H7hxV)tO?uoUldj8KjF)J zrD!XA0q_Ir4RtE`kN^Nc07*naRQj(a?e5zDlb_@TD1_89d5zw*n;wHP`hwWAO&Y)B zGhFjto662LbP$^k4}th1@?*WoKrHPenI*g_ecUeRM_o_KUyC(*w9$tSX=~by>74Oj z`pvA@An?g2pJYrJEA>Q0kgDnKDN`mNcIaU{?6?D)It?8%gr!^bwufqRXF3El+|#G) zl}QdsE_Es)8wff~6@cr3Zs?gqL@ucK&mh4wJ6)Gvdg=P>uaBIBTzv7xyY0R^pC55r z70G`4?f3G_uf*;m@7nZTJMOe2QRJPKBQk6k*L>^eK+0#EMQ9)g<==(=izXI8KQiK0 z|H>Mye`N>Q3B`&f)7N9-fX6dD*O@)-HCwpGY<_i@zM`gbZ09l+<}Xr*(`-g zUzAlLx8HvIm@#9xrS#dwkMR`~B3L3u%Cjhmq~L^9ryxsh{hKSst$ytV$hgVjQcw}MMZ*6 zF&$BoQAC{EubjUKQu~5I00LJ?DWg)hX5Qpa1;lcfRwTY7P<|tYoHf;ALFMwg>n{MfK~{YJ^D!UrI9y z6nu7;pqaqGG>HR8&3UoG#XuGS(m!Ofp0upzRzfER8UKoPe+qM_4#(*o#A2xVm#_Q_ z$HC*Tx%z6JT9KRsKn^a{HS>&)EXxvN{~V+;p-z4Elf)_%9WRRgNB)B*+b6t{stF_i zS(ys7{hLdKLfOClx8f^XBsadZ=zC>3xOF_&5SgIJp9ALq+k$WTbN(V-|MJ(kSwAU_ zihus)+mb64ocA3f-CN%B7Clhn$MX!u?2a?6Z z7TaOSs0X+7bodG|4&}F|60D+apK2O>6n4V#P0_zd**|*r96DW9-qF9T!Y2$}4j#%T zg9hoLSL}96NHJY*;B>n0zI)vi4nZS%RFxqmg333MhA6G}17<`^hF1HO7;W~SfZFj> z{g(3~z``?M*7yO_h9ArmhrT~bZ-I*ZH2vtGc0YwHLRF-)Oi;N%f+CGn5U~`2LRHH& zhcePoD_n+}7&{`X*l{W#$pn(?N>S^s@XNzaQ77P392yzxXz-n?C2h_~fvZHd!Eul6 z8Yf}#6WnY3%t`u@tog7#gv5cP{zNhui%Dj1-Fxo7TXJ^kC1GNPFBo6M{J0Do1!y?U zNo#=RLSDdbjR+PF0|vb7UGIXBF4u$!6S6(PzY zC(<~bs<$Y51kL#TW(6lb*O+D%OCHff=vnw7LW%_;!NSfo6@E|#W=_UkKt)zEzB{AP zOJWyfLzhK#aI(+93@zz8Qc4BN5dno*cgXCh8sGA8O2P!76G9!y9hH?9niYGgDpD10 z#coX%yj%3Io1`cU8|l-L>nk@8`?r z1R?+*G@dnQHXp_FvPYH#@_cXZT5GMzE>y_RBquctZ;*ZXBOkuv^2^~zADR}Op&(x< zi&3SrfibHqE2*5XWV{kEl(x`+n|4R`lj;s@&bmQUy7^oDH=8c_3^If&?enjty(8>R z?7xvAsfo2tGiS{8PVHJD=n%XQXQY|43euR3WxU+tJ)9xa_`jD)Q`N9BwSm)dZCiC4 zryaFQCGq>G#+i9nt#y`ff zmtTJQ*s)u~ciJ?~D(cX?oI??wl-N1;5T|V22=)utootC&hvgCEnfj!V8n=jngNmhtD1n z2*7p_mtJ}q;|RtGY%`1fgdqj@zzqHP$&IGMSzhqH?|l#Zga-N1&$w~h@FIgTFXT0in5 z2Z9kk#Zw=8gI<&9t{Pwg9;K}gVx$GXtD9L-kCl37%$!9p&VotC(5G)z;x14r1BrB# z?5BvI>@Db19v$f>CsJ@<>=TKaw7H;5P!*Nyx2y+oF!z+H8b6YSeM;7Rg#5Nhwe<<1 zw;NN?wa6fry~LeG`Yg z-3P*yp$)!7YZecG3kM_g`RAX5`nl(xqkOQWrIs2+Ieqh+-{hI_H1AD|AXsoX25%W3 zcKG36|N4kZMdO=+aRr)z z+wckQd{{t|75((V5BU|`ijeiw$Y1M68Tz~XZ>Rd9)^@AFZFep5x5eq!@|W;ZzLkEs zEJy$PNmMSTLe51$`bqM`4+7bb6SkyI#;)>6;SfcY`~LUu%F{EtRs8+p$}3rz!){+! 
zU3De*6DCX`0n*S6^8lHCCp(lCeA-uh893Dof{lvN_&J}GUYNwVi;k%%q0$~A;;Tki z`4_R9My@3~|J0EBpcBBxQY;YWBAO70y~Ij2(rZ(v@oFKn2}ng_L%CAtY_sh)S6_V% zcWiV+|A6;2VS4e!Kl{=bzj*RV|H*T8zId>z4+fdbExW9CZPSxy%N*k5>yRO`#dRsn z6y7;V%3A2Z#k5ipM?r_RHR^L@zAg5nfT&kZ{ zcb6^AC|Ni&+S{Y0Y(b0l9Cz$-e9(sR&r(Z{`uU}ovQi0Oph{nUx#hHZ zJljwX9eVxs*T4JS@8(+87E^Mz2l|JwB`^$Z@++^5S#^w7lgr-Rnpi4Ej9~H{=)0$! z0y*Z#44@Zl^qg|aDah8VZ?{?*StF4*?qpRdifO@UnB<1G=*VWK3R)41JMB@Dz?eEo z)Ic+;ac8lwh4Z!i^2@KX$||JeDL9FhHK?3SvWX+yCFf%kAA9JbhvL;ygTf6uVP^ce zQ)D@s@~wloR+^AUBuTPDlP@Ge8-5B>F#i_-KV!B18n{*dN{SR)vREa~xvqi)<65#E zK`@=MQNjWw?XzE8s&i&f~Ukuw1%9_a$*Y29#UU`+3&9oig$qWe_ zdQIY`Ar+Gqb^+}bOBkU{pvQPk;i5uP6i~9IAnJDR>7&svVz9<7P}Ju^M1}%QeK^R4 zBAmcCjY4G5YOHjb98UU&%*v9G<}V{v2|#WPhlXUPjeFty%C)_2R#<+y)z?@PHkJld znlt~oeY8-*;~~LQPd>%d1^kz1LqgF3cY$R2jb1`c;gdymHe1R!qzMy-sa1^pff=c- z+;;p1xaG>(Pa-%%f}1sWeetvAh|KcWoD=i{_?Jj(wkf55TIDx>3KCoXF93dxS6z3I zzmg)w3V+fIFHo^l57X80Hs!N^^{eZyyDpYU_p<%N9PM->aiU6$b8wRJQ{Fv2qehKl zS}@26ft*ifo2K{izdTNlV)A6|R7S{R>{Qm>#cs27^#zJ^0Y1Bi#0b{VrHjIG7D3@HkD_SyZT`NHNqs5L_cUH^034YQi)& z%14whOlhaK+ipAPamI**9!W@PD3)c=?V5e#jW+;`tMdF{)m2vurw9V*r5~R`SnqA? zhPpHo;^1VVd=nfv+XxBwZzdB)p_u|cC(GD^lJOH&AITc| zQ!+vH>NNr7L4T2Jt5L)PQrV^&%Q?3p0Bvs*fiCqI!@LN>O*mR@@FH;?@$3kP`Cs)t2gU3>4nHv(|zOTjv{Hcv389yRPEBOQ8qDxLo3VyAh2u=7YG0Bp1x=@f$_9;j~=`dQ# zVxt<76!j_*)QPKN65o_8Aqg$?qbnmgfOHw(ypzm*@g-G4ZV}69iZOviCwEy(Nq(e~ z7#7p?E%G}gRFc3VS!B}@5l%+IR}K@D0CRRz((+@5I>kY+lty}wJ@)wB?|w%Jtu9~W z*le@SrZU;bt6sC`F0sVod|eU8#N%k9j`@#cPCEG{YeQVPV48r0xFj^fbbcj?{z57# zhD8w3sD~+WHjUa~CqjIxjwG10AE|j0dFW6kAUH@4J1BTA2j%rQTz}Zt4t?;UhhClf zD#SF^)7zl}(9UEa--@zmxAgrsp z^wOVe-JIvvLz<*Qm`FC&4`arx^6azE;z#SQyRN#((RbAgHS_4{g6ts{?mviq2I;kv zNKno`wFKTx=bc$S)@i--9q$-4NFN~T#Umw;hN-++bHfcc-hA^dOfPz6hf~Wmv_5yA zef9xAns|sorF8YGvOXTB?}=-?>b3lrTypUyn{GOB!USp?0#Omuf1dQ`VZcCyKjDOL z1p=Chxl}1XvaKtZl$0E0O+*S&Pzof>1u5y$A(=9vqAj^D2-clW908{*AA;2397)ef zY007-C%eg(V-#~Gj6{O$%?;7aPu`rW@ogN#YQzEQD%H7kuqldsM|>Z!DACAQlNZIjxi4(qhmX1OaSY` z#>gn0J&hRW&7nD=(|GhT$9(#LPxCFB&mQ#ISRV=xx{D{DbnVUmA(c*HICHTw4hD6?xTtI-j=3kZ4W*`JdnSvx_f5&<7rP;Ep@)*kX$< z5KpSok>q!dbDo5wOemVL9Nee<6;DU{0a&Pkdjl)ndKMFF45ra|;hU_2*>>A)#3eA2 zjzw7thim-nfww&H;DZm~s(6iJafVXhCQtfzb_Az#vVA7*ifG_P5K`-`jJoPTcKzie z=o6*O*OAIwgP(39qfRTh7W^a#VFh0piA~~S$3PfDlbj(|qH2{t&GBu;;;AL2gctok-NH=9yMy^6<4Ni?y3mN{bW{Ggfv@u}Yhkj2&%GZiycJ(Vh-fHyX)ie`#9Gf{?8DUjs`-`Z7# zh-m>hVRT|In`5Hfq!36YAj85!D=wxAW1>X($_z)wcY>G}p%PeX>Cr3}7La%;En0{K z4iG-KWNYGt$L_uN-VzryE%**<@)pTLX^4@mLqvW2SIm{kEUS^yNnuq)$kRgVAV`o+ zeDo`&ibh4MOXsR3QQWl1ANqHGg(6kRFmf^j5fMj0PV7u5{$`elQkJp|E(_;Jk{0}Q z6B#u=U=awI@sl7me#Dqj%8OO4%tqe%5f1BQAEuyr#fQ0t5i2Me=hY01T2&3;mg`w%TIRMHk}@DrL_{ zv|ml$rFtn<(}LClD&~MbXz0J~z1LnJ{?Lc%=diL*q%KX?S-~piXLvX?Y0?Y4MS+C0 zhD^eI`O9DSb6EtkOG!vvFgvxln z3HePZno2)bgUdH1%h=L^#4z`4rHK%!l+I$Bz6IYQR*AulBC(vA(2q$Z2>3CvDgrUo zkt&P0kpyw2vxTx$$mKgn$_El*xe&QX#8?0H7sq_?6Gi>w*7=R$=j1B7d>y}){5hiF zOT1j~q{v$SS`8^$_@Vle&6HJ$E7l{14gbhTK1>u1i5BD{4q!q> z`i(<_7~Kd+_>miC5(f`r0m6$fzQi|(p=Qk>B}eqAQQBxsbo!C~unI|*&7EVe{2bNJYvdn6)nfbVzG)wIJtWKOuJdAR$Aao7y1IubxXE>K96x;K9bRrlt`l$T zs|4r@GdIC1KN2!ehdU~X8JVu<*=L`=_s{prOS`(a++qv%A><{p0D;QF>pib|Gv$(W zpWPl3jE}2R?uQ4ACiN5$XC;qQUww@UO==suu%Q7e31!*bJLMLmrxpO*) z2`NX~E~S3`&w+8Hf*?fb{P@sohH5?_?(Tl>`RC&bd&3l2sf`WrS{4(;m_1fMYnErF zwNR*MkKOm!{R1BuK70ghupGX6KWN}!DsY}oo&29CkH3HX5=$`iK z4j#;a4W?ogqG-lkpPV}F)YEueG-$ve^75%q9q`y=f4TF{J20>lPy9A}>^d_YczCsPY3-6XpkeDKhOGGA5BcYBDn`bNvEC=R}wO%W|=Mp zt!KQ#)JOlC$Vu#7D;Tr?_6szTr_6wsvzuj!-&DS2k0ZfT;7;6@qD?=Y@k6yWk4V;n zwIK3`KrTs(zJU5S=g#scp>p{mze?BTU&=Srpo&mQGCPNt_N$Tq`AL(AqW{CgNKJxh z#N%naP&b5Gsg@r!IMRJ$YH!{xx7^Is_Wt|tU;K*#!*{4uop}jh+v6vZP(bILbM7yG 
zaV4fgZA>*Dm9XC?2AVgtVnJf$%$ZBJ#&wZDgcE+wW|Vv~cjocLIfm)V01(oeK%;zJ z|5$TEujI#&`M6N;?YG@_$L+UUX-*zjV%K8-vV8Bhf#+rt$!V=gI{&jWs%Qni5&INi z+o4S5@@=A8$ede|g&59bgrx+<{`MZZ&Z~`>P8S<19e(%`HEp2aY8l@tHGe`A3VmDu ztpz_w`#%@^k+>E)E|kLZJ15eNo`6;=d_D5S!UVX^PN`_MUKcAuQRb=hUviH5C1$KQWHUB&6s zrcIbQag8O^QoLkyFgS1>gi@-;H8FN6_0}M zxbseG1Gu~OD^ST?G(em^XV}moH{Ws#+xiBwxVqzu3}l>F%+5ONtZld3Ru3w8z0r@* zk^kA}p51obIJLBT8p$(^UY-`hombDSiF87+;7V3dwe+DcFAv*%^UZ=H=_jaQ>~bV3 z^e+=$=)b4~B^LYF4Miahg?%>V>L4OzZ>4^yY{O|nQ{Mzen$b z`5=T}lE9Ja4+d?zGB2XS!U<(g1c%kc+C1L>()QSD5cT9tRxel`P5Z@Q%ORZD{e$m@ z>5s$=M`gxVO88+c7>p-v_`4GL&qRz_yMK7@d9O`-4bd4%eqguVpl6PewqBH~1iYA6 zZ})4!M*B{`fOnRWjZ&m>r!M2%qy0tokp$`o_EUoci(kkWxu6q3Hc7@Jf)R_1{Qd8K zzxLW|zA|}=CM%ip_h(fu`Ps#MWKc@-7?vNl0fNAJOnFa|Taya6Qj#XqElLovvexLw z#c72r$`@7a!uEH;{8w6ymWc}#oDb_+2GG;Pju12lw0A47w2}ve`jCjMT6V8@8~Gr% zhD6$+h9wdzeN2)19Mi%+Z@KxV4}IuE>;l1;>sDW5b-t7e8LidE8*jps9xh?221+H& zf4y``o1(O~>T`d1F2r-Xc&ydK_ZfD5|NHN`=N{$|ufP6Rci;WT_1E73%hCjm66?=M z!jXGAOI!d2BaBjg-`LM0GV1!R*cA2??LP96%0{JKcW83BMJgU>Nk_cY#`|b|9ZR3o zi|1`2yZB~cC4nh2(a~m1>piEmu7r;+lSd}d@e%1W5F=SRD=UD>tR-IVzf~44R!&+@ ziFXT42qcn39vlQ)@T>Yqfl!7w=v*lOfNDQc_t2OfOjz|S1W`q{TE^A<))Qn4#u%m5p}PPo@Bqg#ZyHB1*SV`6Na z4_oQ`-KtkHT5u_Sp5`z)C<$_5X8T8okVMwQJ6n8mNBh9c!*MWO%#tjC>}G}dX0sRN zF?a`NHZq9SMdVhq9&8uD_rz6=!<<~`LS1FVQ1&?jgC-AwMS}qOVF1a4hKbHWJ^k3f zDU_>Z{bW&HrnxfBgYv41hi$c{03 z@QM3!Zi>W--faF$RjGBXEOVYS=kB}jaS6^{bB)!}D#pWsR=8esjWzDR=blm=S)bBE z1k(4Qo}}}`X5tZ&h}|t=ca*1|e3HE>0l|Le_Re9(0Q*->%dSTv?PmY+efIh2(xaB5 zd~vA9#_YHwu_Gywz{|P^%Dq2Bul7^J)0!>=3BfvO0*Cxq&lyJ^~ zMCMxO_*G2%A?=Df{3|{4BP_VEr?p^7F1pqOrXjA;iGvLR4$hRNU?M;`0A~#eHLxXL zIwZAlk-+#FJ>h4v_$~O(qVrPlE6Q5_z^@{8DRwESMSevoBE<`b_ys$X{1PgsuM#>~ z_k5{s`yIAteJ>KI4PX|-%fcbRv3czXOJ$caZn1vV)Kud{xKC3+xZqQyAmGOkCO~*;aW4Em(4Un*(CKHwXId%E1?I#6Fcm3LD zrm>|a))juGpMLEp@D0?Gqb!pY;6D{pKgay^^M>&&`7=9-P_dVs#lPmOA0=xS+xQtd zehYrB|71TEWvw6diPgjr`oU%`r7$!lD2W*js1+oDZ*kF)ioO)RE97V0J1IQOW)LoS zkf^y@QetpZU@F=cEmVSW*>CwK5iPqU)_-1}y70ma8Jf{yW$$V}0J{GtKC$x3E7Pjs zk&w)sp~*FRUHIc?4*1N!e(YoNQ+GWPiX90z+k7*1n{@MH@e4+`i2c3rb(gRIZQFInFk3`7eoxS2J{T%D@JRrxz^OFQ(u@g>D4KdY51*_ z?x8#|oH2u0%(*?hr^d^PYGN4@5=*m6*E5#=GXO0K;wGQbY1qG_b%fzMI06*Z2f2^`0uq)Gq~Tw>POup(@XMVq3cfr}ysG-4 zqO9ew{kM=*Qntu%K%J}u^XMa10cpEnf@|xsTRr;dqZld{dDvlx{p2S5z@&VJ_|cd}TDWlubxyY`xES6pEQ{7zn}$u}keH5jAENhj8}ELIlOo)|d~ z{irVSqppyKc&RZSo=QI*kI+v<(+mIZDc^CIvA&!ndeF2{BliXvuUWTg2 z9eXS@>;;hL_K3%%ApL8meAJQE;B$Q=hM zk39P5jW^k330}L_tE02M$xqA)=8_IXWpAPXR`zR4RZ+@NQ;9cMr5I}aAzH^@>Ayw) zfkJ!7P7s3Y?t1Xyhv+f#hS!>Ft(EZk@dE(9zYqlt9sIo%lqG)D;ri4Sa{uBNzxddG z`>_)r4#H=0KK_YM0L6OJwbx#I%H&rD4b~SJaf<7Ib^Z3+Z_mmRtOiP|eT)aY`^is# z$}%fvqM3mH!odd*A2$3m2ORLt&;X7AfeKm{Xk(2W( zV^$^03?t7t;|$)QH;&TMZoW2c+U~pUcHh1CGGP7ZU3dNVi6`pC>h7+kMvY?9og}nt z3=84xu0q4JrG7)S0xB6*9Y|*XHfb7p6~D0mT}EyJ zp!@E6rsH zd0pN6?z`_TH{C>*-nRbRe*B{!^PGU`#n-j}5RbDnW&3y5Ql?6DzOeR^lO$c_DE(SL zp^;SBIkpWShK+v}no_=jlf&op3!y5MINpT*O9!4zlLH>3+;q!L?|xS7(Fuh%$EHqQPu3s?}q>>eI~jo~=hj~}_t!G`rX z)vF5VeIXn5Cjt_RaZn+18~so4X&vaF4CwCp;upVozySxaYyo;c?*lm>TU=+|b$)cw zk6HGPsNfSw`5bl3QD>ihwx%ZM&KW;`{E%UcWwfU=+2vU&oj5XNQwi~#b7Ma_$4wmg z4s7ZN-ODV{&w|v?kY+^|)Y`C8p#5H)tmIbwl1{Oyav%&r8vD#NVwbMl^XiPGc;WOB zD`T|(iZJZI;vPgAUsOdJ3e%~au7MG;Vx)75 zT$#ga9FT}AW(s!!@e@Z>KTtvy_&Fwi14Gv;0D+szQZ;dsQzSLM3oOwqeM)H*-FH3` zrsaebndl5bDo>MTS>a}*KtIpHvT3yN6GkJr2N(2;q*V@SM64wnb+a^M`2$K`1+1Qo zvJcS;E3OFHA|pofte7!8J(wkzSn|g|`7v7s0Z2Q?NPuMmv{D8xkBnFwI9tu49vw^C z;1Dzx013>;hE_BZISqL%YD#Cf4}D5->p=25^I3ig4z~YnKN^v61gP?y>__SIVTh#9 zAdHy$nvg8Nb(GR(`CTqSALY`dm8@bGa!B>92ik`si3wE}OgLVCPg1eYI4Z3I*RWB( 
zh|t(gv}S2Wmd9f{MWp|ZX->k=aeglv6_d|x_dcJ@+;#zpE9OTJd?A#&l3P(4*0i`Q0B)5v&r~1{FkNBL zs9?ZhFt*cZ|aO zUsXm4Z^y~{mkOi@k=DmT|CL6hNc~_Gn3L-tSq33C3x!+%(EOeKH-yL~BqNRbv+z|R z%2*RgT*=B^!Z~66`>~IGj427d2BMmpmO^=Y^2sNsPn%9X^VBoX?6S-Ih>AJ~pVIy~ z)24}wzU?^szyl9dg5F++Oc*f!xb|A>TzKIHI0F7bjZFw^^BDw6O^p=N9_;WhRRL}! z@Pw*<4h^^1Ib1Bc@{QAhaM~%SJ@?%6upkO&KkvNr#*Q7!hs*Jrd9&D>Q0o@S8eL}u zfCd?G0nT*O$tcwf-Elu$RQ+##swlyCe(nDT#~#n;p0mOxJ8gnjS_4NGV&O6yc z=$v!T*?Q}(H6W~dal}j7DkPNe;HDg$GqJ70Wp;=xMd$E#9LY+m(5t0xAa%9Sf2F)+ zccEITi^I0WLoeP>XTq-M;fEi2_UUJBv~#xFYRjTjl4B=nXc6{7S{DgZ;To9r^svV~ zU#SKVd&5EY*?S*m^k4x$zL`VskyWX*8`oTY%|;t-G-AXEj7LU@x4CU$KdB>*I06HU zBnN!zQwYo(>#RLw2#1SZdDT@s9{cczKg^t9*swBC9Wasub#?J1j5m(|@P|9sT5D}* zx*_LEs-Tl&`IRp_g6xMW6;=xBknunN-1EEavI~7)R#&l_2Cu|-Sn2|g=xiT-^iiyB z0V?SE<;m7lPd$aA$l5i({N?)|ei#Rg_K~R{CM$2b<(82nM?z0F)yQW3i=~)`Xfoia zX%wzvll&68P$<0IPq-yAk++d$!sU)TZf6gA&j~VK zQ1Ylc6r|}RPr#UtHwq6H9j({m#+hLf9E^jp`o#EbfS;P zO!~u*Jj_fwQ!{t{;jV4B-B!&F*63lgFv+l_j@jDMjC z4@i_wEzQW*rO`}}hRL7ox2DX5t`eNqr?^#ej-U_ww_k*wT5xI^d`+S^z5j;BZ@>MH zX|L)7zWn(kAu=phf2#6l%d%N|JVRzf&!H=wEB)(xqF21VDHxlp#gx}+XAEYmbRU<0R77Y_y$IyIW zpGa6ADZd2eaAiyS3G>VQbdr64q^{PFtTYTQnNosKTG#J_up-Sd^?xitp(Lj67p3}# zWKaaY)0R{4*R9JF)tV|meGbwHaAfA|90t8;Qp%izNY>|^_{mbjnu)$HIT`gC9T(<>!sY@y_e2#mB4gE+mC)3GzZ3&A0EB#b(smkU_;C+t zfA((zpO6Q-4*Ji`I|)omA415slj%GC+69^N2#tZ zMkD()KxLmJP26;@H&~WOpo$3Is z7^mK)ZejdJ6HNWX>>}@GQA;rQ$DSHT9d#t5EZ8u_y6D0SY44eTOiT^SMB{f)`R;Q5 zt_U^Sx#yn4HuKD7&^6%ExqG)EfW^wPDX2cGqA0<&{5df4k@M3Rtu@5dQP>%v!>{$0 z!Ylk%%2K{>O8?FEvt0m(NsiFyr<4A(0&4x_u&iexESUf1lzo5u!FRd!TmKjan|%1; zhrNMD`jj5_iAW6Hv2ey_P~H87nI# zbNg?f>_kQhrs%N~=H2K_GEBn$H{gxke57MHg-nK_o{$iv8y<1`)OWw~-@c0H*pa*Ut)<7|LKfez2Os z_??a-e-&BFpM(3wPXw*_rjE>HfDXScdQ91F#UW9WUsuvkDsE9H{3o7x{NkToLUl@E z@zR4kB!MD)OVasQy64aWYJI9Son=^4|Nq7%q`Q$0LApdFWORHf>69rYh=hQG0vjFD zqS8zRq(Kx!5f~j3qf2Bkx;Jtgi~oKP{?E4S+Jke>bv|e3bHDHVesRVNsXmiRtSC=$ zl;>zZ->q~uA4;c6`V+9`*?!|m(1@jM<)(h;i`_OUc>kic3Fvx-@>)>vMthh4>`P3w(h;b(z5}PSufD|6UoBkG)ghVi}s2j-2>BDzaflP^1`oqV;KanzHyS5<)ny zGH+jhD70dmX0DV=LK1#Y7i#>qtx>=;yZDydLyT6$P33*cPG&v}_i@nh_XsY|qQ=M1 z!~b1S?{ewZH;rM_A6jOCdrm8i3a!Qz)XEMX$2Na& z@2J5|(Hhk1S#JMmot2uBrQhG((z^F>x0!@^c~mNqA$eKPftaZb zCeMs-CTs2x(UV@G!Z}uTy!qGExM45V#W&#DNWvI$B_hi04t_4xnRn{f-dN#*r!jpT z1hgs}RYva(6xC!;kcM>dx~45ucAr-{+@7P5WX@xSOM!iqzr;bwg5 zarwQnkLX&Zfo(;Dj%3*Ci-M-2)4AYV>O9#c4jAFlwap{ zsgBV_d8ev03Y1GFWWLJTei*P>{JP`P3}f_sMlx4>_EL;>#BY4#S780NaN{h@G$?}Q zvL;;IkXaU9@3ovMI~6F#K+a(?iJWO*xLs}RE674Ia7>|Pv$3q6eVQF>`Lx3OAkbhg z;ftDnx_VD_Q{1QG9NpA3EXi_^{_`xhvA$N({$JEO zdMElQS!X4oh3qqB>JztZ6%J|20c+Z?1~N%tabmxuHi|@SFZ9luagWU4ent|NigTgS z%DvReqCN3{W8)(%rqy2v%=LY|d=_RCJ&-Jx#I#Qm-^<3)o<3yGtp>~1`PD1Q;yO+7e;Lcyr!wa6^i+%? 
z%Lg8Mlc^O9?&rnoMOM;gpwI8``xu7)20Y(=cut`}HPYjw6hLZ9gwL4El^EEBjC|5} zW^;d6`;ADGMYA(tD*fNP)@#R4K8M}RP|2CH2dyU)k>hGOvk(%bU9q-dPe6o^^q}LMusTzAnOQTw~ zdx-|8fTDIBNs`L)@LXg*`5J1O_&Jbk`CAQ*KJ}o+*GS=+=fWIOdDMe@jfDYO4_qsr zSlq6&9cow&=C+&fbY)0-zQ&Tk;bzOy5xLH0xAUKTC>&^}z2`eZcgv|tFxO7_y)M)W36omC8u4WblpeR<3O4n=>{h3xIc zAT;BEs~pjjU`AH{tfjwJST@4)q?km9EK;NKhTJ+hNM%w;3f;ChsEvp|YTxhYG*rqO z%8!w??s7`8E2ru_z;H`E0u5|MWu}M6lty1V*VD*{J=%Ubv-7lDcL=N~@tnWL){_$4 zo}jWpuai%`DQmr`^zggz8IP{woVil=M-Vr8&CpgX182vluR=aj&x=-KnaM@1U){A0 zmK2HRIaaOgl}ul{^g^;wX|LU}Pw^~3RBUFH?=l(v)Ox1$O}xizL`6?;Ne^;V**jAi zr^KLMwf7z0|7g+ZKaXqL>*u{`d#btt{4PN`BllHb4@)T}H0tl)x!ij^#Nng6>1xv| zXbnrixs(+!F_=~hFnut|q-Fg3?7#fn`=XMK;^Vq!7z;GgVM=FF^xHDP)FJJ10nv6t zGL;RqGJ;zZk3wyLg&aPX&&tzQ;KJSmM0np05m2f8Q4B-3l%LZ`WX>1>pB<7iUjfiZ zHTF{InkGvD4KJDvA!c7(#r5Hf{V$q_mg%muK3P+Jdi|xB6x^r<`&3KNhrn1E}Se=$#+7L%Z$e#Q$tbKb(iE{;Acm=n4-fo?VA;WiVU@ zKK;!bT~3ksyJb$b!u4Ko_y0(sRWl4T?#e-E5@Bub!wqkS>W?Q=l!*5Gd)|4E+I#6_ z(s@<31x_NW5YJO<2s@}!??nYv)b^`bBK2e)gx$_b{4Dk% z$U=g)eq1Blf1}FW-a7gSfc&6)Tgkz}=!0b*eo1okKY3wGd4iGX-VhNe1ZbweHhoBy zk#Y-=^Mg2xHNC`8;oIi3>=CEqQP1$=IR1ekFt*0TedpWYi5a#HtuT*nVDB3EoxCVm z=k}U$pKyIloX+d&H=tl%kT3~;{1QKRQ$+Zh0u~qb+ z@3&MwEIn14Z2O&6-B?Kh@9Um`*dAUo>k5dSSj=E-;hibv42|#%{QHN0nLB)g!Gtr& z_mBVm!!{m!8)a#eI{=J%kieCzQnvk-0)TKsCcCaNGIfm$GJ zl2oftim6{~pCkKzDs*lwsXSec*l1da{QG8OFhg^{y8PbaC+9Z~r#*>d4(TtEeyej=1B-+bjos;AKVUR;QM)CZLjS<7@8@=?{9o_(B->qIUV_5l(az%y`_g}mF7B{a zOo8!ZMMLv`!0Y7&c@ogBNzk6i@|K7raY`wcneMGH{5Mfm0@An!KIXhkI;Sa+pr!CR z;f;DiqHTB%G6Fo<5Xe4eR}f|^)?+bcZ~9~Og$!_OZi5ub=6i~pYXS{i@slbIPL%Uo zE`-Z`I)i>+-1}G`r?WxMT^urF0gpaiW>2G*?o;L;-lV zFR*LiP`!s#W}2RN=%knu58mt(S_*>WpE(X>vE!}U>8wp)Q#@S3M+oEp-&Ylo zKsr@Lq69=rc@HYeh);1aCAbp~)IBm#%n!~d0R)shr*k+=^WK}GdmBGQIBr{qN|e># z$J)7A^SMf^CnPqq_)=W`3T^s*sWFx%-XWHXc`RNi+Kiiw;VN*vl)*2cKQAnYj+=xa zojADT1^o@|Ug9d4OTK~L2t}>wzA}xkaS`LfG4ZyO#*_boHyC>;rw0ii9)|P>fuH&y z6*>=Xd`ywOyvtNA*QCo0y|`*~X>sqmc|L*%EZxKFCL>Akz3Z6=S24e)lK2GiLU0~+ zo5-6fiAT9~qWrNjvH(^ACbI>$Ie91>?_HZ~-i7oZ@_gV*yq?Xq!Vf78ePWgv*M~RG)f?FXvQmF76)UOI zc(h0AnjANi;mKTpF;A<_y%M!6;bH*2(UKhRA?xWFS!V11>a zV-{UDI{DVuLv;R~u8Gc#0y0!i2PL((p%8NW1Cln!Jw)MO*<-`!8l#GJV^_%v>C#Vp z=GTls7oQtWXJ0J?wkDX%hV(s z3~3MMtvMk;n+sW#uaBS0NjS=I{Q3s^)dCpn<8^<7xy{c!FJ_#sL;M;1$MHk+XJUmC zuU@{sG<-PtZG$T7g1qi+-#?C2+2YnGv!^Zu&Xb!wIVUwMgeDm!Z>*sa}h6g(|meR`5bAhbcn9xVvB);wn#r!o>rL%&I})UO=r zIsP>kcB2W>3BpiI+Bthi9hOrwR+?+zBbkwos5j`kL|;2oJxwsgg|+5D8?swkfD` z9XM=aGRV^1umxLIQpfM+ojN_lwk?eIx~0QEc0-FbMBiep6;%FsN=Fv zO_QAvt_9OLkbTDBAW%wbjsX8WXhduKq{ISxlzo*tz`9aWe%U+cJCAJ70Ir?q1;{q=fK_r#H+%_~3!ubAj-i1nU+`q$rFw$;g zYHEu$TVO5ar~Iq%gY`CjihUaE@+i5(3sYjOm3XqzpgqTt0eNty#frb~N)b(Cdg?{8= z^ANU%lexCw)gIyK1e9jyz%2ex2*0acRr=-@C?hBYX1xr2bS89!rooHPvE`1SER4Rt zJ=*WB)s0+>MXjljYF4eZFxnWPGvc*M8owBr@D<5+IpJ}Jq!>6)MWU7Bq~ zswtIeLHjjT#bMK?A&HI0hTP7ZzpF~)qHWYXGD;mDF5He7>HXdMJME*ui?O0B()3a> zFXPGM&kCItWh_0U+$XoG*NtJR2WHBa>r3tSKAa}OyQI+lqmExMk6IVvU@f;~4!b+B zd5#pZ*Si!b?VmSwd%r!a`F)+l7bxr`A5(R~uo#;4Ai~YSm4Y|+&A7~a7C#ZuW-q$0 zVe-nsR%*{l<-xjor$VKX$Kh7$iGId2{<-6}UDuDa!BtU}6*WI27 z4~6cjtRDo#xYEV7qc~pO(O!$OYUsk=GJi-@(r1GRWV2SE4RP5_W*4w{2RIv3apF#p zm8=ll9$}dgc75KZUV(&?7Svh@$?=3dezChlqGUWGNtG-~>Wu3<8>DZxLvspywj*i3 zLd1@QqM?o>O!BAZ0cuT2x*~&_eEg>BZ)aX(i934IS&`UoEcJ`1`EqZ=;GJ05Fkh50-Na~BSgR!14<&#XzU|91>qvvnF5IiAJ;}|J^g9#N9_&T^wWy+DZ0%Z| z-t*WPy8c9htAE{*hQHPx^vA)c`U|4bq3xwUHw()=y?h()l+=`9<(Up{AfeKQhzsC< zq_#{)7r)zsa*pmVZ+E2--!3x$_N~sZoq3+|Oo^BA1T$^b51Jinm9&ZsXcu+K^wNCh z=(~BY`eeB)HfsH@t+Y{0?SnVTjJ)L360Fnt#+dhsK8fPKIbS}xb4kp2y-PU$QZPln z)l`-HFqG!ZXX42_>uXMOSITdGJ6O5IA1UsE+kDQ|E(#{Op-v-BQ9q_%EPPZSfPyew z#6ND&7D?yWfHtka1+`psKNP?2rQ=f}Mp% 
zH`pl)Wy>{FU`o%|h!EWe(1qbZf&=^^*yBTlgUO?=A zwbZ37Lp2s)15lllP_OfE)z;i zh~kK26=qLb6!D=aT(4efzY?t2G-eP2_1xrs{MkvunrxN?q?s` zzCq)RFbUyk-3c~xog&NMJn;hDwuhwk*A1RLCzf2yP{YVfcPDc22@VCXjN%T7;n$;Y zkVS!Am^u;yKo#Ke@b2@}uO!a0Vzh;EO0P{;^_w~ta*IL+!KletY60T-25VmCxS2xN z{YWPG>Jxq1FRV+5U;gD8tFbwc-;|Y3X2DGD%fQ|pFL7oAqkWu;Px{$HR_(lE?P3%~ zCKN~OI@37miJiX>PcblXY5PYtWmNxF*HtHaMp(|#k^XL5BvV$92)#8zwp%+Qbl5Lk!?f`O#2UD@wi>IaTv1u3==P%P5)@R zbdO^Wl5#Z0)^v*)bPhMob&oioGFD|&YYLwU4js^T$W-7GskEJBfapvpp*? zHsGMiibg2OC$Qylo`Ky?N#^~kdUtueBrqzOF-l0mpjEQSKH#{Qe^+9nU$_1-P-ZweRwm3h9j*(qtqd-!VT^GMW-i)KgF|q!@iaOH z$NcY36EEtLj^+hdXFGW0x%q-RMuNcM^vAl!84^!OTeWg%+`#wMB$n-sB_EU0L#B@c ze#eW=o@y(PqkrmjJ4N@WI!H3TPN>1^b1LB~t^bJD_fq|KC5PiA4EwX%cgm^@cR@YlTbAFC|x;x zwCP%0PgIwGGewXy0QP11>dg}KAvTs|bB-wR{@C+duGLbDt-EG#ww4SQdTz%tk!dNs z{;rX^Z~R=4j*qEZDuj{MOT28)n;poHU)<>G@!ao;KPPX3ywNRieYlyxpC@!5xlS$Q z^-C0u^AruaJS_935f}Y0+hv$?S`DxMNyCviuloJ< zjO-g}FV!|gnQSG4^Ecy%g>FWNdc;;x#ah@;9fBra?NH~|LfK~>Vvl(TA&@{%uad9< z{!W}G8*sZ?_HQC-ajAaV8m}X`U}IV8+W?-UT+P;quP;}#W0_9Bmi2voO$A_(@ZKyP z*3p|XmDzjPR2Wk);u@j={QdXoN#_!C5x13zuD^N2nC90>tAeo+(^2p0HaZ8tZ|J-| z!};BY@5dTZQ&~;|$IHE8$wzHf8y+=`l}qV{Pf9*qWa(6pgYlDP&#wt(cz)3A$vHvqkuoG< z+hJ_l_4`e;hR*{Yu5bV4gnLhDUl)@MNZ))@_Mei@3gNf3Ig$%LFHc)E>idltOTUy! zXY3MT`%e7&NuVNeyw>s?bt-yY%3UH!rS#>zvqA@wd`71N_-1eF`Ajnb8>CMw8+o2T znJO!NC+1zTFxNgp06DE)Ds8329OW>rSZg8sf!f#}yI$&KRu`sQ;Fas*CH~(w!!OXJ zR3g{4lJg{Gw1(y#`#tL>&5 zv|mN+&e(!RsMzgfDr_DAIUrFrKKHnezdEc&O74ZG<@7A|@Uc?QVU|vc zvPT{*wY$XTt5-_Dy-i;`K|ObAHXr$e=Q6?l_6FJ;pc-eRm5{*Km2X~hNw`b8#JQN^7=bwE4P@b z!!b@swwX?KjYZ3gfVxz3iy8`#q6RUY^FdL3)uxBE5f@i7(9==0$`FH^1%CTJ2a950 z+#SX>of+l?8EuUJls+#T<=TZIH(7)mTS%Xn#pF{a6NC0ex|x>3V<`U@2lJdCccoN# z5_R%zV{5 z`X?HKe8vo)%Qq;FM!U{O2|WNbsqY3t4F5WX9|m6lMa`O{B5IYaX_(kzy6p<;e@IY; zyJ){bhHArGMo%m+R^Vr=SNIJgUTDLS*_!nYpYB9x+%RuN-B<2! z#*v8M@QnIC1sYE}^gS5(NMGo4Ca8&Az6&SQh2SGJ1fey-S0}=cE8yJG>%T$URmAP^ z=mjY3FSR38dsI%}!>P$pmjBRxx=~ZYTM>xZ{wZbo&Ksn;94i+l75e3(4vMXd(C#WzY%Fx&Lt2ZA=C8tl#>T-bGIycj8;pGw2g=nyyZSqg$t13~6OIj9q znsjS8E1(`|J|-VWJ20wGWtWcLQRiDPi{^yBFp>JqYMC$1Kse?L+A3yYef_u;3(IYh zh0WKy=Tqh=R(eBJ@8Z`fSba3ttKJn$x<9gjY;!1=nWOWWDS%n7D1Vg6h!=(mA$ddH zlknmN;_JCOPMq$y{Nft$vfn7{>H-ppPbQ`bVM~eVNMu4hY=&?Eym|>pgmoS$d<6A) zYFR#*PAdkV*H}f9!}U^b^-+ggyTGra;mKL-(>DdTFeyZhF5W!mC*0JaXdXT?_q)0j z_89=H^b87S`v`yJ4tSkg`*JF4U6KO*$_6*?vBJ}Zf#bKK7i}O`1q<{yX~54>1}T@i zoMVq@r2rGX9GUWKf`Ok4e-k^1?;QJfM#4L^X94>Ir@l_S!FuNd#1{7`FIPzE@jDl| zb2(~;iV4-mIc*}l6bEOSeZD$nMKDy`W2{XqrGGhL1u+f{Nj-_t1i4(GZ@Z$c#$QDL zUO355bulKC0LlzNxKqK(^Kw!TWe4Y3ID(@s;0eILLn>}sxDQ3LCHb~lC!wpMzww7ye`3!VuDqZA0n5j9E4o?1T;@qE0j}~FotO~R}yXZWwB|cz!t=}g+S&? 
zXET7abw_QS5FrtY?1Y~^0G_D=&xMSQBUdz|X*b&nNiafpmVk?2Y zLTJJk^mKn5hz|u2LjeSK0KuQMIREz{7N{|}VO)5Oy?E8VTz% z-ZmiYFaFkuB(q95uxta%Cu z5s=BKBR@D14`-$g4$fQ0=cucr&OAwHg#L5@Art~K+Gc-KtT-SwhlvkBzj(kgoK0f* zzzceuhug6oR?adZL)4I%21Tbm=8XWEF*jbMV)rvcvU%6rP4tl>_30syP#)3 zZ}r2x1v2;>?VjMue$F~dL);~CU&>@zec5Y&t;rCIH!%l!VpP0A2mXY=2DXsgo&Kn7 z6~xY+rtm=s%h&9uU+U^}Xwktdqzf2CJOaKUQfnij1O?(>7!jv%U%hg&qa4!sL7rg) zjH@KBH{sF1>!;8}0OeKkBBuFjV*!ME=>y)cbN@jNXUq;tVTVc(S0s^prwaX%DZyoa z#cN^!))4>(K%jlgN+h0H;mNe)Q-C&uLT8xO6O21xbnA>6BXSrD!ym#3<9VxRY#>eG zNzzvkC%727`Zl!}(%swhZe248T@|ma|0YN|GO;$Amn*D|`!(HP&-vt;#1h~L6+fYx^ry@c6?yyntV1yIW}Ec&1=>fnFt`Hbmv8#hXiN(p>K zl5_L{xIT1%Z$btl&UW(2s-||;SouyBWv>tyogT$T#c7Px#@YJC5tMAmIWIj5#R_9UfqG*Q8L>+kHDBuo)M^88Y zDzu=?iKM?Ywk9zoNWFBLq+f-hGhTr@^1x=L0P=xZ=9mc6Zp`*ms&(=ovs2WR=Nto4 zi2FP)i!e7e*3{44kr-T#R5$8JC+Me3VH8cDkfM#AFYi`npVQD7*k6WYq)(HQF2V6< zNLBVIji-Q#!`q`6xa`hF0L3$jxGzsf2C&N^a}X3Kbl9V?2zl;TdX3#4?sLT4?Cu6H z`poB_vMVyYyQuuodryNW|H#Oy3m|h*x8yBxmk{Y=iOTx4x_^;oG7we~Fvc+pLBU|t zp=k*DeVlF)yQ9&IX{Xz`Tk>F!qzP}l=+LM5WIe5xDZ1KFX}!4Q*(@S}s6hWQh`NJ= z@6mi7YDVFJ8}PaN3@)jSF@3KN`W$nTqdTGVPot3gTW&ZR?)^181TC&N5LB}JA{LN< zLk|I%N&x71tMzz8WTyN?gmbmk>oqPxoK7aU3%XKKD`1V49i<5{)JdWk)rx~5s8s?m zo(+LN_D`_hqPK}X%mx+juj50Qb{4<~eL{qN<}PAo-s&|NeCYXS&y6D8FGrO97O@1= z=7RU|L5DGK$8Z?VccKw#DIXR7a)AGV)xpZFm{jMEI6!wTumStF2}jrAPxX?g@%rhU z!Y|tk%=39SVD=3bD~mV9$l<=HQQ`@N^R>UcU%clPzy&v|t+~-DT)?lWiz3KV4mw0H z7#++GZOS0iEpa4vh}=uyVj5zDdpO?&^nWI2*2+ir6hjJoMj2L}Ny^Ht;=)u#s#kbH z^*>!0@b^*(>V;!8Hgz|e^XsdK_P*Z?XzLa@c{krs#QHY2(mr}FT|4nDbL0(?1vL)? zoYFOkFr=fL(Ea|2 zrO5z|B|o83rd(K%1QBzg_#C*A9-6)>@4pxIu;jacu*%wQ^ffCI^1f><{u3HixQbV9<0Tty zCJrvYSKv$%yAxMK8xf+Eemo$)IfalunKYR3#!;%v)`s|8IAMV>l0~zJ`Gbj{8y=XB zgkf8y7e)GBK)W@Ch3TMyS#-q;|_IhLJ4 zMAoycK&Kyw2b>)Af^|qDGiUulVc2p>M6!A+*v4iE6)6|u9?a>-$3`bg90$*zbRS&7MaDnYixOsH zY*D9Jj2$#$b5PGSwZP2ed;v%#h1wInFFu1YdEnR0*)q!e_%`shm=j|?_kSCtZ3xhd z;Jcsa>v=S%kQ>=WT&~`5WjnYzq1y~$<^wr3KlZp?=75XjXimKO=e^o(ZM<$= zky3A3mQM0ui1jD8Gv)7ZgLUrb@lv1ZXZ~Z9uSuw65MZ7w@JlF5b$Ip)@9*-29zNDf z?xiodvz6y`Fq!kOtC!il%+NJ2b8^f5-t|q+YNw77#+w}!yVSxoc;~Q)bGg{F`7T=X zU9tF!YbJCLN0@!HhfxrR$G;JV&8>53HkocSNwTELAyALwp$lsZ>Q2%k5*3!c4U|;s z%29t=Z`i2tr05z6q6QeOf6>1oL?$+p0HOOC@+c&Fq2Tg9#hFJmJ)49;)rPC2Peb(7 z5sh=%)rH(xGG7Md82_Yi_HnUO;v47u5DV)1!E0kF?O%mNm=Ru7XtxQ)-qAWxiU^u& zzzu!kIjHe}9hfGTadO;E{U&)>U?zUp-D366iXVcf93)C?!z z*wA`YN|B8!U$+^^;DDUnQu*Tlybre_f>q*nhjqVwvG(QfCAEsa=ywpxYeY!%Oy(ZK z@wq6U3IKl~K3O9myE`Bs+x8gwj1kZ?5y|`|l_JPOL$E9B6P}J=&zKd}DukIL@OSZm z_$PJiMw{l<2Nd;fj~p^7?8v{vLVP?)tsW+hx*lq~g%YmYj03+^X;{YRRl z9zz*bL-}S;VNBLg$IcIC|NG$2bnjtf0hKJmC2hW+R;{7+X_`zyOiw0)v^N_HVCQe1 z_FvF56D+M5UKhNSfux)TxDVIw#Fi73!eSwfMMUaDV1sdZbL#<4nCi<8agktBG))J&f8D0oy=JI`!Su zOSw8lwmP6PPF9Ne0AbbF^fT#g_Xtb>ne%3vd|_e`sl?;r?=e;P{AX>`XRAX2{*wjS zoHiEvL`6Gy9(uFmUn+X4i-;tw6|{%cg2+6gvf35vx^{c}Lb%*g185N~A;T#r)o`Y` zd)p=;?UQ|OoF*9Cg-=F@_F*|zV-wcMUe{eKT!Qa*Og)jx_ijQ3Rm6pQrELv6fi-9$ z6PF6s>lduVPr;*BktnPU5bz^b9WvB1e50S8J4oi~fxoenFi!X*WOROh<;uz+DdO?l z$OOSoC+G**<>~_WJCFD$9&@S!<_r(fISSgWA>GCa_k$%p66#gE93H4oAOO~V^?%Pe zi*1aztTD{%_V`yi3ky%aRk954ixJ+z;fh@W@Q)XDkbX+Qr`gnT0BNfedQuz_{mVFn zxVAduYzR8N-FGlXHrnSPW9j-2Xd>&TXo|-BOd4Dw814>d7n@i^p=1Ngu%Qqd#)sKF z5Y#5YO0|3UpHY)Gp|HT5mlfiq{M8ql zz95T_(Gxa#i*Ho@<(Y{~rWIWw0>eD(qX=Uhf}wC<$F$ASVBD#EyDE|2K?bl%8E-N7 z!|*3nO$h8oBJA(KCG7Sya!pA%N#IHSVfR8jv$$W;0intM2FHFjN_jj28`zcC9qhKZ z4W7ZiqJepA!9%&3X0s0dvaWn7+Dy(b(L|Qxo`^d`qobRU$(;H;>XRq|;vCke*hTbZ z$UtY{=FWHna5}qZRR|1ugf!jIaKX7*^NWGLz5OcXC*5{;O;<^`+XK@3|#Bo|4yuRh&p>WDv_=q2*Nf z@)|TYwp_x@p-UJemoEM;w1qA>E`^+(U?1{%$PXRauho8`Q~KV;#?E8+%Nj_{&XZa1 
zNn_3~BmfE6{?mZHZTTe^)WYDkBCwY25L6EmV0tA@xdx*(!ePsGBdmMdG%?#ZiB8#Y z(k^E`8o7bKKCGwOoOY}+Qr~SLvr1M7i?=7;8S2s-;a)20<8~E`-MED2j$Q|x9nI9w z+9pA0Kw7ZVt4%S)3bYsM0?$Pkc;C-N*%sp#t1xAPt;Q1P=pf@j2k$Y{dApRA_S;Zs zj?#^fu@r^iN=}7W7a`Br-n+MldT?*uN`7OO`9!PrT)v3SLQObp&bAl`juk^sMfyhJ zU9;@p^}qMlQ$|PU;RF<-WH_LH*tbFY9pRqE7VrncFMxw*LbH~=MZc@vjz^Y2ZlzsBSIsrCwdVU_UiW0JQuu)8&nO^lxhqer z;q;_io zCaYr_@UQg7+$0_a3!FSy|IuTHQz3*th~dOBRAj;tZRAwyUHsfQ{+C$?zt`Mo8&R+Y zwO|*V3SgWxFrscm*Px5=erFRG4_Z5Kn-lm5&N$i^gU~^bPpDjp(U*ge1NuKZI6wj% z0TGx$$Gql&bUo|K-}l!6Zo(cS-@+H+#Q?uxFwW0znl6)8TJ^mmA_uM5kbg&8i;a0qmiUTfT<2IPBeE=c~qzC@{jodr1 zsE5jtM+Sq&Yaog;OF8!$++zw$SWC?I~VP z{0redzt}hk^@_-&W~6?@7HrHaz5s*jjz_6o&-iSB1tAOJg1^g?Y177&z+7Na#%;OR z><4O(`mus&=AjJY>{aUkm?ys!Ms}vC9sR_Ebaqe_^|>K|A(0j8<&PVzl965!g05jl z)?GwFjT?@$XV{60EfdkjV9}q?BVDHYZpMFqB~_Z1ThIe;%%BfR@*(5My+1DSR!t8G$JY->b7T5GF*IWUL980DxY zPJ#QzAUe-f0GbzLwJTkf6OkC)4`ARwBR;{6thaPgw9kj9Y|qD|4w$1ud_FT-|B=(d zMxOm7QddUg*%4pAuSZoeJTX&UPta0gP1x%7VMhPdk&}D7q#ku4$|jfNJ6pjo+565d z|H3k$k6O`F5Ec4tOZ?w_-+6+93e1vyz-!BXCk5!VJrsnd0(2m`4|XGX5}%&+=_nHl z%RjKLr6ibe`X*Rv@_r`7<)&MhO#7-vWxfYND3Vf z>8)9<2ct{XDn|JFq3mK@f33Wa%hQ%J6=u1WCAP3+5qB!|rRFv2GgnT6LMkS9>@4o) znCg+>uTH+$>=YX8tM}8O40NKDx_R&iW0xMTDVdHk{a7k$`S(0f&$Gkw@jEV*g_Wq# zWx)*R2=})?VP@;vk7nx(`}UrJFL0q_K= z^sP|8rkaOpN^5^F@%zu=+>49#?ED;tXasB!wdYAxrzd6KX!se}ASHi<21h2(D%4r- z&*!xZ&bldnj$NZxhNXr4)P;sfG(2x|ABBk=$u)TvUGF&KvB|ypmAf{CUF%d_{O+KY zMBzeJ=zD+TfWpODe-nb7BXk~-!d&hITxXWto3F9h%~Vj?j@>j5*fVCj12U}bk5?w4 zvj1q8L=h`N2}wfSXY^$1FK|X>r$Z=XrtZSkUa`q^unjalBNuX-tGtZe4Iop|-u)h6 z))K*zCWg%tE3WqV^xoRWELqkoQ5{w8Qmt(>8XbTC{Td`}E43kp-j#t%TYm~XyL{n4 z5rnh~X?1VU-g7EZ1%H{N9%&I@n87yUBLB&S%hw#I528b&V~057Gb^-S%;xR6HIGp0 zGYmKwuy}vOB&^UXgsEo04$q3qE=8#c#oU(SrEcMvBmaMq9*YaqMffL<*f&C5SWn(^T zoQJg*Qa?-z-~?rrdJ*lVtA2Ml!9F5=(bf+8Mrix?YzzDf!kC+SaCGKQ< z>iEQ*DmK*gf_>TIsqsmb*!xqBmUgk^a_t^L^!cbM6Lc(x`{9Ss4D?oZ{7B4&vE5f; z8{|y%P36IgN2ufmhK3-)F~wePE-@)8(y|+qxhnwREPJa9p!w7X|Cr5jtB~G^_olF- zP_%>USuW4{P>``Rk|Lh9XMzL{{f^1n0dIXbl0W-?rtj2#9?m$GiISpen_>n{S|!ya zGr=_0^>Pcq6M4^YRh$)X%sCu~A7=loLH@lvW$b4|Pozk)=|D{HY$mYNR5YQD(;j#z z#~>t2UTj8$@q4I)p1lKqp48K&9AkyZ*KqhN6<4?pHhK2u6$A(a8h`_Fel!r_hnwIm zRIjBHE9ZSXXXaY<~Ssg|T^eIev`SKBC=v%06fu|de_4HjMM>>qqA-Metmp~xt ze;CBvRqZFqHmXi>p_7$Yuo#psq`8MSrsfPs!q6C~Y^Y*W3B>eCeUp6?#|3KD9ILC!X&bnJ-bng z$WoSwn2Ib>wvcVcnw=v1GKs7)6(vi?zBBeAG4^f9EQT4LnfcB8{l4#?zdxVP=l(p; zx#xN2+~=I@y3W!4Nti%4?4(8NY12j*TUcSM zM9P|lm0T&El*XCtk2T&1#EpbSql@28csSw@f&TWjKlt)?N93oKRO%87J)(`7tM$od zF;Gweek}_w^dCH0u!6Am?I@RA0Z@U|qvP(y91OgsWMjhssGpQNcOS_(iyMgc$ePwX zC*39E10Sj@aG}-97T|_y36sB_Phr!LLZbp%*xm zlur@(a{ZWdO@)4q9t!X(stjI$iL{20}xB)?aIU~X-eiE$9k1bd&r0FYkf zI%)v*H2kz6RtUiK9Lf{^;j$A6p}_tJ zJMk8#_*Tf)tAn%X;xO+QV54tIcJqU70lc!&2~B2ItJKAD;u+rT;ouJ<_&f(K1zGF8 z0aM<^YIA;6$72llx;Tl>v@@G}w0zW9m#_nkU9-$`0vz1>sn zlRA9h#yf|-sVBSQeU+hysczlgO9L=jO!VIKrx9T)g0Me45C{ENG3_ky+ZxGlmd|59 zl;-uV|jjJbU8LJ!CP+guS;#Igc3{v(8$}mI65FwJ8%%}T_(568wbm=A zZ}q1ABtKqh=q-XbL!(n3Cofy=h+0fG4aKa`d{m)Xdi?giVxoi9hK95jV^R|0!vTL~ z3t=D*Om0W?K%{^^BR!^mElXqbKP*ka(#uApaQ zkzjqbugWsfX7_=4)gMeqO%3_JXmhT1GnClsT@^%q_*MfZJJDuQmYnW0i!uEO_a5AS z2Org33`LaXr83hGGMtIjI8Okn*W^la~I1GnK8RK!IzY!xCd!k zJNMpR5KCJ*c@v)uu{LpDED6x6Ox+_F$FtDxbI>|5amzCHo47Qstqv;@uG7#aw zD@LQ(t2xX1Om-e!AVW=guYm;tJvV93<=3@Z~;93g(yxDVsr^U|MaB0v%8+6jvV-}13}6xF)bcKb=#mz zwkm2vwLrF2VS(FdJtkz$9!|I63OEBMM8I77g6MK0Z;>sdG7Q2$zij?VKCig8QHz-a zRIi|q*l3?u01WAzvNAohZER1+F#km}6hS$g_iE!mYfdFJtqCD6MkZhYz)!CR5rXs?v*R!Q49PLt;}BA&r0kkR5~{3>D<@6#7j`aovJUR z{$HSb(^;u7cLSl%eearFZ*OGV-PcZmtHk^!uQ+@uDs)^kGx#YsewE_;Nc^H7D*)Zs 
z5SNv)GY8xA+pQn6vG36aAFfVdmoc626$qZ2Z*n)O67!WlI6CrFz5Nzhx826Fq+qOr5ZUl*+wZqlZJ!!wZLQU0DM$!8=8PoulV@MLXbT zsPRxO2Gwx1mR;`i2oJsyuVGGrB<)dyF);tQ^{GRdy^4=%R|=PDKmI~=yoSHg_l~{w z;5oTZ(!CO&S+b=DPxKUPJ9Bv$gZGa2Y9SlU$|vdg#)gfwfjS;nG~&=-FZ(qSGSO`8h=ZJ()sbMEh;`>(fD#n!9I+g$FpHpgN>|Kj;B@NDQ2w5a_= zxExe<`q5CjpxgdPeLC!g86dn2k{*};-u7tB6(EwXg-RQv&N(dS!b~|ofhfshioeh+3D{%)`;ZYzBUW8=&?b)} zSi(wEYRep6ogsxTmJ^79G-p+rWLaYx`e@w#BbT*Q+n%Qi5lnz}TU7JGst|S#&6*F_ z+bmR_((HS`okfzj>}U0Q58VkX3$>V4c?z6y$2g$t#0BXL_YwFjwLeZ6PoP-l$yK{` zR3ZuzSr&U;FtOv??e!YL&7TB|aE7?O^Qp!}Hf~iK9~qy7-&K?tffZY(-QTXa>P>^I zsKlkHQ(dCHanBM@v86@HltjY#U=;(V8XZi3gtA_TDT4z&WSgLA@T2}T=bbz&ASE~> zTTCNR?DLsI$g?cVONv%}1fY?=dp8_&D>EYYQ5lT9Kfe zJI9F4t$WHBBZ465Z+^74(@kFA*Lx@LuE{0$suAC8eqvQIBdxaNTI+9hxZ)m&-Qyyr{aUK*dO;jh2V&?HdJ2ixh@wejGqx$GB0uirBvfxK?J zZ%T|Vf|t&yiZElAJw);|@y#QrO)aMGe@j!NC0|VV3WqyJN&WERK}-jsy*GMyi#*;( z?ecUN>&&B2H-}~z2>H*)ORWPbKQfkq{+jo{15oVM)=V9>7?-oJbBj7fQY3PZ+-fl7 z6h{T#tWJoCHZCUdsyr(Xd!>CyO4_?$)7Gyc2Ej>pSk=8&43c)&7Pa8I^n^g2dIkp}GTQPXn)^9Tz`sLQ0YLx!1 zK&W&+#rv(5e-|tkJSj-$0}GzMUq+52sX++_^PxCqMmn=^~x2ZTIqK6Q|kd5eHD?*&b#zpY&dS%dpfJg_ICWKdB{kvQ!!MjX89e{?}l zOEZ(1W&_MCw7*>t$aNUkzwc9zRj)7}?}y=3GJfKkY8B4clK044=>Vsc^ZYvS)Vo3u zPW^*ni77>i!kC1KDywI9b{`;JufjAlR%=O_{@tYe5NDTlxW+oY4<)EalzPWOw#Cjd zVk;4^viodp-w+N7HF+P?_N7+fi^mOQ(R!B4v9tIBU}dC{R*aQZh%XF zo-MN+N?d$Z#+B zpI*ij8F*Cwp;r#P0gXa`^F)m!mHT#v1{A@F| zhZCl{${S^b1m(M?=?6sf=H*?5>TGu4l9bPGjV0aqutG<@iq2IjIa1~d%B7LBd z33SG$F#xu;W)}HDgVwO74`>H78>*~qTk6Ury(PpP7-;BfxQ?#+2doD?KFwGxg440wMha)cEt!XY!Tf2giwmO*a@0)E` zOTEmt^8M*Hw*wpD@F1$zk6%eIje>j#910lD;CzwbR;)MezCj{-;^qF7u*V_MbG>E(|%;lKYEkT;#wnX#qR=zP8t8w=}{OEmb3Xzdt3+g!{u%WB40Z{vBqH)L< z#EtU2etSsQq3kq>u;^cJ0Yg^!J*-|YggW-G%>Tmn^m6LSGrnK}5z|W0$Q~*nN ztb_86Kq`xGCaLzUZ|5zY1(@TLW7?%hs>GG#KZlF}E@gl8&ZUXsRy#v1ZMr5aaL0S& zu=gi2vz!lxfn`jEnCBd*r$)6>ftEdFuJlO!8pQ{#Ai88NujjZ6E>nU)I4uu}DsK#U zYEYcfB?fW_Fylia5jd*;7tA_hdU2!|_)9u&tio*ILueBS+7dBs#X0&hPiuK^HrESc zDwhA>w?wr?p2(l1ia!2@FG5sicKg4NADdV^JH4?hsT5RKMK)Z1DmxY*_v{(-fw1#E zt?%v9)oM$sVYnuMVW1N)J##TtB59>BDc*HC=n#iu^}vaMg- zf)y@mq#8Mo{OI-mXT|h#xXxkCW%csp)Qwb|-WJsutGhD+8P}FTlr8-0u$)Ifu4p8{ zD<+av4NS(XYQ0{En--oIdy{H?La(w}#blHTAapvpgx*?#yDT z!Wro>RTY@Dxuq8P8+g=PgG1PP0`Wgv-EYqd>t`vsq1I%i$*w70d?ITs;#FrunlmuT z-)g#6bV8>-^;ERVZKtm{bB6~4(TGRq7TR8n-u>2SVXQbCY%!9;y}d2atLjOTT75SY zmks85;u7y)(mR1yv#-AUG02`O+xbBGuZTu?#`~5aXdpE&vkx9lx2aL`2&AT7yKVEK z;NuE4PP1+g*`ktEdDHflhKab3O=JBYvZhwH^0t*Wi8{NSact{lcQWdncjhr|PE+9Q zBkMSKG@=MoV9*R*p;&3#Og;_$`4<7h_@YqXz>BR1Bcaq|GNm_76x?B((VNZgdJ{)m zcK(a9`XtwP#bI(KyyD<*8SgXXraQqa$Kb%))MBl0oZR2Rbwdm65(^ z_>QPl)YBJajUG6GlBZ-gt|GR=8P*HyQh;7BP6Tkfo8!oZW-lVgpM~MnB_YP z+oi!Ne%uEgsBI!k>xU$e#fyN~8FYe_KrPFpMQT)yp^O>%s_sjPR)Z-+P*=@YqX;5^ z0CE{8bWbBcT^WqRoep1wapYq!r4z|+q&laiP@lrPxV8llh3XK8dV9Wa=l2m6~~vF6iV z|6uU9rz_ntFFRR*=v-%2ke1|k@}H7H-JCT+c&2v9rcGKntIZU<QG?CzzqXyA8w_8-#MVF zSJw{2)~`m6&mKbbAXltj&!SMdLhWY{?Du-SvyTx(-B)tHuq7(vSoN10ozEx_r-yS` zc!mVdFvP@sCg;iw^}su5Yck)|$n45ms9^#dvo5&VJ4|5C+C6q0U&Ha^_1%+@ePMh} zh5yM+FXf`Q7AsOT-Chp>)njqzZBGYExYNT_mu5d&M;?6OFp~&Ll1Hi{aVIp-flkhQ^Q*V?xm$CkbIzivFJtFIjqkb&lD z-IGhH9yMrta2 zOuu^R|8E?IVcgh<&32@)2#&!7Ojh0$YKOHKbwhyt1PO3Y5FkZV`%%&XTILmwUrzN!r;QaxWN1aYp{JzbroQ7(8D2FKq$tN%z?P zk!HK%B|hNkX0^k#r+8Xwg2e8Pix!x#3Y1tEyDY`~)@EzzF#M zaHWS)r}K_Z1>Y3LUjmP+(1c+nDNCf48W=sC*PB(A^}V|8k%0ra`-(`;8d>me>Ds%t z7u|S^HVr1WJ03xw8#Fu`LQ3ye9f(4?4UTW>StnztmP~HI6G5A`96K_%9(QDtdP0$& z8Ssv%J6_jGW;^|LQb`Y@y?L?(H{mXYF$;{W#{^pyKZ=9LKnmpr?GPwc#%F-Y@2zREI|-08Qz z@DjMG=YqE2c4 zufbvF&MtxmLEc00#fHze{zRLXY9rR&j0OF!^lp%|hw4>-!$EPf2Y$7@?+0{_P1u&& zZ+{{8Y`OJ%6v)xEM<&okdeDwEFBrKK5yqf-zS;Vm%@`$!38wY0$U%7@21zdnXlHjV 
zbncze*>8pK_h-9;rRv__Uo2-I1X{%F>iUl`-5KJe+QaJkNn9f z_Ap8HU6$az5Yew8;_8)|LB$?@vRRi}bkX~$cGUYA8J?b3(phj|fqk;iMSnNvj#fYi zenC)VvQ#IFk^-rLQU6};9V4)>=SvwWyS?^YSGKUYGy2%ymcPNerrE#?V8jI;O(nmv zL1+Ya6Yu6k9r8ph-PxKl^B*aD1JB@e*V;c&f>nv86EAsh5^ntl+`f&xvHdrxx={4f zHCWSt5m&WexzRGYZg9c8WjUWWc>Sy!U{b*8q_Dslz6lj(Mnw_s>^{<08^g9G?ricSQ7K98(H;C)@!O!1xO0v_~H(kc4TspFAm zr#G}%ekXzBG_vM8(o56N&jk?3TZUWjEtOv?==BChGX{nM&~e(Y!AwN(&XL)`UjH>* zcm_Rnj3DLcV%Bpd4-inhr71}cE z0KEFBJ5htcjU;;jX%$AEMRSW7;5JY<-by#8tts|${TRbi&pdop8t0rZTR z!UYB51*!~97)=s|HnsoHYt&`D#`%l8&cO1kN`xcQcuu#`hCyhfUrY9S|G*q6u9uQ|+q*%jxYBOiU3<*n`qhTmb1lX2_ZKj`e8U8GoO_aeH0#~U z#nWSH457SYlu4NTcfDZg`V6vK!Mh^$QBsNhX`%bC%iZ$si;3oi5qjm_5p*4K&ntFI z!TPPx z)5WA0_N6&@ddr7j=P31}0!1fe-A`C`J$W|Jm>FwPr18>5Zml?8Ki$h%yk6jVkb-6yso?QE=%Px!UOtJdWWkM2Fj zlw7-^vuyOu7gC~b@(cNyuQUOL-*m?gmu`tx=D#DvxA}5qGoOt0?By4-iAU&bx2kb} z@Yyo0TGHjx2CJ5Fr0(BBCC6~_BD~t9KaQtftn^S#eREL|c~$RH-3H8r*F5fBq)`S) zEACo^45)b3TA*>oEs3q5?^jRQRW;>2KCqMpD+53=z!}iGw%_d>D+u^jJr5Vd22#`6 zN0mjgc8<9VD!^13Zu`|AYHa;`3a- z8LXUPIBY2}*p^DzyT|qJRrqK;5+m9&COCr*PVY>GM02$0bSzKIn%#dD6VdL4H;`_Tqg+8*9!>4!!E#+vQ{6`*z1$w>2?@ld1x{tRbYjt zm;d^Ay!ZYc^@v|BHpGpZyb}B>mkY~3^VRvA4k;iSKXD?ffp@$M{2-E#T%T$rZ&5w@ z@A=StyG>2u7+8I=^;0}k!_yu;vFF`w8hnh|IyaKUaJ#V@esO6heavt7H!3E!&QJW9 z(zKGXEd1x=nlZ6<$LP%vjtx0yc}e?X0F$5(0n5!Kh-r(oVAjkO-#YOm^Q@fe)i#z7L!%3*1nn5mXr#?G*jlf$9VLR#*6U9yP4Y^rF_d4 z^9wwNoQR8dTJ=9y|NoX7zVJG{&9e&sJRS>nuO4lRN zG^7{3=V{TnWh;WOd~j9r+2i{oSJp17;nhq`l|EmLos9Vl6UbzGg5#`eIQq=QX+m1t z{TZT5%X^`JcMZQH6}RD#@HB)50DnB2JdDrQMWD+^%`UMPj}9dK+xrvwWrNHuSawxD z;__WF#>6?sM1xbVj%PHHS}_(k6umgYSzJ8=!qWceyt#Dz^dp<;t|M}p?{U0D?d3;& zbI_hAC?z*bV=4@oZTF=V0fQ|pj@YE#e=ZHgGi zZWvyrVRqwG$Y;=$XOCLx$?e3WRvEv5`mpJLZavTQ?S}L=LwO|Hq@qOb8Qe&X4_{%; z;yvnb<-`xl|9rkQw=95UfO==)rEQPh3bMo_H7=vAfQM4HE|SX)iX-P*^*4P+BeF^( zZ{2Hrsh{%6eBLZSARDXUrFoCD|aE`FOa<@tD(#fZ-VNpMH=B%;2&(lY6@t9r>l zjgN_)cn~gb+yGD=9!nN99>w?5#9nXhlZ#Ow-#m>lqFvp1tSj#imw1e~8j?#6Ckm6l z?vAB%JW*k3bUSekwj=Xa_3}dTfQCOWdKuY2*^&zHyatZF$zqzjW`GmUd(j_SAcYdv5j}gm@iCmMVSPjY5e_a!i~MxL*RLQEhW0Mm=(7M-{hZba z839S2;`gpmsQN9wolWTv<2gO#<9F+UwtcuS*L;#Q+S{_c{)K!nol!~upwnhiWrlV{ z+CVNSY;LwbF}FzcMX2Rza|E8k(U%IG<-pPTj$c_DuFc7Ty!n`R8 zfq%bljePp?$j>wt86i>1dT~|5atM1?k2kcwRzaLmBH+~dp&BD%7SN2fEsz-7VTY>5 zC|oR!3HADtQv3K3)LoILjF^sub!MmEJ@vRChkP$j|0}h=)sIWmx!}rHoVL?=PwLB4 zUex|42bP(`J9bGML;CjG-MGV8?p!1izWFU_PQh)p-xBnS;Kh8n6zjMA%h*UGtikdL zU?9o!U`R&eiPqb^x0gTVMC-gs2=RE_Z@Ch@j4`@crMuYJ+YMSu4kF6*&JOY>>&8skqsC3MQN*@4+*lY zLu1?;tm5V`WDHdqPOQZdnS1GhMaaufA0CX<3uV50Bx$DlCx_ruiwH8MR-I|Tvh{}A zCurL%XV?f{2K8cX;9u4^{=PRJywh_wZj=4 zu0O&`inp}yK;ea-f>+$M@dT6EcU6_eLZIIrwkuKa?-yPbJ@hc~YKL?@@_~@&)7g{=mwk@jw_3ng%F;MP+~!x2z3`BOpf$B0J)~rKSNX#$YiieKZ2_4+ z?1T3q!k*ats+OXPdm&L6g8${eRftsZG5*s5*|SCH!1o-I2zLz@;bs|;c$YPF4ki>7 z$7cJ+C}2|!$^zX|qk)_->m*gW8?bUDNef`O3?}&mA!fQf*x#HG`qe_%DKz{ukUUyV zKI<5`B&A>!R>c5hcX+`vCQy!;hcpJFDT+V$Qz$U(9Q`3J`Ri0iAvc1@{9jruO!3K~ z7TxxF%x?zEZk2#5@qugw17(S6@rd(*U7-??r>!}z}7pReX5@jwh0 zc=&{KLso6}d6FI29-KzSJ^w7w+3!#XzN{w(%= zyeU3pRVqvqYI$RwLn=IV%m1;i!U{skfwM@>*y~ZG7gU+!#q9LL_m`eUrxpD<#M=Gd zSE!I1u8La}&H<2adfVS<%iyE{iQ(7wDvo4t<0VBnrDa5QU5?{n7!mVm7s&5ibz4o_ zFpt_uPV`|tb?V{xm*N!9!8$?M7qr&P?#t5RhAOyFp!%f=Z@>zMPz0{*uikK99jTJ) z{n95+@!%P2BB(qyaqG6QcB+AEFi^!A1kp-M3NR%v!}Ego-&o$L6#@I$4^8{iPWZb7 z%oD{69FfuEtPZ*pP2ZNqU=NQJ6#KzOl(*k%36c|(ImDG z7s?*0D``wEw$k%JI4ul|_IuRm0x8sNx)$SzY+M-TuvU zm43(5imQj0Ne0Z0TrXL-QkR3Q1~sbP6ze7tU8kZmBgs+)naAbi8q*7S{vGXHjq z3U%706EoPG``kaJ00EtC&j$ieec8_Uo_^2^S>fzUfGuCBTKqDJ>#V}+Qv~>bh{n?y zL;lWzm-(SxX?K-jlE5O2>5X{dTW~I9k(F-tg9us{g`txJUR+V@54Ze-=2te1tE((2 
zO0^!L&62Kw(PUJzj01&pSO%6C0KsH|#cAWe{r+$3S*#x{XA+FW+Rp9)JI}`iXGo0D){nlE zV7dDTrZ0RTvo6q3U4`&~HCG^*nj6M&QW@s}5^d@brN4<+AFVq+7pM6kck$1mTc3c) zNUhTL*4?!oxT84Ke*C)G^#7R|PXGPUYXCKhA2=V~+M0Dh+C4m6gypz;E+cEaQ;CT! zTaFF)KJ@$C557!WvF4b59YO#mwmmzL6^PUOVmCYC)(SW@um`Y4ksnboq&Z6N%)^aj zc$R8Sa1s13we)p9Y=hoSd&tdXKwnW!b%Ge=)yRMiW}?g7(R%>R!0+%aq8X}CkwkS{ zbNfTj_qDLGTRYQ*2j20d2CRL6{lp#y{|4`Z9?B)zb)I1XC9c`#Q9K<)8doP!6T+pK zrH8n$Y@zI1GG}v?uy5`;OvLacPFrO~PEm~i{(bqx$UrNA`C`2X`Hafft8cg8)csx( z=Qyo?>n4QWo-97P_C9h{hAw`TjL=uSWfNNxQO5b<%-Qm{oa}&tRG>uR^rQw~k7*@O z>#VG7yum#^m}fz|hWuSOsIYWbR66)lX9xa42}jRen@?gP%bZ%B-soQPsFF#NiX=0$ z&yL+Jhj7(I93}f3MJ$R`;iWC(X_v^&Cv}r+`J7$$*93BnzH4PxpS_UDKIDE>Yk+-B zy2x7IN(}6_bXfOQYH}k{VP6zk?%=*ZMo?Lf%fxF+nPXU`j0QC28pCtwswp$c{7!dw zFI7NJU;b#X&9aiI^@T}%4W_kvv8pI+-(Aw*JSQ%4hK=p=;@#D^t|MatPI$g}#kBfa#=x|3ADbAqWKPQm|LM+-fTB=S&oKSWb*bF!@&dNZ~XnFRYbl0k71 zbMN(mmX!RtAB9Dq9PQXUsl(CqjpY$U^$dwiDY3$=HpSMYy60zh^_E@iN;C8l_zrKS za=$ofPV)M}JtHsY4*;iDv^~OFkXE5hkfp;#XNgQdJg@I`~G#eWa`crZ@r8}(_Qb$ z{Z9#ypwr3gwUGJKL&-#V>Ej)66+cgo5e^b?>eC!HdxrP(4p7dzIOy1r((Sx%b&}PF zK)Jb8$`%($l?1yETIb!kcIA|C0!h}=ScFDAvYbzX&A@IY2nimILc1CPj(Rq>w8V!w==va4b#nO&lrkT+rRqqq=l;5TGInu&E%!c0=K1R9le3xoC&OIS zILtmu-fPpw?e6oy&lp;SUBU@-mtAHqJIhjsEgI1i8S3d%qA86#lmo)fkvBK!=W!E` z*R4dN*QEqM{ha^_O=-aohc0FsAbP#oiB@miN6A-UO+r5mVI5LT3-_{j;xUA{>GRJb z94*jV4LAozgf!jIs9$$nw7mC%vkPMrDWJ&ivp<>TG61?at&*ZZX879?B=dKKHm)XT zVJ4p}cFR@jPhoGPm2)|1YBywVVZ|#fq$QwW8RUFYA4T_v+C#@Zx=2rGrtM>jL=1B_ z9*&yZ&IW?q?(07Jk2PXH48eKpac2bS+(^6umPM&8ka``+FK|gZ70nnueyv}GGJBwY zt$vq-0(xRgcEWs>$2?uVSH!#=0m7H_)DGKZd^&5BJo`j+>$eqfVbl>>_`@(nHHexB z??cH!wcE;ZN|guL8XojYgj^0)DnW+}s#?6N(a5yN+*eoZ~|ZiRE)|h}xLrP(lbH872fqKJ@q|}n5-ti#zm|#HRrql4$XhC7j)~i$!_>;SP5U&vv0W5cZ!9(=CXJG zXozXBnMM#^4!yag%KAC=0!zhB$2g-0R^tREuNyjIDFo_x^rwxAT>tB5mm7mj%`%{o z5eMulA^VaIc%3UYRdFV`45-O<7gI_8Gf%Zy?mlD5Q=3c8da%knDC>I9PB>Z6*FAO9 ze$ZVslck&8%3IQZ{|fD6i&EA*hX33eHFAnIblSSJb2BcoyQP#r=d17-8=IHZ{Y}=E zZq1(2gzI5)FL=1_BlF+hRUEf1QZ4LKl~Q^FO@`~_jYM)nGQm+&9UefUn5)p(b-apK zNkL&}6&j%-V3-MZIp)gdI^CY*oEUh4vSr={_g$S@U%hm#%}#Hm!+PIK1XttA;$Dl3 z&(ZKYr;#FqtDwMnRak`DFZT*N^jZtNkBr%KuIa_`)}>Frzkez2Yczb{%1_K$Ig_^= zmKd=^EW6mol#($C11U{ zk_=}>tDj(E$j3;(*L(5cWH27Ncu1~$3H=_2tpVUf;`K0bb($1F1T=A&s3g~DELjCy zEol4FsbP`R8V!A1@fHa4Zs>e zQ)cx&!?%c%&~?uJ+7)&1Vb7ih2PK-Eg8B59bZ%U{@I|;b*uNPM!QM+Dl5}xkgZ|`9 zcO~z$cA^36NA|<+nSsR3JTUQBs0uXSN_Ow)bhqq9#aqx3wEE@%a5HEPjAqcsU4y;` z{>p(%%G2`Y^ZlWcbeKFTUyQ=Y6EQF&L*WB@INglV;przDeN4;uP;&I7V-7LxgR|5% z3E28ZXY;?bW7@qe^hQHF)%~|k5uD_4vzBB9{(R*Y^w$aS)%J8G?e9wO-Saw)eh570 z{erX9ADt8B7M1%sdy3;+Rt8J$c@T5=Q%`tuN?N{_NQ#sUU#}&5c1`wzEZD`mgzqa{ zzyG|NmudXs0grQ&snd>#2{WXYTUmtD)abL%6=?6d^U^$T>q=WkR*mqi>f=}ZmQ|s= zP5K(=o><>6nDLuA8=%<7n4<>t$>MgenwA(;Xwpua@rLz(K;O#{a29@Gk$ra7FX5Ep zvXM{fL`Ly5!o>Uei#}i75>j-<9hQ^nm-*#?;MA+0x+|0=2;oXoDO-=;_vz<#N4*9{ zP;J6GF{so%4Kn(B8gKyw8@xp6Jk`h7P?KwY`e_$_?mS8pVvqN^asOY@NMbB0;4OH@ z80s!AQIMigh`K`^cV3C}#ohnw+HL-~l;B(C?wJ}D7)cZM+d zu##pc1*r4D8Z;A-oQI2fE&NYyx9|v{ z0FRM`C4vcV>?lvY)v=~U)*^EP>kV!ubVDz4QSb65*$ogTy2YM@mXSfrZe3tpQ7qga zI9rWZS-s=HKkpR;eY-4-eMv(R+%D@*Y7zE43RmeOfJLiNY{Lp?^;y`hrddG-w30zS z_K`f4NIGP;3go5_paki|8h9TO2KB08%yO8~B=B6UzuAWeH_CuV$yRc;(exP%x#u!^ zqqs|RQFYqvqq`}uv%j=ek1dN_8F~dXW0@w#F-68^uxv>O!+svL!J{~m&!hf-% zS8XX1D-eq23et?m3tL4A6rscVN&?zBbqZHGy{^Rcp|%x%ah}nl34){P)#PZn3@wf< zh)=`S;tW9CHo>aCBdG6Fb!3SfuBG zIUk8e{7RMrP@cisbQED@bFB%y++fj*A*y)66F@VnS6=nP-oixo7&EpoM&`?Vow~my z87Wy`yekT&0%f85k8qUsp|lAb?6AehOL09Sg1H{uxp?aR2Ml!qH`6~o^aM(E($ zqwpBi6^dg!nJn-k3p_x~_nxpnrIwe;N{+sc+42WIylG?KuCE4aaF@(!tRXaxIOuxNAPgd01QQ zI<9l*RH-prw+q2JMo8bT1zi0%>x7?H7J>YQ=)j zd|_8l{NA*N%+p+L6{7euEL-K7J 
zX5EE604T*4#o}uwh;ivEVcmlF+HOcO8ZVzwO3{!~8H#p9E--R@gl_oJYX1csaX=0c zHZ`a~KerdYw->hWOj&wvo24NqYgXLU44vz7u5BCq=!r&9D$nQc=)6Wg*}BefF^_y2 z>4!&-jh|da7i*+SyRLy1-u%mPfns2fCZp+@AY$PohC}!?Sh`s-#E=9DJUJ_CY*%Ph zEA*1V*Xhss^C!EFGsgE`H*LGW+ zGptl2d}6yf?f0(x@80Oe$`H;3#GT~WLtY6dFHRGKb=vj?2Amf{x$40CuSc`&2lvxk>hNjxbm?GRS7=TSwWy`Y8Nz zZ%D#hjp76!-0pt3gRYDhTjD&w6qM~45eSm~kkm7(MpJ@aAF_O0ofp;OTHgj=W_0hCR`b5m+0ghy))IofrnhLxa#*B*L) zEJRm^8r}ve7CxR|+!zYCnG*O*nwvgMhp*makOsWRAD~SK+S9J86NrooTepq(-^KFk z=f}$6OZ4J=o;}EbZ}O@n;mfhZ>31nV&vQ5b99;UTIz`@DfHlgE6R&~=hx$+-h5&XM z(8)R1Ioy`Jsedcd(;W=n+t`qg&${pKwC=WUHSK{?oU&bHh)F+lX5GEFA?3dO9IeoJ zvm+k<@yi;jmAa^$n$;hS&f4w`C7DqLHHP;Q6@sX(_MNnW%lg zhLyllEjyoEm@bPplY0c<(tqZ+Ri@Pp*V)kp&^e8@8_1>DQvvCovGiqWhL_hk+Wr+v zz^Ap&!~2qAez5BcP)7lV~p9i<(3;S`WlB*73|f zVGhB~qd*huJu*;iy(Kf{689bHhssmQ3?4rsq1qHqlNlOMdPSbFk{bonVDJ-oJUsUK zH4RyMawqSRs-!c0Y+H}HDIV2}nWt;3hT!iE#^V*w-Yf&J6X^&mlD{zNRqIn+p18Zp z(GevLwX4S^0 zSVj2a&faT^vT60P(efgz4rzM)_M-eS&X_gcGn+k_EnL{HQIr-Tm4$FeTmM>Fka)hl#X~dKjMMx(l^#bra#en@UaPC^4x3VIEy#4ze^|=DZI;K>#~G ztuU9njesYsWX#@YM=!h_Br)~h_erH~VlfQhCHnA|{(%)ji(A6` z10-kEM0L@OLWSCXvFp28^Nl&Q-f2hP-_A~wPrY9*C%_VWA}pWeOCmUvyiH>4$R)^I zy?z4uV@uy`M>2pND$KeSk+ZqK%!+^C{kT4jkm%O5g3xH&@!YC zC_S$^oo}gD9czsZ{rFGzA!VZJo3-7 zsh@ZB#waXN^G3TMIVOj&!tmR9oqz1$b%fD75!>~UG2hc~$oz=$mM}?0`f)Y@!s&Xp)=;^G-Mu{V~Ktb;ZFxdE-^$bU8X~JcGDLk5kCnB8EX5K!DxXQ|G>3p zD1R4(aL{B8>eCkT{&Wy$dgTRn2sUuRqyuhp{HM~@*v^e%DyIwuq>xdigM*91poZw- z#*V+f+*kD5%{~*LT{4dMkFbF4%QA34vh=?dEglsCKo$o=T$oi4Y*;4n`_yj1w~FmB z7g!uA$cx_D?ln0LOJ1PMa+d$>!2_26Xm&R0H68i!&mBHx1m=VLw`)CR1z;|OK}TWK z44k~4KohueRunOT?4wb+)J~Zdt5=3DG19~M;p0&0 zuI?OD{NlyTYVcKg+jCaft#cd&dGFP3FUr(ZZtD^2HT16RG(v^8-|o(L+Rg6fB*aS<&t zbB&1Cj!_i-l)OXrUR+RXixQWsIFU(rJim#L{Pb`M#qUsRQ*g?3cQbD|{{y9xI-er= zwUjvNvgThFNkw_I;%{+1&xjuz0u$lS^j_=|x_s0Wx|r^*bRX3RuXdp3*;1Is z*@vmPa;Z!|aBr=9RWU&dTkw+-S<2*?UD|pF^Kh z6L%z7R1|7&*&-P(v9F)SPmcE<5pAm|!27vR1aDgn%Ib|>iRy+P%;po0*Q^drHgA6b zELX++KCZp*J$ohY)ZHZGCbMMAe!43nm5j~D^3N|+3T~!VHl;M_$J0N0C-{e+rp$NZ zPIE^-N&WiI-N9YEHG;BcmOOal}(_#4M1B&_i7L9bk+wRqJQf}SWt(OJ|4;qWX23t8Vn42iIB6L{XZ>$ zQy*Mo!1xBil+`1uW<*(G{uTS_OcSvsUNXuA@YB8&CrH8NnW4vqSiaWt4;X7zIILSI zmN{=C>v=MkWa+PC{i2i6Z3yT*q9Tg>_s#%3%a~{%R1hdimK0g@Gx9z^aZ2lef2zE}>-AuXYlgf*~F&cNAq~xrNM6ROVmrXrtIb@-b#@_UIT0DOyoQ zWBD?Y=_Nc2McG9UJ*O;8E^hx~wG9t3NIpV6$91Clb61>QE5M4n7e8)bxYlS23w{`^vXKUWA=p1n_%u;051ZUQaLz_$+xdwKA!199czH z-3Gop9$Gmn@lZLthWo+xYhJIX8&I=3t8o6BTgRex;slSpDKO^%%#9x_obL^SDnnbT z^x65K%r!jBO;^N0)AXyj0{Zt1BW2)ongp#S7lA@fv3ESmk5jUPAA)pSR>aEl#BZsr zO@1UBTmBBUFFkYR$z=6TIQkg*YpX)z8F&w7oKXOAsV8z*)s|+6RS1({=W+2}JR=7+!D7$zWA>T><~lySwtS(s>oW^qZH zg(4NQIb`73S*&l^}Ncri>-fW|K4ng)UEyGMjG{eMd88*E+mlDEHjo*HADH$;RPZC*& zqPFRURAjdR>dKeDUpEkPVPCZmI@&>)lY1uDw5(^+}F?V7$u;qBJ36smxG{(Ub@}{8=c`s zic6zs7ZRZjieQfmYh>n|B+&8qP=^kR`_(!7gH9lfgPKHrTaG%A>+Tebo%{fNAbs;nA5?(tCX4Tv6^Y|gFYPE3DporXWMSZ zr#rF&!$AhO0ljR-D<+ag+~}N6&-CrR-JmpJN=kg%N|Wjjw_q{aL1R93m@IlX;SxrT z>cJ}U`Z68j+-9JTGpuyoXcw0%JOnmEN>>0}I}$* zgM_}N^Xas0N~r`EM*c=5%oT)J8v3ujy7?O(1qM9slMFxJH%*G1IO|ckEZaJ7f&~T( z&fV>UmUP9D#|n=pEgWKMZLkTBu+Ep;!l>rn)BJFWusmBT1FX#=R=XO#Ha-R0dib+& z3R{vFD*n!kncjQ>6`Jn2e*k$Id9g39%!0tCEBdjHzu06&7!d2*r3}8<%>C&vP1AY? 
zk86Q4=V*3U#f9c~D|mDoMIqj#PnwiNv6u({y7 z7rGrOcIr9p4gaL85s=t1C?NTReJ3w?JTh(hF=x-eKX*TFK?fF2neC1nEiWHC3u+D| zR)4#8<}+~qB;>CRSItFqZeFwW|LC6;@dLmz7byRZ+0?J3X}3q{&t!e4JRNyLcyrmY z+H50uZNVuDUg$WObHAQt%@$xx-{DE@2`BG8%f}H0yClKzh}oBA zzj7Y)q3F`+jHY_{er2%QZ6HdFHa1XHwR>j|7Z%x3jauWB^04)f_wc^=4vdaBxEWMw|(H3{LhUg;9c+0UNT?vHNL=q^JW4giXBK6I>>wy3L(~ zYDoi~<^c@n+8FKnUka{A&9({anU`1ys!hhJDr zUKwH`ym$&#Q9`47)+#Pz%Dy8iq0&566%Ey&i-f^a<J;&X9c@kG5I@t*HJ8f^Ix(A0Ya#RM4H`V2!C2EtmNZWL`7o!SL7 z-Je~bVKDjd?Nh6b?R~B^bxhvpwd+q=CexQ=YY2mvqoW?EM+%lL5JjelqQ+*Zlk;C1 z+2?=+Bolwk(8=9QKIzjMiG_FFj;M90zx)y%Su|4q!3=(#pjm`_1Vxo4MRq(B7$=I6o zW8zdP6V>aonl=|@Z&072rwRO3@KkliSYpt)y07vDx;@C;-)YQoPcqroGg5*RJ&DL|fZ*j*`k>xy*D!-A6yM-UUm{812|u zgeDp7e`n-pF$9?>57ht2O@rdsFv|q(Ox)oT#Z5&$jx|?j(5ak|r_BBJl-bTt8ay?& zCvyQZ8~^+_B2TeuAG%S_c(e!19x0~inuNrmc$^N&f?~PtqgVK4SHDhqfpz1=cf?@Y zc^W;tPqW^ACg$xUXwsqrM7B4#wwo5?((u{G3Yi#xkr3v3-Sy&{PVF-@w1ye#K+3Z> zdJpI#cc@$@)ZDZfa z8C>irL0rVgEZpPwXgZv*NFGcV={Z{ZH+$_mBtC~KGtyTc(-Ar3G6QMc_->3`NY;ut z-sU?+)Xh7N0WJcJlTp6b_@Gd-fg_{wZO*(S?7C!C9iRd!Jl&_EPXe3jZ_`(hWn2OY z60lz-5e;bXVx`)DPSRl4XT&A|&<9KdoYccs;Tp*){~PH(^#rr%QwVYI`HU{y5Ck^B zPsunbGlB0n^7AyZhshbsLGdRzi!FJp+d7{_Rq6k5sB3-4Gw@Js7= z*5RZ|-*Q_mM{GELQT_2)q8oqJVoS?}NXE@eWWQeWFvcgY)954zW$I;{IM_9I6i;J!HFI!jus1_c#HGZei+l=5@t-N_DFql+5YLECO@b#--EWN$fjshc)N9z5!oNc$ZmLNf0 ziggqZn6d7l0j@XxC0#L*ZHPT1(Y#*UT=-@abkaC(&%&nDhlA(OoDPax(08$8@^Vs~ zkzGVQ_1hc$IndvKmmfLA^1ZRicT$~OPp6iHF;PaNH1UmtaBOUQhuk!cF*i5wO9PFBiq?fw@gI zQ72KQ>TjkVa~Fkfy9sG?S*!h9LwIw?dHw!k6r8>_QNpmg>D4_6ZJ3i;JKn8kf)kA6*$O$8o+3!+k!g@AIBn%p(Q;e;ZMCLVGOmj=r)osRvY;erb@L<_BxdNNKPq z!2LTukSmD|f?fW1@l#SG>>v?W49NuB;;UES>@<)lsgR58@b2s38;F^zsDS7SQZQs( zt&c(UgYoV(@9#c=OwL^b|MFPc@4v1ZErNE!aI<6S6u=F`E4a#@4k};cYs|mXgL`&| z>}63YoiHqqX2v%5b2+i;;a5hZE^Yukwf(?>N(5NAcmW?kFEGFUCi)USTLarz7A6MDRgc_Y`I36lD_I1);gA z8p$<-NUeKt>{A)WYcxH^I3HsMTPgBMapWl>UUo2_n-DX@-+(cseO;$=fie7g*W&U_ zP4B`Cu7fj=b?~(KD*-OHN&0F~)Wj8ylm*T%$gF&Uu_{+8jv2W{Pk*3FY?lH{<0`5Q zXWi=sr5Z>VL_Z1^mIDVVdX>&Wbugw{B7YeTJ&f1inG4d?S&t4xx}G=n#X^V&?FH!u z*~)v$q*aUdKJ}X?tjXWGuEXTE7$XC}R#p#n4{O*T3-&MCBe;{i9ap%B@>|c!&0}Ie zB0~HggRf)ud<#y{0B}-Fyo>IW@A%-ub4g}a@^9xRi|g8RF zZ?&|D5@p^y{mUY*r!lf@)WubO`De+czU~hHU5dm1EsGdJal#02Q|*E<|2J-w*~JBS zMA>$4LaW5b8tKFA(*W!B_a~f1SfEJAevM;a0B)0pIik&f$S5B_?x#;IoRo44I>yBO z?ceZ!yT^iaSLW*M@A$@ZbPA&H7br~oc{?8PH`d_X^tYX=5L;wQF9&k!f4|nw!OxDOmUH1^U$o)BJ=`P`maLI5MgljlczuagtCABLG6TOek~WUuEk&5BzNgP)B>NGVGnFoLBOBJCyZ!U+`}eONqZxxkodEMLSEj;WaXS^h4;%TmB#{YZ=JZA9}5S(ml1Lc3rZFDv=O3HY1*@^ z`ODTcVCQ4^SBK$2&mVbYo-Fj>2X;3$ZT4^7IKjO98V^(K0b?%VwbzcxHjG(BV6$v4poL2=_^hmS9CKAmoO zqa=0$*UmrQRvI!#5JFgU?F$dLZeDGoRI)3qHW<=D%yx02fg5;I7xF{ag*zi6mrEa+LrO5nYg z-tR!0p|sz~RXE%j6pbWzadhdh^ehGS_+-FaL4}*T&0ndaW;nPWat|1eE=V6S(MFU_ z-s3@z@+8z=MusVM8l$L%eIT_O5l%Y4V2!?Mpbb9()tE5W?g|ckBJ%VLoU@mYePa0>$l!mX60vFTt+xB7~M!tN~PvYqaD`CUThd84#X_U~5&vAMqM zjoHsYE7%5dA;mUzyWlPM3BZ?I0LWl235l2Iajhvzv2`xaeE#>6#Z|}4qH3#7P12&+ z-TM@^O6P?WwT~K~S;rB2S9Dq5fqh7t;e)@2aQ|pH6%*_KTD_rd**e|X)C_g0`#ZN# zU_9F+77w3sRvrB5IR)!YV;Vtbld4ok)$}wh>pdmcT7F0lm5U$!T_+UMFGs72FrJvJ zSTD&*JcPVCPs`sCpnGe)KvemQ=$9$DccoggnRp23OMRRnw8_}KhY!}>xG6F=Q{&vr zZ*Nmn<{BFpQfbiOS8HdLO(>{fgRQz(KuGz``y%h@j1Ehx^s+mLmu6 z_v}?xC=)e&O?Y41&hgck^l2~S_lBZ#x~xw>e^yy>@*RBw&L1<6>&_i&Nh{TA+lxyDy}k8CiSbyTBXySG%}P;>yw&2KB{KYzhbspekw zfsFFH0%A)5tyZjCxV?gXV*+~4!n(q?E93eNbJ5}OQE&5|IWbO^I7U);E7ZS=+`&^?D(UL*VkDsHP|*U((?LQHakAJsDfC zcHt)ay}O~*bg)!-VHzTq^bD(LyQcOok(e?ao~42J8v!vO4wyBV?={}y&yLb(?EsFH~YHHN@vLbzFIu_LG%iJf1i%!BEn@VFN`(t~WDbDk5GD5Xdc#+Q;0Gdml)d)WWpYtdamu@DlP-h{bn&_zLTajN zI!|k}zknVA0p(7Lb(ZkA3KK>tOR+Fl$Gh3sJ((YYs`^t%M{27u7+523Q6w0Livh`d%xjUIF_r!Ygu 
z8s`yZ-_AWzYD`~qf4=7Ne3xafWzRU&&P+md;`XJYIgj~%qGTEXEB^!p;>Xovhj5887Faeow^U|Zb)?v zL7bsfkVeFbn4*$!gUQZGxh#@=lM>$9!Er7wH^J>_n$8i%38`fEE}cZ2d;A;5*-!h3 zR@WJIHoNK%C`|}DrT&^o^w08bA`}~aWA&LRiy4X%@e94L-bWtbU1l$J^TB3+j_4Df znoqq0ps4sUX19-m?}ISyFPz9Q1uwil69xKe^4e{GT0^j1X3LxY_lx9|9YN68Ot!Bf zQvHsW2yNv^sr-o#%kew<7RdPUp5;zb{61toU2;*P?-_lU)G`uE*a)OW;5OTB|sl z#91MC4QUIFoOGTfmIY%rLdeWw`CO6&JRZuC|NempRXU?$zH!Hn)j{OTVT>*bV8~3*RJ%wq>gitXM~d4 zqK?(aNs*4|zO5h#Bu;!!UFym=D5kIoL+DpBJ8|}PpU_nuWd>%}iWYhw7Fm~rdghuE zIUO03q&5b**bWRCUqHZ|@f+JC6@h96dsf>QU1qJuFBw4{h*bwNDK2N{jNXfc$cNj3NX5ajH z`vCQ@+FpusdSM+@GlcNYY%?wtcsnXa;8O@!jD=c-oYehH=gpJetbd9Se5bN;86!33=o!jI=^y!O z62Mi?r4EnC9n49Jej}BZF|PfJ9KimnIydMqWAS<2Yf~c;T&gx5mNZwxQ2OK z$xFZAexFWVxji1g?V#RSrCuqo2 z?MK<{ROtgbC`2xxFzw4gE`z6tDIrEGe4#)OG#gOai1q#>9M%t0e;RFFnP#t7M)U|o_diUjivrzSAP`xi(@Kd~P`10?fW`?v9g(|LkGs?R`x zB5L^EFrG(LXr2mi?SK^dKcf>*s0Nc|SUIQGEQ1 z4(d>*9G1fz^Z2=^am%>an}-8K?4XywoPOo^gEcYjk6kNvBq&Y@Ev@P+BDd1PU8|wb zBQ)e47Z2+ma}%w~k-atjZy{Gz2wuL3oFYKG>nD!wyxvKmUoPeAQ|qFO+E2QEWWw^MZgjA?GChzBW5NzCUrY{idsfa6+7!5ha7rAX!m;^aI-hYayr3( z7pPc%2`RLRxx^L{O{e|h0YTIj1wS@!?>t#kv-cy>cRg<+%G|w}4w~;m*nwqtZTm(f zifC+6k(%}Ln+M7zde!l$hI;Mj(+AWPs)cbtX)bcEbSQG z>dpdsgpG<-4KEhVc%l`X<{&F*-ny-~-VsyPD;vP^ZNR!N zJ%v>I1~m;^;2w`L7%`PlmvYkf8%(*b@6D>dO;6c*gsxioT8+a-4axrKMPT{-EA?0^ ziDt7=Cm%vF#ff(9vE3NFv&pwL6OF zR}xffmlw@NZq>CElZo3UyD~`MZNZg~JYW*=`9qG?0s#IPtz0W7^AKNh#+sbd<;9b@}@8VWm8$Tj1HQCN3`vc1o< z$x_26SMsey<>u?<#$=9@@j7%e`gT^5wQ%}}7H@Qe&m$j-PNfokO%8>{y4X%Cf1 zY*f6OWa~qH_QIgME)Me*s;!D0h6&%V9XgT-sug1sX?{Dy`PGg#a7O|wIOIx84fam!E#cTE0tvS4{b z!c_ImDf96O z-I;%1Gsbn}i_Hm7*1(>sb5(G(KPA1c!Q@t>*WN@FLdie`=lpT80rc(mIdHCf6Bg{Fdx2GXdUH8#t=z3bxYwfSDF|hCf z)9QbjcV2ap0Xy>ne5l1H1f(kw@xlt>9<5rYUI01hsgo}th3Yb6=xbY5{89mT4RB$P zF!0}5qpOFBjs31?WZFlr`Vj+>ENG{2>Xx8yKIbc2JV4HIPh6t^?EqX4te|82U;FYw zVQi7DS%|?=-0ukTJaPkY;cL@^QD6*E+}dAgLlU)6uZ?OJMoA5oE^;b>^XGC20(Px* zU=Y;4Wv$-VcIN-uE(ry9!|{<^Kvf|k5iAM&4X@7eb}#IIPBS2qqTYr79{++o%Jr?oH*Q8%#9Yc{!0+tpfUu{ zM@dal{VsseJJ?q5o}fdfr?0|Gtx(v018Iv6H-s?4g%Nh9D%G3@7c~cwGXS13y6$~U zrw0iHZk8@*O{-K!FIH4Kki9`kx;}$1KUld$`)Q04X^Nu3m=Y#M>Vi`;{shQ9EvZ7- zhz}XT=dSN&@*LS0eOF7qeClXA+fpu*Ra$KAMV57i*w_yn{n^%}y;1unHTac@#5>%g zZ2GU*AwM{&fPJQ1q>6}bMaV6?w_|gM@;P4(0qH>E4mS7-_uM|$qoQMmZQ{ug2$9Z2b1>_g{#x?NpJb{^%I&8= zga>h=w}f0-VdIu~Ji0!#O@!e^t??ut%`=Cz1KCTTDsbpoza&}i5w@QnHz@LT7O_NT z0u=QorKj6+7 znym5`9{fOj&x_P4TID8dA-juVeTel)N(#mc(WxDfqqvMe%jfwq^eIi~BV?C}YZ5s< zft*5YnIJ*iW*=ZPWu92}SMuu|IF+rF98C!UX1In09)V?D6dn{j(~cmFCF@))*TEJ| zn4yL`P6V{e#QtU}+eMhCk;A?-3#?+Sl(LhEv0OXHp>4^3_>9m3z`1vu7Z;c6h$9E~ zHr}t8)m4J=#)RE`I0`Rmjn5Ue@!LCIL&EMBSETqI?XfAXp&FaH^oMjp6Q@=)=Ci3Y zEl}BAgeKy57;1%9AL}lxJSsB|S?-^8xp~s`-OZK4(D(e*_j15m1*JQi!lXUtOXU#y z%G*2yZKg&`9+e1bvCE1yZ?kV+9}E;BH)-0C$HGb+)MMY#Nj^fpDl|VsPSo#-{+gY& z=jVmU9&Idq0O~XGr_&HOk-CJ3Rl6{-FNXgB2v~7W(BI`<-EG|~_kmJDm0Dy3)cZE8 z$cO%|@&QV7s@d|(m;5h7#;%K3di|fpw;#-F&3>9SayXAPSw~(2pkcSozP_*{rh?8`p*9`@xDslU!0sT@9BH8~oU=sa6I-P#WN)dfY@pSo#Ji zTq0c@p1yJLl!fKZq{(H0AxoK79PZkKc;q&zJ^pr)0-6;XXb0!qKD-xuw>6+N`!e$A zoIZx7-9O*q(Oq#}x4^KbeZq6I4_& z(s#Xy0+f* zT0qzsqLfT!yzoI6Nqf)H&#h3rs5d{Eqla}meP|5bR158rpfJ~}bj1VLy>#t&&Esd! z_Fh{e&Mw5F;fgLa3lhFr1P}%=AM)7DT{Awx`T6Rb^$9EOv#Kk)iDm9vhV?PNlfa07$xUuZGy z$6=bgus|86)PGttJiv&0FP7_R)aw@$`Qa{H+BgkxMV)nGV5R#Os#V{RWjMU;2gbpK zd?xDjxtXH>eqjr0x);ADQ;_ukBH?j)CB8z;A*mvvI7^C;LMomWQK}ks}NTeCkRrsG%5K1zw;rkurTP zvJOFkv!5jbzr>)2080DX;Mnuu)to9uZX`CfiN!8KA4@hqn*-8uoHOVMz#TeceAZV- z^#fjP>_np^QtbRVX3w4XMP}MResTQ4SnFx?YD@dLG)T_E?uzYoGzoP;9Fo-Wj=DL; znV36bE^br+W|S<%Tie#**kj6)S=xuY_^K`4Hi%cjr8ZmKqti#UrI9aVIHBl{n%;T! 
zqru|9%+kfagCZ9*pzOsdnrq^wZR$bVlE=ZN5-ACJ`}2DeSE-&ku;uk=%EM6AbNPJ1 zBgq|6)Mg55vd_(On`1h~6$;aCxR?5s@Ks+2=;b{F+`{@ysmz?E_OzmA zpyWH>D&7lN@WFYXk*@Wgz4zS98^ zZ(FMWiMJ}!I#%W|F)Q^yLC9`NGQkn~Xr`07?@DdosLeKBlTOsoiXf@dd$sX;K`I@V zS}UXP`5QDuG4u|JVe!nNc71%V5>&2&Rzikq9&L#Mi^FoUzAf$I*VJ=w;W0Cydk6j9 zQ#+yEu*v2I56pn@N(A1Z<}?9BTC;Tmry93+VRlx@-t4V=nkN7bGI239%k8T9N*Cwc zX?2a$u`{qbhg-H>P_8+xmp7(x$dEe>jbJL>s>!+5qSL@6TV7TWptX`! zI9@OT0|>jlO3E(iRmqO1UJ9l#ykOS#IvSq&L`v~IHZKP3g#P*g9T>QE^->38)FxOn zZ9%We5Z=Ib%$v_2$(`I{xuBP;>w}f`gFg3h(#<8a&ayr=ca^QmRB|DUr+Py0$w31-DjVvZfMj9G%?3#T_|`h9d!6?-L_l(D)U#JyYgg8 z62n-s{^vw`{_+T|I8D>YroaPsE#6&I@T2+;U)@sdqDe5ftI9Fw%nooZea|S@=I8gR z@^i)$RtA!3?^6!DqE^EfXVUt|8KdmL5hHSg-ywV2jT&n+!p+>7 zUI}+3q-jBq-l`MOTfzZ1xmLz1#-ixZJU_>!Pf@fkdS{^=2uZ)Z55q|<(6w-_FR!AA zVJx3#y`+pjf~^(npufOuu&|7`;?qX&7CC&`V8|9m#?xI(Ts1FJSV?ff%%(C&b^h{Z z2ir$ndfm)02}d3AX)lR51k0Y0ba zBL|v0_e%J>eNGFF2T{}c{@P>uA%aZn&44J?53^DobD`kHirp_?(sAvKp%cKfwhM)m z!(%i#PR4DAjq!tngK5t9Pl zS98h{Yh=ZDQXI%p3F3OBkk|0NSUe~+-MUynU)Q({E1YO)G>^(Z)`+k1#1PEntqksR)QyJi%It10P^5ZfDgZyyM3T-RBrdi(+&BU z2{U8bRX@UmM}mE|COtl2S5cCw!k_vLGZfNZ_~UYlDIfTvnaMp))lPT5*_G!XEySxm zVZJ7ubRAIio5D8QYU*#6Ui$r5pv7%Xjwt`b((?o()$$PIzjJ>5A*e9dNcTsz+xy`- z#=I1G6Elt>PfgdD#sF_<)|A#zu}Qar&5{f=&{0Cs_dR}y4b&C#fY&>!k>Yu^t*w7C zHQM-6m`@vZ6B=0Up7O<_kF2-NQ1zpaLj=!%#k@jqGh_b53jb()k!McAHzI8CD|DEq zXE8k+Hgw>8)K)BB1eDpeqg?TwNN5_^Y|?(YTk^awMTC zrl_$>PF;4~n(3h;na;8&LAAKL`LBB#J_N&*nLUo|DV)j1S4$oV8y@_Ic0-vmGwk zQpmkaXz;+@q)o7LnY>$#0c|}Qob4 zpsKTs!~w=LV=^##3r0a36IpTORgi7qYQ}su@NVJGERF_fk-6Iq2Wc5C4EoOjBSod& zy^1Bd{q!adl|4Z@ItxTtuA5-UuC$RpdAZ#$wib|1*EXbO(#wQ!#!qGEA~~>G6&Bl8 zf$Hk8Te2&mYnQ8htxCv&aMcEh@eO=|TK0WrshH16NJd>+VK z+TK904jVrcHTB$d^d|DEp^n5EJS%=?Oy4+^8N{;<8-F+*p1NN2u9URe4x-$K z$^aS0579r!}5sW2Da!Pdju$cC%KM^Jb`u!ZsL|{)a|K)$WclU4M$EN#IgqNV$ zbjkl_dU-SeylDkx7&j3jK)X95EpIo^uK(c@P8A^F3yS)Qv;dKrGdOfWl6t{4xwrfy2w`UB}i?0deX3+^RJoca+jS9@Bp z+I>d7?l(_n;FtRDj{VKh^!v4%9a^P!3y?@Zo%?z`n`&nSTO$AB^8UmsHIf-8YWAyH z?vw__rS2XX=P95UTaW*DX;J0n7O@2MSk0w*CS}B6sOK+(@vOZyx{nvQqD2bfGcs9^ ze>;T){^Am%jv(H={=z`gD@5~BGpL)WYj-)6YnWLcG@q=sg4A1+u4C-kgPoo`$OOk% zqbT3M`8%TB$rX|hEf;)tt_TwholMW#eanAHnJ~Be0%GL>7K8*cX2aZ7VxBy$GNGVu zTJ_|=#oCIg23MzCR!um)72Fe45lZxqe~q;T@a3_JLDgLM)KeO}9P+saynS*XZ+NvC zakImIs@l)@hN~z&%b!GhA?V4$$6QcPuV+=?t@r0JDSvRoRl9+YEX$&W9;Lf_egyo) z&5J6$SuX~0$|fN~imoyq3ECrsQx{rbGMnzMS}4;;cnw)*~D zyDzGDR(vaJ}w- z+6>*IEYTF{Rza?|T5rQM!-t~gyjCxl2&_~4YZF9~`8>%!7?ZIu81t(vpm-W|-$k{Y z*7NdS8j+YX|^pk!^P;CBKHZM9LBhoLanHVgB4A@a|WRkWj4xD5!91$FVR zH`wP}!XNKHUZ(49(&!~QI(a}fX>ag&qBO))ZkX8m!|Mm0y6dB!qo zu>Pp!Z?&`=dT>=t7y8@hC(?J)WZ-i1@IoGoj!ke$1TC% z!1EPX4R$YX>vl>=Yrm{JgW&SP4d+)imw(PJg1!PjoF>a-asc!;>JMu@2EUPFL?nAK zIEb{Z3?7kZaF3kXobcEJvAkX#>`ei>U42o~4Qb1ONC~ zXyqsfZE*)cjaXL7wBh}*x5{1x3$no0j2ivzt|nd1?aEDp^rFgF@!bJr4=!+_@RRw$ zQW6rVlbL~biao~)PCk6X5cp*W^H`07<|`O4>E}Jx4;#unF&$RdE-H0Tp6*TDQqJpl zB>t&Z5M2KIXV4I#Frc-&E@F91hmHDrR~B?rwI^5|RIJf5RB(M8!6y7euH>W?@oo-= z>yZ|T5J7cr0xcUwz%qMrCxh5sXLInx7~F;N5n;d2JcX|WAJy)Dsp$m(=dg0^XvWTP z3dunvhT%h2O~6}Y(?{(?|K%^F{l2c;V*^3H4)FfRv{R^FanGjeRBJ+)V`E91aGj{UqSS#=X5F-!sA9 z(^}P5)7H-~JX>u?M&o^3+!YtK{u8MrWYuKG*P~l0ltevwtZIx_FR|ZEO=5!Gu;Fa+ zvwCU8G#b3h&t-Qw^Qp^+xsHAPJPZXN{}Ah)A>9*uL@^;wgzoES>X}5YHU4&~f-Mg$ zUy#Hzs6G*Wk{s;$a* za}_(e5H=7Mw;2~r#;=4&^|GivDqbcRtnR38G29J+v{+aHRx%O8Q?i zpZq?n^}m2+cOS#6On=%(iHGQ6AyF#>lypiUezqs0XhaxWK1%E7)QD>r{dqqV^M{joAlLlXLne3g4V4hP?0P+@81~&89&^Gi5yG zDp2Y-nfC2oJ>gfTDs*UZ;B-aHuPp}Gj3umlP8>F7$C(Mt-0nRq@`M^LZ#rNSe~4VU zxkgo7*7cr$o6V4uZOSqFUjb z&!h2KmYv$`_uxAi-y#TM!cUoAtl{yyEy5_xASjH~Co;wyI(=QV_mG1jo+zqeL45S3 z#q71J9ZT!|LJeAPdGNph6LNn)B*wVmD6L6%-1DFARF-emNQD1oL 
z9<^cDQ!2cHWMD`$AFNJ@dHYqI>mewP4dTgLu&SePe3U6t9_sFXpwU6S@!wqSEBAW2 zOMFI_=$)&f$f}du1f6eU(zBLn{;kU?FmA@s*)y-OCyjrVKfjM=BB(oiHd?bqsu_Pg zV>kA?d&5WsQH{k!e&!?UAV&rWe204`%{jT8Bt?IM>r!PQma%Gy)@bICg>1|Cmzj5k z#&dk3m6<;FsvOGI;&c8g8b5`&=r9A4Nt$?&;-3QLI-hZQooy*jfb!~!6EVR{?;Eop z|Amp8k)@rOk0cVjAeNUCjOJ+`V&3diL0!_<7UrUNP8*RnX(o{@* z0U|WE;;da?DRY-oM4K~0FJF;Om2AkQu?C;HuZ}~%$-Y{b-s!HiYLH*DBd;N`dkk{&cv$dAhjtXp#uVnQC98c@CEB~JiR;}G#jTGZb0AW;m5d)y*6fcb0bfwNi$DOoHTP_sRzds zdOV4dVLbC|J}I~C_!Y1GGy}X(H9B0tESEU=W8;&?P%8O1ri58k$`4^w@Ui?irF>G3 z&AR4pH1@RndwQ(bt?P{4{+-?$MC7yTf;VyiB2o5Vkuj|IxRQ%@JR8nUpc*l=<_49NhZod;fmtI{#>t1^CRCzFu#@Y z5Bhg^Xbzc)n-#NvuJZclCfzx=?{I~5DS!j|wHPRLo?n4XJ%=&X(pu}Xbucd;Isev9Heo0*4$MdUKt zrB1MN*f{q3g^0rc=HU~O>p!Ra%wD!ld1-Kt%RVd`J0$BTUiT9d0q(c<`-QNL7W~ob z#A;gDO5AY7LpFk=kMYc8YLinsGBq9jtfc(mWdqcVp4achyBbwTPNX}b{Ty5Y`Ip{aiz~X2t)n`U z&ELj<4-*d*hNy)VJP9^=B2CSe{$32WZw3dVNFTwxE?9ea=yRuDJqBg^eg2Ht8Ls5~ zy8Vt*rWkpP*LCkKBs1n}t*Hh7e)0HAK!Lmu8whv6 z@ddXQKHP>q(KDtHJonE0|GWT{xQZFqW!)GfztM8Mmwce}se%l0TlfwjXYuJV5~{%( z1w)#KZ1|b)90G%FGwPd!&@HFi`|JwYtqo)Bpo$#wGJ+S^Ksg*L0J`?M^B4npa|xqA z!CR2P|trPE)^rV^>mhxLiz6Vd#0!M_*f#zS_*OLfXTJl1j4C~*qkdQQU6Lp}Cy zuEX@j;+Cv;RZrh7bcu$HPmRC)2LC|wB$*kYr%y{I|Jh``ap9FwHp@fC4CwLTtsS`W z?4?ccR<_I+yiKsv2Ss}6#pU1nvyN8}E&tfRX3!a99s;j;;a{JPKQ+MN;V$GVw8f<* zu<{y?>aHQ`K01l%hWYs5e|aG3A`lbeB`YK?wpXL#Ar4K6WPL_N=JGT59|rp8pOLZY zzaRg^!>reqB^$cCPV)Y|xUMtw`M6y7+xNb;#(T|$KEh0lwpQZupvi|}MjIe+#cCjf#>3Ws?I({(hxNJ7b( z{m-t}5!fWauL>yVW-=IKTraHw*ckfvXnS7tgKXA;qNDvv?AU5CLUWacy#J}d;egA$C7@u3{~aU zepPXJGLi`s9v`$6P25=F(a8PPFN!tk$uNQVhFO0`y6Lgn= zGc}XE>lbv{68B*IV+U2#a}|T zQ~&BhSGL&Dipaz3qf%p8QX``16dTYm-EfZeDm2Q95@o&Rehf}S@g+UD#g2V3m%Ua!oGY#6N_emuOmm@wdvQR&MdMe>p#GPL*iPZW{X zM-k%%jlhj*L?3ug-MT=i%7PYJl;x=amm)o^;(`Gj2I4G97I_b@pl*`LYSJdRF)p)L zVw=X+6*BZ6_p(5F)z{Egrkn!FOU5%C7B1#?fcs^*bA9K%Z$^OlLAXAND8cTz@dU`l zT!X$lzFTw{Cn1y)QT%%hXxgl16GQg^YOX2@RD-GMS+i9*Z8!EMgv3H%kg`CuNPRQR zU9AuQkL0(VyHOsq!LJ5;z)AcqGIgrMrO^f#uw~eR1*jDfkuuzfisz=pLzEO*GWMSNXV3S96yS}fo;8oK^*W@LH2QVCa7QG6E^-D62~&*_nNEvS>SVtKz6 z#wJ!OJ_jPUwA8o@M(Dwswb&%y0Ytd}My>k14eD1PcG+Us(i`*&C_SY`tbvV@`gdk; zMxLwe0NC>83HQb3k*zTNrN8R)&9NZ%Wig7J+MDYV?Cr>E%!@(*zV(5bepdnTFA@Ee${dv4p<& z)|1M8D->x#Ap=Y!+PM#gpVU+bufvTsu;m1HM5$|vRjjs#pEL^=phSZlu*w_}v=h1r zl}JdR30aVnBOFQ{WJp$3FQ3~y&iud=;JMl&RtMJ*+L4}yE!s3q8-yKbg7BRnwBO%= zTp%3+!?wLF@BZQdqVOZ9E40V6i)|wPgs<7{7{Ml%g zMC5Gj)S#|6A88R>rDZb^UdBmdG=>+UoX%;`WH*C=U)fG%A_o@q4(?M&`I?^C5;;c) zjoV)o#V>RB!0)`(+5EE&{c;6qM`N~Wk(*yG%>+kGdK~ATw4lu8ycn%tKVG!;WS9+U z``K!#rFxG~^j{=1#Z;Vz;!Cz!LATTN47>(O^)@TTO=`zoAHy&|`Sz?vSm^jIa~NoJ zRMa$&HnVV(q7*4-I4OszFzf#$Q)`RaNT!}l5}3eaIpKD=%V*0CGpxT98c&kcx{06F z8V}$%_HzY^<;!fU?Sd!UoaaX_mu3k;GTCB$@V4XZMQ9z8-WkDwP5CT5wvj*; z_Rj38$TNQ0jnPp4$nLSA-Ufqx^(0OmPL*q$CB^@;8RZ=&?hN!rrIF4Q00)6;$*5?W zM(PMgbNe+x!uIYS1mz7xKW)OT*=;N2;a_LMoI#_FI!MTCzuG`V2{3qL!w2@u&F z$f;V0o=zMoakvzBV6B@BU6B~aWwSh?NU2t&Z2s>S<=bWzq)Q8RprVr2D*_?CFp?!$ zr844_2$`@ulOKR#yD#?~u%>0m#ZBO%J_Bg7Pa-)?wv;DBMVdYMhFay9fYv-0&2^KXzqop)#(K~EBU3s1QZwH~i8<s(15qd(-eNVHr4>CNi(`>-hP4tqK$rdjX+ay1xhm@>{%=Ro};Pj%dI|#@@Tm zD{t(j#{0!gL$rrJNs%>utc+Rt>rAubu_&mlag?g#yCd$4E0g!qWoR1eY7}Y1yraK^ zwPJYsqtLEi_wTi$o@sNM${bmm+@U=vPA*$;-{G9n_-B*c} zNcCGtX<*SAgVai1#YMQn1zGj6%9;>R6q&{{6PZJc)ETo;^_DssK%w7HhEFMXS)fe_ z7yW*SEDgIo6pk5yEEGqt1FJYfu@1QNe6pXkTVN(}MbG(-#Gl8B*NriMvDQaC)ZnLD z#;UiFL-}7TKm@P*zS||5D~0k^@b)@~@5bK7zixE1k*E5GVruhKIsZCJBim}(1&zf#tnRrFSwZNA@F zMOn~g3g}}v8^-niR953qw&Opv(%ZvnEv<$iL=#y;6)1cC=A(w;rs|YZziu41!Drrr(lsUQ4_;=n;!yFSLvJM z=)4_*49|>U*vM6AR^Vv_2ySm>STTkY0}=#r&8hL&A{5#Qo(PuTK~~V>oy9BLWkqYZ z;NH5L-O5h$^mde~=Zx$yjn5i67&eGDv9ujaB@8dBm@8GKp@gFy-hZLrEZ5FSp%^lr 
zd_E?Z+g2^;`T#88Nc&1hFd^a1uASNB(65UunfQ`o_RLJQESjQyQ7E;Sp(OGqi>?++ z7V(SrBL2fak^93UaxP|W$c>jr(Pi66!0RxzIw&Y9tmqy|P zT_6|WvcPY#xC&ddH|X2AZNmh658A)0qV#`o9XebBXY-Xbpwy~L`lI97m^cdik*#T7 zy1RvvV-$a$6yzwLzQr3j9}z6uj;M?XEb+2oIrW8Bfl#%H#Sc!-D1Jcljm0?;bA zWWLm#W00r53r?g{1cw|mGDvzbBbo}t5I#zcD-5)lV)_?!goO-=tF2`~*$27L+Fpdp z63r)HhV0At3uzHO)fZtZg2>{>)=Il${49`h!c_ewm^(=~PAB6=oyQE(1Z8=s6N!Tz zP5)y!;HE>RJPPxE9z-X;bKxt)m346Jv_-|kqj>`=x_uP68t_!3bqoRtq2ev(+AbqD z(U@PBUz*5D0fDokki&b_DAc_qu-P=!5hKpfIJqx;h%GGfnTwqQemNDDX_H++OeYER zsNwjLA-9!2t507-kY~?ROtiLOo^g<)!Po_fOR@EREQcl;Ry+?)j-c%=Ch8Faxv70# zs+yC}lMtbAE9_C~B2nwyp0H^GV|7%N#H*jGN4=ECLgHdN3b2*L_^>%>(K+qh{bc&& zPU>P;P9DffL`e-e|9$mZ?C>W9u`evXJMK*_N+zvFoh66VHFCX)a}BV(niMX>-iDh& z3C%x#sh)K+sw@ln|xB0tz?$f|T=Kz&r1hJtEaU{xWe=( zjnq-rUEHA)*Vh8-QO4d!CFf!nJ&n_nIZf&R8XTOhe67Y(XG)ZPe?CoKGebn{{R=#l zmuxsH&1u-ofJ=oCD~iV;(*j8hE9uiJ63orAB>^*5Bc(PnahnG{<%gxckc9MfL8Xumctc!5b{~oI}qIk6aQw;%GkpL`eWFm8Cu2gNu((CB3 zJje2F&y3~^_61T%GpUZ zC!5|#q*x`b&N^=9MHQmj@oh#4j19-U@U$PiF*`9`)F*5=UKL%>I&sD!?;jZXCat0< z^J(6z)M}@a(pqSZp@_Ut@7t6$l+AJTSM?BSn>6DFEgeX_V(UN{CZ58B{|fKEdwY0>C{v*+s2`DIFaE~x>Y*M zRH5SCltuABHUlgWc729b4p7$WhYSyYx%>;c;P=bG1*-NmMLSNt2koiqLeOUgS61QiR@ZAFW%^I=<94^v**nwfz~0VIX2S#_abJI_H=JL*3c( z>!|`flv*&7R#*?Rw@&XtPX~N-VuqGu^Wxa?y}4iC%A537l6$xr)D zdFV$r`Oa7{W;$ZM+!tn&5bCxY-9F+kMcuZ7*Uckn+WaFD2^5eI#a{(D%U-E(-TE+n zyy!9lX(!^_-MxGdNO%n%On^SX0Es9!m9vK$*FeXJGoQcq2YKG>&P(b6+gRH#I%el9 zEgI}0m3(-pn%haDu_mBbxj>j2S0{=$Y(v-eYW<=PM%i?;RmQM#sP=;{jp`J@ngx0t zbk}rb=A}r7=+L*{SRoYlVP79m-vP1;z}h8MQ8-Z@fsR3i+GbR1iAPPn)4mmNn_GYE zb?s&n?~VpPyYNPr08Zfk*W|`-y=YX5%{-Bkn$8--*3)zJB9WFy;dSE=mi&t%=a%LD z+{S71Xw29O2CapD1Na13VL5f~OUig|YC~?kliG-dG&a|b7iptknM~h!TG4kPv6FUr z#WyC*;`ds-Z}0gggoL`SphUFH3bX!W-YO!J&8Z)5Y!VXi@9H zyT6tRw&$0pXDtwy)w7mP=nyp-m+64X=mlGo^^W}4dyu!1HsT4XYUK+yiL$iOkDB>j z3tU4}e}=PLq7VsuXZ1_DFwUt-bsbF>N4%U0e$Gf-Hdp$9;>7eXTPZSE@bS^2e_%Fb zjBV(#<@kPAG-wE&BFG*fhqJ3saY;x>4M9qsG<{eshCD)yPcB!=u zmY00sx#kXf%Zv&nB1!_mqR?G!Vfl1-@3~ev=jQ$5AVL6%|EJ}y*(cc%YKbtUhohhQ zulWltqj*@g#|knuZ>KR7a`*y52_ALTOc+8LcGf!ggSTz}=sGe6i514V#AguL?Evy0 z^=|JseGn=$KaA$1W{ALN{{KBw#KFD5BL|}tbq|L^l=SVL)*o913D0h=i8lYp44OAZ^!ULDSSh^WC8>9qH+n{MGY^;V8B$kQ(hY~u)ur0aiE$ZbeUH(Zy#3Opm5t;_u}Udb>m08?rv#ga*&oB zj&i#su=~-cGFyKB2-t>8xO^`2B+RY!o?}KpFHer{o;Ye=FGD6d;BYJA4*0Oz?J>dI z5w3-cH=QYIxqh=47Z=r+kgZ@2^bxq$N85q}xqQ%2=cV)uN zR-4sFTN6)BI3f-P9T%RlY>TTIKa}d0#HV%;X|VMKay!GYU-T=a0oZ%+W>|9F`hr~7 z63Ud^{gmFn$GaF{sl81t6StleT*?Nb5S{;ipBz`!$;DQ1>oU5>Y{!@)3e8LqRg?J* z2DQvK9f*ida}x`P7?D{kc!cl4u-Y|l|G(A2UB9ob6^@Z3R{Judj~BQHtV^&!L+{-7J!V_X0*~)YecvC=2*Y|ZN2t8(U}#ZQj@eW z#w;ssS&vghHT&l+`G-X}1C!)q6xo1GMe~b>5F%3Qmb3hKCOZ*x8y{Hg=S!ZYREyW7 z3>LmDHkZM@yECiR2x;c-UgI=fB@@K==vC*x;QBfK4b!6h4LJ<;E;#aX}t&re<)r#CpzoO`PG-A@R#2is3b249TCh7|vuIxX} zQB3l%sq5upP}RCG4IggN|v*;EjlYQOyK?Ta(qtT z2$N;5OjVC{RD0z#+#-qVBCla@SGqycu{3{lgp+ZrM}XtAihRQpt*c7SB{OuHt~;I7 zTr!qjPdM+Yiw7uNZV~zOoEQ~Zv5%2H$9of%{wj^DxbO2NWN+)a1<}Z?l8m(2@C(2S zGH2Mvz@w8Srf(?CtqE!tGGBISR_h)upPcSEIHzmqW+7F=CriSvQ;8v!sd&uQgrg0o zWJ&xEUEI{$W)^!O`|5?TRfU_)_9a}sXBIy5A*xU^AxJBH56{ISEHuw$Eg98p$+#*} z%il4)&a~t`9>e(B0J(ybz82)00iW`J_UblnAVhn@AQ`7qI%y!qpA_BbM}uHtSFgJ{ z2;y?q@R4&-_?6Z}uL7)XXTJre;Pxhv>=}-{C{+uU?;i_>#aNsZ+w6aM!@5Uzj zjwlB{l$coR5bVh3-b>4xja@wu2w1`RKNU4w{go1#6_YI%i;B_yB*27C$ChRjVAh?u zb3RWR^gWC;XntuhwC5GY&aEs1x<{nk5!vxG;7b^(8dSjQ6}4(=Kx9BZ zSNY24ZI6=i)@@D~y**WDF~4OotMkI_YKbkrhXG>Xbw3^AtxK=vzb|oui?CbXVJGn8 z?Zl0p-_&1CZkoD|<4Ux-fGbbespddiBpPBMN1?sxP?ERx32db@MjL!Vk_ncVkbljH zx&cI&5F$ar%ODI==D^nDNb;Zst58baZD#jb^E)tt-0PN#+-TPqK$9vMP_SB{*y$hX zb%#o9&JDk%Jf3Aoo_*#_44MM1*ZAOzmppApLplDB=l9xhy6)yjy>;GVj0siaLt<^n 
zzE+$J2cg!Oi88rJSmongJ%0BYeO7CqdfL(+IAWD?T!lS2n1AAP&#M#>cj}+fdyt>E zlPx(#KUNX**6pFY7soMm6HPaJEab=fR-vel@+}sJ&l!q5R}h!C!A!NkV7E5z-vAfo11-Bu-kk=y5QmbFPIZwf{Z31XUt61Wr{w~4f00 zGUEObj%E#Z!F1f*uP)ptJK$y3y|y%X#bpQL&dq3aj4jMagj}*hq#m*GL0Fy3Y zsq?k5xL9S@612bz)U*LVF#EN_UL|96`Ks4`+cNUtQAq}$x|mqUwU5u5`(*QyOViAd|3O9DLE2A| z(P9d9<2QSfCloTj81yu2(k?5AzCNcjh91o^CAhggJNy1?Z1l{LmM`+1k`Zn!S8EX? zFMhnsliiYR<*c5Cnp6ch7sWG$6^7Xg$KYT3uEy?mq8UeKwE2;&{5;;(8U?@SJ=bAFNc%){Xk z?Uppsr)AS`3$LH)dZ{_|0oXVUT7Le{?YN~{Qc}}iKR6^0lKL2Fpb4utXH*-BG=G~> zE^d&@WMo-gJ|TO?$ZWD@^!~fo6u|cA?OzyfGY*S+M7w3K#Q6;qJkQ1?C@4`)Tcr_`4-nQuniK#H`6ImGFfuU8e+uL9Q0OI)9_`m7WGJ=WS z**x&K37=cvjjKa2jt0&fAPLR4)4TuNJAjAMbcStq?+Rc&&+kyetH4KcQWjS0VRvOj ztV-TKq*Ywo!UVOpAnpe3|4u*o>xDp7Xa!G!0;46~n??DnLgL%vGS$53Cp@++XG$0e zzs`Mds)p_vkY(;w=WVtVG!@#IEG0}5f#Jb*YDwBde$!}B8nN5FWH>%Uv`N%bWsw^k z$^U-&McU4=odZ2y4J}S#U~rF}IAIzgKB*3-J+o0+B}dZeipm9B)Wr~kTjH(_Du;sYSzX2Sr)z?>9>9BlgOM?@~Q$FJ=u=ELXPQ zG##qFoPrG{De2;z1QU@?FcD31)8`}VXf&;V7Z4#i81|Os_UW8^hs?^E>`pM%v`eR2 zc`ZKvy9^u3e>Vi`9!*j+xcbV|v+=79GG)tW&sw6k5 z=hB|+GzLWD^rhd_mqVwT0-#tasbL@WbANWo%6+V|eerE7%>7eaZZxrl8<~F#QEvFn z=?fSi;5$e~w}o1be582k5(1SEo_Mtj@xqgT#t8={$;wU!UlId8YL%u~$qes;2X?*$Ij{OSi!n~HQ8*{D_3SEjG5gSpStBZk&5Anjb7MoaR+&D!(B#qNEmj;3J?_>;P` zXiyKPAx5?CPH{`kF@62-<^BQnap2a7qqR^E7plL1>EG3gWx|Jaqvp21QSgY^^*2UH zii_#3E8!ZlCV)%tkU0h6_zZi}d+azFhhNM^X;P0D46nfl`4A6LVK3&pVD9@~cYwb0 zg3ba3(qU7GxLAai>VC=x+_{%-^tD;hHMn}3)@zqLn{1w4oOae{^{jQ4VC}GFOsQ%j ztLj5c-S5jW{%P?P2@V1fbi&r@7cxcYubXti%tZTO8{HqTKmQcf{fSEbkoWdwdEU@L za{kHI{y9+h-dz4I+M+Q{v)0Bk3i5bGx7l3$^Wqn;xS4I9mjOKFJ)cu=&!jM9V4l=5X#lX@*(6b!`B4Oe^^D>;u5kmbq{CL`Dj5&hz`CZ`s4f18*W(@lx_( zd79Q8wly!ojGGCHtf=?9+Bb3A3d=woZ-4vOWsB}{1BRwPxVA6oI@UDnP8iz!dbj@p zjqdoOXFJ>PwFIiF8^)A4>VDK^nB3-o9#3s20JV)%V$&Pe8LC3-p6gzV!MK^W^qW&3 z&I8awJ}-YeJ5H(}_FI43k6ny(-K~%Rc!vCwRDSvUWMAVQ%*^%iM&DpGfcMwj4OQ;Dd}Hse zu+PYdZuoQ5_2dhFBjYEbX2l3^^Liu) zBlg(J@+VX#kq=;#ryspeF7!x?DxUfl$N8p+FpgPmu~dEX2EHdRmJ(fQEScyzPam_~ z+iHPIWH;s3LYe;V$Zq0S+mfK`pFqKGBooUe{u|QNZ59mqXCGTK938KC3>(p)O|QC~ z^=oD8afC>yY47!#3ECxil!<(9e1u^@p9m&Cjr~>XXNd9qQO3F0VGkW@claRx8~4@# zzN9>@-|jcGMel;H5#Ew7)f1YCsqqP1-f4*uTr%#q<1r3(Nq8!H@?rjXvvjcKufA@9 z8~!@HLBN_hW(VKlTO^}K*H$<+!7*avBPQS~mYf9YMc9vTXEm9rXs%S< znpOHF{KU0~UyB=Gc+HLhVfI_Z=C9y$AyhXU3_iH?B*q|KJgjNm=*TanF7QLlVe$mn zj=5Lb2q@#JDo?Au;5PoXd3U!KYN?&OA(Li1mAM5MSqP>CN^aG5dD8*{6Tj=+I=M-S z^z>`*KV1j9wm~sWJ)UAckb-_~_v1^5puMh=$~@zm7js1|nJwuUdZGYGXbIYbt!35u z+Xa9|1sMA@(O@LUrTgdrfP!-gLp&dEgeS5EzeUirK(;H?*8>_SKWhURDlIg5hYvvJ zs2fYhJ$o>08fpMHlYc81@an!UQj=w`$CLA+N#`-+O|(br*fSgv+3Kl>2Q$V8CfXCE zkyAOd6KPQ5kR*{tt(Y|`MTy%kUgI;=l{x#80+M~Tr+Ty5D&e$AP+Z<6h-5)N!ufu2 z8?=)h!sr$o3$SpiWFKA+_)yT-ektD~1qBl!@3vdF%g6ye_m^hPuC>dXmV4kr2_Rq; zXA~8%^2Q{wk9lr3HCZX^amOrHq2m%j!Ac+)^t1VJ9F{jxm_ITFQ>c`%y9;}&0-~zF{)sG2SpyH4J>W)1J!svU1)}Kgl3zEA z=ch9BG*cwn0Z~!EqE_}`_S^hnNqx;j(}*T6tF7?LXRB5Peb&@VUyRwcp0dv@w{2J| znvHlyi^$@x>q-_bh!T4n>o@!iKO6kkO<`Y~Qe=Fk$7LXKg$hMC(;0PqV$M{q)4TPd zeRR}2Y26&PZS=BeD~0wY)3vH`hYz)_?e};A8yGz|yWJ1}#`na?n>N~hn|z@hSEcmZ z?8V%Gl?r2sd}BT@m$X)4F<0RH3!+1wqiTPTvHzVNyhtV7NNxDiD>Vt>+5_j)!e;s< z$7G)Dzweq=;-CTcK{blNs0c7i3bu@A1&rS1)4c9gQaLX~Ics0~)sJ?LYFKO@=u_6- z9tQQCpLh^20yF*L;cw&9?q!=C6Lbq2Jcui|g*kXN^}ys2cF}(u{_z>szoW5aun{XcNPAWwU$DVmEAv0Pcia4`**G+qv(-hkspnrlRWKDn|bf*_o~2 zvMjspE1Mzt$P`0k?|r(j@@D1I)e#H^LQi7Q)>rEhWI2DM<1BbH?wZiMeImw;K4}(O zmC9@xM`EYYPo1i_XqsPdD@&f}hOjBxL&4l{q19@ICyLIOW1rrV8*v& zrH%pfjs(t1XmUJqwKZe}~LZWc{khg2qYiATRT@}}J6-~G-p z=;?_7Zh+l;I$!_9;<7kL!mog^e!ApH$#KFAR*R1!N4qJfvBqVseE5<0oAR48jpeVe z!oPkE$gOu=(9t#DYfiMC`tmz@tu}Z4ItJbnjEANne>E2QRTfGEYwPxB)U?I%KvA=ivgR}&Oz5de{1(v4Vsex`(CmlS}ppYN!L=4%O} 
zMAe6>AYU*zGpCXhIl@Xh0vS3(b*-dXlqMA}`G0B#ERLz&p>y72m0<%-_4vPx=W#_a z2Iy#}DL-J$h)U}Ge}>Byatg0R#6K4N+dHIZm zM$?Ij#>auP&7;igZ@r;K1KV7kY;^J-Mi({SE833%8|z}U2Nt$>aHJ)EQ_d-v3CV9e zS)T!7Rfa;aLT2JKJM7>)2Re2Bq)Q*n0QV;p&qQx^tjf3gLWorjUjk>5RixzEu?xmU zwafQTyH!pf-Bp%tn)OX6Z6}^Iya_)Ze9nk3y8FEN2wzs0IDcX2=;Ura!x2zx`o$W$ zJ<~WX$`Z6of3Oun7yfCV^#I}nAM!I_eN6BT33-qW+-OG(%#+hrQGj+=z>k+Dr?#*_?4b}+f_qGmnQ@ar>?bW$JlSU zUWFiUv02_Eh+U}}M>ira67k;<#eBE3+4zO%BmYgn<<^brI=5uqr^<}ZF3pnXwy&bP ze2-)@q{gz<-wqp}`0zgNrswf$8g@kacEC!Q-^^I(KYPtP1Tw#zp+ztwf^-;%hpQl4 zp2mxW-8j542vejomL5Cyt2y}5d)4pwv(NElu4JGURb$Yx$T01t z9{-RxeN}5C(O;&sg3iA|`Z4E3zM|ntZX4Ot4d#*rr&E9-n;V` z4y3~@SB+>_EBwYnmVDnrHad%a^`PO=i;Ilz3t8r}M0bPgnJKDl?LlC^nP7}-LMOsd~pvVKYD zl-@E!*bNKg;S(ST9{4Lq?L1%{)%U$j=b}KY^rBq&w~^O^jsb3LfUnX#VXy+9L}|BB z@$hTYpusdwtIo6A)lB{3R^o}LeNsK@LE9;qw4Q(ZUg92EtT9jEbN1ptzRWrgL_O36 zEL25)4q#Y00(V1o0ph2R(OvL&R#1y|i=L03$D=G&8DsK+#_!1!xWbwJB!zCb$-$9J zV(!r?jy3iB{}qpip4x_8#td3cmsKv1z66{~=Nmy?6!I>CDfIYz58k}KRolm`;3qND z0gL?0s}V_fLTle;Q}^ZL!a%2r%b0TC9>aydEpjfoi@nyu@p76D{O7^bbF34g@ObKLa@h)jose-!94bKWyA}1O3h)Y$kKDwWpZ_T$#4`brvLb z!}g4$yaR5J%nJUV_PP#&l`5fgWiDs)OjN3xfft=nNHWk91TG024C5%5ZnQ=ztF592 z)H{KAv7lPiBaAKl82zZYD-an5ATksO!mvy!Ry<1&Ry zuO&SWg|+4cO}fAh&}F4${9Jpi$wXa11s-J*e2RMFb)O-K+fDSiX1`UILa#>@%tRgc z8+Y7qv+?c@a}1dfh$J(;Z*lDeE}R+NXFNgk{q5h3sMEK<9!C>V6e?bafs(NdkMc^u zr87TsIX=PAn8Sb6H8o#~ggGKsrYW)BJy=v9xEMJOn`vA?w&Jh=fE;3C5Qu=UCjiWD zpZwSjVvM@yNWx%yX=>CiS(S5|G>m%jL-Shz?f~Fm+RTL8W0eEgomj;d*>vTlnoa>_ zvLPy(z{g{(V|Caot&@%DzFnx3fW5sd=bI@awWoG%B2<=c`xB!{GP_P|K$;pSwDBX3 z!+dQE=W1R{ZEKcVXWI;wu! zw4;_g6eiO8+#`n;%XdUfFi~;m2F-YcK~Ogkt1o+8u70={m6nE+ zpR5}2j(WXcU^_+s`Kb*zCh~B*O`Q(9<)=fzwp2p7O&ciAZKra>TG3tihXf3c^9FPF zi|@~0iyRfFL^{}r3B3II!8^;?GT!numVc@Zg~UWBT-W_^m22y|kh*ao9xTGY`RWhM z0Px|Y4DiU-Z2B#)Sl~*BHsqD-=EX}U){g}IJ>?|M8m^h-B-8a*9St0W}PcXDsFxHWUh)^ga= zCXm%t8VSK9nEN9608Qbrbg#}dLZGP=l;ko6V(3BkKjh>3G4p+v%OMB_m$* z&&x7>hVk~r+w9z{9xXNZh{3*LklVAyZ*3Bm>FIk@nNt(`Z*w&S}}xpY7I8H$9CanvQ()R%D9N#3^Y zkgNu=*4Z4EKfzS*;xcSXlwF*FLKD2^5E6&aMPo^wRFo`U+kY{;2qFKyO1G@*GmHZW#ku`)EZg&4u7v5C2E6;0`@oa7P__BnV+12 z!&t=1qdbQ|TBtbayOj))*tTMn+*|}I?%n;!{RWp0LY{+Lx{rW|G zTn-aMXst!(Hsfh&&jd^{?|DZ676uA!6spg6J}JtD>~H*)sqrI!mjmTdkq{5`%aL`a z5A0?I5QGe*Gt)lHeoz--N2ZRFA%yqtmAK4r#PA(os?1VCDt_3Xd{vR$mCj9B=;g7a z(DO(;OPB>ZYtt?tkjRdP0np}B;hCdI<(R!c9FV#Yklk>kTnWp=xfPnDVSWy+xh`@k zqyo}MOFiit-hJ@k6^6sd4-}fPa}lI>W`WNRriW+mlh7w~ur$gZ#*v7HJD^FzpV^MX z9R~U5sz{_ZQDxmTsb`3rAz*;MBE2gVPP&t@8^?Czz^_K)ud4|`$yQu33HRO|=ua#T zRsGzn>&tMhf;(o#%bL=0*4m9JN{LZ}kB_S|zx86*mW!JBrKJ)7QZC6z)f7s$LQi3F z=9h0M4qh*7mp&h4wF`db^>7HJ@kWn_+?+>eMe`2^1^M+>jCVa05tFU~DayuHHj|my zgVSxCybhYKyl%$731Js$-m_XS8{P1E_FMrby!@ z+(;_(D=v6fJB`{2rL(ww>e{|xV|vp=gz;*s|=uyaMjn)ib`1n(l{%0g5I0&Yj98?>)lGt9b|OnIW(4TYN% ziwttvJA2geC_r-##KTN=-&!V)$*{ z$g+kZ|H|L@)ZxC1gD+D8XpA;q_M^!#iknBQMEu+YC)fFrq z2D0X-<&`Gm(DlnV(biv)_D{hes07u%ipXp@@SMtB9;O3|_tU6%{64O%QWu4jOq$m7 zQT5il|NA8u4k7t#nfux5IZkGr@J1%JcwL^)8T=qm9zl1Xln_~59?rN+nqsZO7KCp1 zHv(d~4tGBO88(APXFvkRxx|Gx%NY`OCAY$G$yIZNcF-d*kaNp=xY>u@a*Q7^l1wQ~ z+i2&aIH7%?m&`|heH&Pay{p{L(P|Kutz%ndRRfI^8J#!XM`kw-S4DkViq#= zRf-wsXo(6_jskOb&N2%SIGzJ}2EZ4;_ya|K6Hj}bFGFGK|9uXO;c96^XK%YXylGCo za}#lObj0&*RC(<{X1q-Dv0@5F=~@$s!H)%z>BAsnvx;ZHFi^m0KTv99jhuNE7W^tkFi3NuVE^X{ zkZ@dxosS*)QsW{<4K&cObq9lBR*W9+4ZGV5DXll4;tXj3T&OB`!wz!rvm@pzSOpB5 z?YmM!#ftE!Yv?cB4avHTew4NoIiI8-F&s2z;go{8J|?fb*~o)rG5Vf!5P;@f6NhC1 zVOf#Dt8}C8_shg6E@Y=_>IHFXFQ0`=diU8XG2n5f^^^W<1TE?V>)D(sdZQ}OW94}^dP-~W4#+i2eL(?LhkFTv?WeNm+BN0sKvgeBCMB?}H%I@}#yWOcXUcCY`8f)v9zH=JF2>H-mLdv2;5gd%(jb zD2Q;JHI;v5xRRPFwp;;%9K7Ms3#=!Q_DHn8;v=qk$d)wGYd3y7R2pD%lO8y8$k=9( 
zhRw16tBrYA)IW74T`z#5?)fL$*3!#NcJ0p=R0{Zw(vAVAm*aH=u`YsneMCfLDVd~v`sO=dQ#o|_G;n7x~t~0SN7HJIlQ4PMwFTf zf21~J%n;j39FoEGoQ2)zC#DhaVa198d*fzF_BE5{Mq^djZ9%21LbmY?iFh5C^gX~W zncE*KDjq{Lk!y9sJEMK{SH|uD&N2OHS!b0lUIsSx%wN=$@G-q^snsnkj~BVa3`1>v zs3qW|UQ*9Eun|9k#+#x$IF|lytAQ2Ul}sG*p8P=EPn|?1Z)ohDb|hw}(9}5b=U%c- z$J*}f4-pI?>+tWtCi6vO>_a%~1hc*%(M{bGUXp{Utp4?7#PJrAA;{I!7v{J5Vl zZ>(ciZg)FwhWz?>QFVDE)0PvK#1GB)DkA73Q>_RTs8|)!Y^xppe=Pvdyp#uJIuV)1 z>|`cpoh-xO?A49^7BfWOkw?Jh<){FcBoU2naqb7sISDds#uSHggAciHu?GPU%LW8} zXREo1=d6j|v&p|#N3qQ*3M83ls<#S1D^rxu^F14Fwo6@sHH-5OF|d&an(oN)up)7> z25AEZcVQxrVndkInF$j)J~WpsNZ0_D)sQefyvht-IjnvoDD(&U?;QF1Y0&#GyU>0l z33|Jp4SF}LAFtOB>v8Cp7kKpI%YUGBzv>f)4`k^+66sY`G zs8AId{uvLhuxSQVri2UyD#<)qAiHNbKF@bMVdRx<;+A{{C@)Y*M?M_-)C9JrN03s) z8xdS`t*YZ3kP8V$%b|bF)1!Xrd%7M_gH@qyWG{0A;yU*tal}Y+N^NQ~WGQ$X$aDzke&PJF`xc*3&|hAu~1dJD{9D<4M| z+)C1%L%DGhs=qW>a}O?((WAHIQ@1A92RW~)85E((oRE6+3jxfE%yG_hDa%|R3 z<~v;8U+HhKHi`!%Ua-%!5oz5n}_-%n=;_lJiLn9HHS7%%DtvOFJ5WJaaM)u3`` zXWVbQ8Bl1Ms4ty3Tyb@B%2-~FIFR{n$1}LNU!AB1AF8pE;<%M94mFD!kLX5x6rgk4 z@@X=%TNr^hiw7i>Ry-!8OU1NY+m`$fO=scO7I0VhalbE-QB&x*xr4Q_xSw-+Y{UM+|PBNpYvi5_Nnq1NJQH3tGGU26;-2;3yGZj z^7PfLoJlAzlJoslDo-Vy`UCf{ZkFLGGl(9bZdt5n0sV-aJzF9!I{c=SSsv>$QD)plx>}R1K_QT`mieMz#+pKxb_T0x4Ej$tJJg z04!?I0FBB$YWhR~m#TfBQn|QuC>AL2HX;^%Q$P1pPA&z4Yqc-+Pdu&xsTvt9$AnQI zbusmRx9q?Pz!r-KA=Bv@+>BHU7=YbBoCnY9k26{iTlV94mosMtg2~z__kXM&8!X1e z^KrnU#3RnSVhNggHV0a9NkdG{#< zOpv4HY6df+VZQxbxr?z##zyt9%V>eT82c{-B*eKzz-B#Jn8oOW!!F?SJEGp^5P>U# z5!p=~z1N%?k#WMEx{O0Xs{@l#te$`18WFhHQnvcV1Cuu57^?8{@noe>KWtJD=7W@t z7YLFwROciu72~GGm0I1~;x)^?52>3JY?iSxt*b26+?{ zOA9cLgMc)3$`+GZLiWAbBFHp)-UZLO;Zy^QoXFpk1|#49MDJ`1Y`O3`b;hBAkQjU6 zaRjF=)4n{3K)EghG%EAn50R4}w1#(V5&bwasQIz20DweTG5?T(*#(105M9x^H-;gv z>WRM~i}N(juc|^8Lffu7zD6pHfuNsUTN-#?A*KgPeA#AnntEgAM+#Z&I1sv~C-3}w z-8XPM0cVn9vj`>T|Ln3=nlSdzLkzop$x>$LPLjH=KHK2x?;_WDAnJ)#%ig2O%sz71 zM;3O5dU9fPc_!wPn@U+M(NldW(L%8eQ-DRqp8*li^#VfOx;o^q5_=(P_~<*?vC`Z? 
zD!&L_XaN`p{=AzSdG`qTZr--rHO_&r1XoFLuCZoh7)bMKHXMd$fN=G?-rD*fUBh-MXT4YVGY2VZro;R8 zsC8|w!{&Mi(T_TY9UumOP6GQLue}#d42|qb{E!2IQaejWRKZ=!imwR)%G$&1H+U*_Hko1pPrKo@MgH@m-iDnI$-4bG z@i(;4%Q#bZd}@k+Fl$u5vunUkai}Q=))p!^nEfrlZ{vfP)?_r!+pc5#`S+zWz-P$h zA?D`3-#KpO%00*_vjKV0sv)Ura;9Dt98kV^TAO6tKpV)m{${T_v=oCsR@`^@)RE5L z%Ck8KH|?y;IC4%SDRos3aU$0EaS!LyG0|gnDwTFx7m|N=Ro~U-q510>3%N2DWa=*1 z=wO4r@lO3daFLHMDVyW}FXOWwgG$+`TFfT?*?aV6L?VgxR0QS*4wN=90C5!ky@xCT z{aCU8Dqn9#fQSRuy;Pt{YXb<{A)S2vQ|j;RcZprRrDhoYgt02Z)VBHY;v&oaXI5&< zXLE^avcE6oDtu(#psn3*_kYDyI zzDDZFvPkV<)3y(bCg`NlkWc)9h(0F{77zhU5m`!#JFZW}p9rX3j>ob{9^cXU-%VTG zobRo<0!tB7T3P5$+TWSR<(x>+^hToTo$+?_5fEN|*V>FwnYdc)d}uWGSZ3;Y>0^<) z&c*TnoP&i!MC>)Q9 zrX=Zk!oQ)|C@a6h+|}N`1;HuZtE=S?}gz_?7qwt|=Yiuyet_*#?<*7i{&O#efw_v8JYtxCb^KAm*Wo090 z$2L`I6J)J*2l;&#k0DC@Q}iyye(Y5O+$E!auZtinxXedfl`wnEL~QJpNaqoTEDbi7 zUQqVcpss+F*@`maGomAdO*+)*#hsMDaN46Y6;p`(qS*-}J@9d|u(8+wU_rh>?WtOv6_gjHtt*U2e=M57C3(SN`a zSbxiYPj}<24DtCeHAnJyOSYuuQ|xvRr~Q&Q>=pz>L_BkcFukaKuIL!A6P0x3keDYt z{2%JmmEjY7gIi)&HL4_Efdj|ZyxNgrXMnIY?8796#rK4gm)^cVPJg%&X;?qWR{Ao_ z3>IZNj?Ckok_xe+OdK6$ZroX2Fk7wDB)UpZyOyF5q&)(7L3n-9WXfSlUxh%rNK;MyNpIa*`q zUkv*Qrd|sPVahzQQTb;kPgp#{ZlWTA$+wJaTP7-Y(Iv$$fJdj{v^?5VNvN&)L=tc7 zhZd6o2HNGh&vaJmW^%Ey+otV~!8c zbq2)%&GPXRBBUplnOLmRlwe0`e(o#no#2Eha&1^cCG8YqFK;O%`WNt&8M;@ejKPx2 z@j(~!m)xh4GBe|ht5c*I{y}b7H3k8p_5ZyRBD+V9SPTXs+Ow$zQX-$en+<< z=r*x=*%A}Nh=}(2lhv~}a-)^iMOp++@>~j`k_C-S&lP+OAGsYqZCKmp@fgaRXbqB1 zxDjF!f_>3!4NMokvptu8+f-12J}Js*A^GEtK1^ff@ivqnT}NlE?e%)SyurbDxuVF* zy~s$T>VKu(zQ21pZ)7=#N%QnNQySjm*;w;t^r0mC|IlpY0lSFZ5=3gezInPKKugwZ z)KtlQnc_YR&w12U<@$rc5?JJcK!XR1OBJA>2=5sPb*a8#tTS@vWh zryW(r;wW@ep@@#>D1NPYNXr<%c9SIkZgS(0Uf8$fyV%z0w}>NlK_blQlf54rBlmF6 zILL*n#AN}QNJql(^=_=h1o!c=vBjHzB4I?jenFX^FdbT6!TMVdFRt#WAaBSMTy%*! zxtkkn6Jnd$Z~M3oDu*&9JW)9L znod03*A^><*SaNhK?dg1M2l92i^jnBrght>i>9!f)NVCobb4jA8taQVbX4QFI5+|k zuItz$KV>q~&0tF3k2P~X+hB$@vK7y$7GNzI5pLLktUN7J6NXZm+0vssb+$PG4EeKy zcx)sM>U9>B>p!QxG3A(Y&0KZ%g@*q*QGQpX4}}MsF4CjlgJ}+Wfnt<3bJCpPg5`Vi zJmpv8U$nNv-HLsZqNol>jCND%T_xvXqjD}B(9!rC_Ov>)_}G@X^|-Mg`l-8KBHd;-!~RW7=0uB5nqVr{7Yy?qmV{&4 zuJbMov;5JD+J%g0ifOHya-8a`z%31^wMOD-LgXvROloJJmilGOu4h|p&(+ivT{vY= zk4*0h0$j&?``%7G?|~{u*jzmFw6eWQx(wVsY0$73#wL+V+icQ($Y7Wd!U@4NQvf@^-4$U6Z7Rl|leO|3w-)yxW0ZyHhN^ zkTRuVK3F8JQ5L#G|DoLf)tzd1o>jz>Y^TMOLm;3;a7Z}7I12d_N+l_`nM79K*N>H7 z^GiUeWXfM|0F6wg@;d+mt`H1>HIEuxr73joJ6Va=eOC6hOt(?POWd1Vr|db&ZZ-1( zHrpO)M=v~9DhsV~jLv=hq$K76r$#41WiY)?lL=HldQv^9D(QL98f@AK)Hng5!Bm3S zlWc85B%p#Q1O->sJLuj2F3W#^(bca?LD*b#nfeAx;KU+xKmW8*DOn}{@*BQeH8@|D zr#;y!dz99OmYBeBklzy66>4114r1stNg-C2ONdqOIj{5L>0<4wS!F%D6vHMh*KZfi z{zD6RHUgI7mdvspyNGp44oS!cLzOPOw1;RovJKuzo>Rdk@~8sD3t!aWGl=$9^Cc(^HP|19KPwo?9eSm;N$u9iV7lMbEWZP~3E13p@qh)KN-OT3()#Ut z+HLs#t}|3Ay%O?Ph)wkrWMV3eQd4XN?| zLkZz;noGuhtlQHWrmWgAh9H5wDfjA`Kv!b{jvN7A-A)0|6nz#(XLQ2*1oD3L#64+W z$jpReP|3|gIEE$4>GJII9ABBYfiA ztR$FJw$mI@_6Ca~jfUdz+Nb_-EL8J=A??sCOm%U&6P6?fd)9U<HqZJxVfy)S@^Cm|tY9qty>&2)!_NdOba^?is8V8BbXa;R-TIrw5hNPnr*k#gVFH z@!yJ=MZLU@B7xQX4dD)b|5l5%vO~VC6rZ{ZIh!WCr6} z@NI&jZoP-NIIhcw_0Cpq4X6IkC8Z{8oGLMSdg?*|)p`OzZDPoC z^;d`FgfZ2VF8o;K-_(YTd&QX8nt9w(OpzAzKrT*bbY7Awe*hKSi%z+$mQjXuGWm zy3KpOzUYG7hT2`5@~P`YT3AUhZv)$ntY=;r{9qY>R>WajCJj#Dbf|pYQ0bp6)^Eei z;WJm8UljzwPW?Dcba-B5f-53hc|f5lwpX&>&+*;_(wt|y_wwu-w8g3|ryK@hh~fVc za^DugqMy)Of;(`D=&@}X$Rw-qF9u|?qV-gtM<3Zm?SAjzAmd4&C7@i^$)Pn73 zNM^N-`&pt^(JI2rArza$@3=zv9e2RfajHvprassHvR0~nT2>gpm9?XYfDSQXtJ8z=Py^o$M-S-j&?6oM!pmcsvQ1s0lo z$B6;%QkVkg%T4=T(KFRaY0My*j;|#5E!10b4!q%On(I)J&PQS%F2 zL?S=C=YwiGXE4S}CR557>BxE{khusE&7nTU_5MekNG{pHx4nz4m7T#66t}Zu*}!=i=-ZoZqaPdJOn$QM!ib 
z5pPi5@GCMZ8f*6O%_mu_<5yx2qN{c~C)!`1q@g66%4c0YJNzKL-n844g+BTug6PBlUR|h?Wqb6>U^= zoWoGOwVBg(9#gg7{>C8HV{pxIl(Rvt-Y$D3xF8t;Nf=-scdl$8aEYTFZnskNw2}j@ zK@sZ0DLzdzX^VP<%3uQw?!}~Le%)e4l`kuL*%006oQ9g4V6ovJ_daK^$Ns5QoUlAO zJiKKQoPFuVqDF+a+~K6mA5rS=Q9-tsyq2-upriJap3=FOlde9HTpGF?a$EQY$p_d-v^vi-pRQA5BRN;McD{pGj&`wi@a z{>4=y#=?W+`&S1mw3VLtiGfckjmWnO>wky>YEQ4p{(V%907a``*GI#`t$vaNJ`h#c zGl&7nAd}RgPx=3Pl4(jRR8dZ$)rdsDyV9 z>jJ>76;dhSYOrl`^Z1>dX*i9l{XHTPZ4JxpDWb}wCXx$vg5a{2^}`cp8JYj1mY67? zqn+O6Aygn$Guzg6@RLkgA(&uqMA7jly~JBIHq%J=MKVdHD3Jblg&1ZiOqkwK`?ASGKYrtbh@D%scu|9n)*C zQ!gZWu(G1K+djvAp7jsyJ*}$eP9%QQFR*}k1b$ zcOk>|V9eX5TgK}hT=E|NFZf;bU`p<7k;q>KcIAA&!qdaI2@0pRX*L66WYq!V<(8Y1 zf_-nQ!IrXg1Wap`DqJPMvFnDM*%QeH#nX{rC48?a-mY~3zxuOIb%642P4m65_<&>` zF~{)CRUS%|)kFp&e`;t9e7WcQi^j#{yF7}VEByVD=&rnT5^M%$tQ4ef%H2wAk&)zFp+ejf$)g1n-G-h2EK6b@&d zK}cQkkHuEI6pNsuKa)T6K0{wyvhx>7-#i>WIk~j#urMd@X*>YJ(>R&b+~W?1}ZQuV>N@uE1kzbAKrXtx5UJ~B_L$i%8ZmL3cWmB@P}S8q!BZM0{*!Q z;7Hu5KXLwji;U7B5|qgO4~u+v2GFrfg#);&x$>5ozB@+LF^)jyrCsO1&X{bL3v4xt z^aDDDf72KmR9C<7OTM}yov2#0mk97fc8#z;tDoDb5eD%q%Bhc2xI?uAC;grT5L&p` zkzT)8bLtt%!o5*NE*As(H>%Zf?(aNX0vXls*bY`g!kOlE(P3#P);yLp|$({|`*sqi`%TC8fAQA{IXt0H6|B8r=gJu0H( z#1d-ni%)+2h_atp?CMZf`j?g}ja>m8Cj)OJ5=!I>mmNdFb3uz`b!i_}d(Jx9+Nsl$ zXcAVab$`)YD*5~^Uj>Z8Uv0df)Bw#T%%_X+%A{!xDN)+Rg(-0_E3u>Jn$4d(o}z3T zTogT&fQda)Tj3+RWo|tY!TzHg;vDmT%TjGW6DdN?^>UV4w@lq-Ny-LG2 ze=rxmgBnoQR7Z)JQ(v>?Q}Sz;REHV_Gd$~2S?`PpwBaQngbp!Hv-0o$q3pL#8UjW! zk|l@2;|>q2p|$c44a>{TXO;E+B8DC_wh&Up`N(%EjU1Tjd@PDkV?&6Q@Y22Y%C?J# zNwikz@jtO=LU3lr{oZP%OAU;A+aYk#Q)l|6&wNI}t2EFsOkx?R{Zy0g3rn^G~{b%3ss0%;4ba#aU=i5AmJId4sM$ z%Zk6!wW`>ZnHD(x6+44LPN{vxN)!<$T)d*jH9~KqnR12-O z-A_MNnihCqYR4WEV;TY0H#g4}=i*kiFM*g|&_lWY|8VG2JMTXn&Q*o!+jBzKJF*=W z=8`6SW0)SnpvtD@uu{R0MH6cWi(fdR+m4oF(^hLaqm+g8W2Ra-0O*gi6RD-z!pV*M zR$MON{x(_wukD(IOIKU|ET{hv~|hYKBkfT z<-Rb(#lbUUEV1tt8?c3LzD>fyw!&IdjGq!hbpZzbWGvn>U?O6x4hAVb(MUiWRzzBV znyR+?BNbkE0qoQquRwK5@3oSnG)8*ia)9SQZ~hL-)m!3^LIo2>u=P-{v>=-{jo0T^ z`zAnGA=mqMZJFQQMq+r43*`2^xyqk$Px{zh4c#n)xDJp@k>lp`0W)gpX`Kj)w$tNo zI%ARe0E|Gv;kIhN23z5gVCRr8VW|>h(grDV)P3a-C(M|)&>qKoaL2%ZWc%Gfd z4bMxuOCW%ZQ<>!!fnV_}9UcXL{s*Ukc>wP9Z{y&RG-G8}#|~+M+-@X#dnu;Db)@tW%-kpSn^JrstBo?Tzb;PeNX1s{@AkoU5#bS|(2y|NI1 zCBZDm)#2w(0i!YFcY4$v;F;8pxzj(VY8Bh|RjqFo-coM3NLo*<$<4dJp}|S1g)^_` zfU6j;nBmd~s>bgAEI#T%ATl!5DuPeE5JAd`qLF1m zpT=pno`-Lc{9TclbQiw;(!BW5yu^cfwD4L$H8l{pR`8&2$O2iFOBCUqk21Y=TKmV+ z-e(CeA;*XjS=b4?9ysQ!DI&_lxJ&>*?|MxhpI|hpyNz?xCC_5{lLev4hyVcV z4bH5|dxuU0Tz^a1lQMejpztmjWcBe`tyf$1 z-Vd&Ta*+V^f)^&Yz~0>QTs*rPmD&lb>)`^U9JOc&Mk{8j`y?z!pk7>Y^EY0!{nuf$CrhyWOpN}HSio>wBUe?5JGuvT0M z#j<;|@ngXLhEZscBinXi6frhwuA=-yan) zH5;gHju7b`_)KaS*4qhJsxgAcmV5|;*~W4;Y2&1(dXIqHq#my0>W?NbA#;Q*B=dv* zAX{~leX%}%@f3#2F4)tesV5cnPnrV_3#zqutLLvM>a8ZtW&&Hem^&7}_`92K<51ne zWUd5@+7LhV#R<_Dw-aPQM?>)bdjy>OFE^apXF9ZNr_9o*_sBSR8A)`Rfqz) zq2TKX;1RoUY^z-5|E&XD1>*b!NR5p5Fwi{1-?5<5q|ri#fG}&DYZS%t}Uu*Ug<5cLVo%RHAU%c*f+SCrtU^TZ-qs|}Wv+R6%EOk5clELNL+ z>MXD7#Lzu2@{l-h$j@;c5S9u!H1;&PE05X}=h2xm47(?7`A_ATl&E&FZ68ITTlq;%j$d;ap z@?ZZAYJfdus4#e-2bk~`JT6`3<(2}(AS{yGu>(wRY;AWAcWL0HKYOeN6~K0KhI2Ws zEqlT5@Qx9lUJM`2k?A2;@x5mLHf_k-z zu2xEFnewCb$A;A2iE^Txp(-jf+>nMJcX0Mk)ZxH(wY^J;TLP7;gsNYe+*Me=S88DO zJcICf!45JVJP@eOjm4{6oVeNXFL|WZo~d!#J;vo0^G=`X6Y{;)1MnQuDBgMzr2y$~xAXWDe{IkD~7sWI3w2|aMEWo%Dt}U67v$axZ0)68?C=}HYtcp3nx1!X;v^;aEN*HLUoKBaC}LVqNfHs$^x%a|blr`NEOw zoiuW8&Pejfqv7iv4^lBV1^uzbs{lcSt_~YU;QQfe%iq(F<2`G0^~yd^G2G=t?#ZGd zZmvH$khMVbcV(N{O~JRn>;Xx|*Mo+UKg!b+13_I14A=%_0QXod7Ukz8%Hj8{S9~dQ zb-dKR#=t`D-7fw*Cq;Q04fYc)#@-CfwbMG5etMGnBsq_R8h2unP`S^hTH0BE>0>-| zY17Myl8*0<5Sf!^{oGzzQ8G!9D<~%Sl1+Y$$$gm~?5K~Rbx87&q|57h_3>~*|2+k! 
zQwwT}VKf~0uz`G$lF$Aqb(yYqAdPR8C~d)utWM7!jEfsYf*=2rCK#6px0<9{nvf6O z1YPFWmd@M1CbSVFf5I@yllZVUSxzp4_+j{OlX)5G-q0y4O!#eF!X?#o+}$t#PR676 zHBWl`>Qx(@>X+vj=(M+cTyB)7P5|J3L4;PP*zL(t3ZD1m@48bD8-)^*NXFhMlss>Yf*bHxq{0b%oh?3myc(@Z$U!%Y(F?ht}mxAt!pR%_uH> zui=3>ep_0gCv6NPP(6Nk*)q-s1ohGv&Uwx>X`XBmU@kHe}MF)DiE9N~k`N+fU z2+rPw&^Y(O_7Z8_m$;mMZ|pbyk$?X*eF(h}tDsW^J~vq-z-()f|HFi(F(iNN{NrQi z<6i;*hMkRrDtOU(P)!FfVqyQAUnU#D+vz$eItgK?|9G=20WR$$@ZSrzrpM>4@MLjEsUoK!+&TS5CWM8 z`uL>PzzWdH{I`FTf;Ml8!8vyBuf^aBq|7u87cQyoQgDcM-PPdY)$yQxb^FBxLJ{KN z=t`m647g8MGj$nek~F;iWw?HlG??|VDc(SG4kxY}v7WtfE0)+_Sh=d+P9Z)04zMy} zTZxhEUtC$oppDx>Ww(K7I&Vuud3U(Gvi6lK1H`E(UJH#oYht zQXIzkJMkGY!@U4ELku`lza*~Sar>G;0=I+d#k~+CN)YHjE(cheFB0-=ClJSHs`;;Q zI77~bPd5rlP05JLtfW^`HR)-}MqyzWd<%8+p{cplVYy{JTVEP`@mc@`+2rm7@9>yj z?>qUQW>N+zOExUVsD40ocn-{u?Zf>-EQJsfG=tn1^#A=R7gnS7Uu@s#&m5Z9Gsb2* zUAzfCEBE^MQ9wDDe#JA_v0%nVe#P1btKeiyPFNfl{+(zOm0|vOdJ@OePf7+mWD6pq zvbky^fZTnFmI73m`aq4>jJE%sde22oFk63B9mm_s0YIao%Y&q%d5;>pHS)^qb8c9K zK_Dx*kss{3RqWi#b(H+G?Bz-vHDBk!y|4q(n{&tLcjN~6!(*?^+e%R9hzF;I?6?DH z6gP?MYtjB{HRR(f`Q779j#uWuz5d}2anDJW*|zSGIL)5ST+m+_@@{!vdma!3XwH^D z?62P>HVMMyJsbj9uj4rO-6kQk%V>oXbJKh3@yl{V{AbIhiay3DO4eML45H4fewQyz-;!S{)qP$Dzy6@6?pr$# zjeauOMs@!OG7lM{-la)LchG>TfoYZ-uXT24^w?*TuQ`Yn#m8o}z_qH&ADDb(;1BLe zhhGI_I;)Z%$AI;!xl1bleW^T{&p7D=zXzpp{=v>;?wtc4MbwmptOR@Qn4Dy4I~*~| zoEz>geDd^d5+VSCzv26E=GJaj=RenF+zcJ`hTm83fBe2^V^E6Oo&j#nD3@$$&asybJ9V=~O@-Ey_Msf6B9w3*&40P{GQSU1!v;jW-fp&UzEu5av7LSENV8X$ zvM*fKizE6Kj$r2q0xV2&4RuFHDMY^SaRju3eLxrUyGnu3m5R=p^5k)*9EIlJ_q2O3 zB20g5-t5xPqUb}t$#eMqa$h;6ivq#21RP~|*|SJEdsh*N=>_|GM&12L0`uu-vp~Bp1EzgKj8cQc~5h!i+#I%{+J|Xew-oT^`TRoH-EHleF znp8x#k&KVJ`%-hI~M6#+haf@yaVC1ZN-|ZVPkGVISmJbTc1TakAsDSPnstN?mtY z7x-p7yO~Kw@tcnUj}qW589N)FP(`%u$ZUBzST6v^z}@7y`9uQr?ly!g#(mEpKBH62 z7;U%dr`Wi9%jiac^Fd46XuiI3w+As+-WpC<*qrr4$y5A5wNFl|RVkh+GhA|yZd+ZW z(xBfnMV|+JAku}7W7zclpjjPug#(Xw(!3=VMUi*AsHW*$jFWuUpVa03`WyWL1>WWz ze+9`id)j~!PB!sKs1pJ$t9952H-}0&69|FRuI8L}BSfz=OR$Bgqto|;Tmc@NM;{c%W@@O>bLh6mg9KMI2&nVfVSAv^?&y=2`AD?ZzV5jiiW*`N+MugMLk5=KK zT=c%79LNzJP5|$SVYA~0^VKQ`9-rwb6Q2pOvIUax+@9IF8TJd!PrtN^#sRQhfYg4{ z;0-B;{e1R^*{7{kj2#A%aSY?#qH<=*235|Hp-MJ_AWlm?4=`o>48xqmZ-STMtjrRv z9~_Pk*GgF#z69;NnjDn=zPW$CwsYNY`tEw7P2;5mU>)YrF;R;!Qxyy246x7VP$@+j zwIQj0E7zZgXvIsV^M-wwY!su6I+;jF0E8Nx`QYeiRqwxi%CIq*`#$9>^O7mK8{PRkMXUM486Bg4tAk+yZr~z1>6J z2b8Ah+*A1JiBj7!v)VPDR$qcEX9YyVI+q8X4D>RJk(x{HOiu@F>$PeTLxD_EGeZB0 zd;M`x4VYlmn6a~jMpi}xSj>jF2&cZopIiBOQJfO#kugi@ciCB2HpiFc||DUX?^kWq36WRR9cyuAQs)cRblT}>^}D$%Q;b}O`}K_7g?>VJ6=O|93)8Vpu@@oXG5ZVO znzC+76?QxeDGDpvJ9ig{NKY|-^ET||Nu(&!OOT@!NZ$?WavG^(MF8*^HTN{D7((jY zzRz3|HWKYi7V4h&g3H7|s<*y5Zj()YCo3J(RPds>M{ozm1HwZyS%6 zipVZZBK`w7i%hwfD!USCMuDdh(dCOZ%)jv1KBgCYUW)<(UNfu{bh*J}jU}4Tr&PR0 zzTV&Alk5q->o4xRA86zQ&?qY^v;?>J66*ixsE(huq8fVo&ynb*6U1w>b@ff zQCeCigh@mBadr*+J$F3Lnr(M2Q;j{M997gw%Tt{EGYx^zM4H1UYRNKduLl6hi_yd7 zl7gc`U58)h>PDDEW{d8kWzzyp@{1io`6~neDSCrRoVA<$e`Sk$!Qti3j1Ie~-~Plm z{Q$nxdWDI5nDsKFoB5{OwV9S0(smKIekg?>`Cz<8rQcln?ZBhC{RLI(>UG{PI!|i6 z-6w2{HQdTF8`s-AgWJQa-goWe4&X3;Q|t#7P=AibB(QV6HI}7w9r5xL25gKwEOJ=g z4%H2;G#w=Qml`K=K@Gn|-KhSAx4uF^(^$S(RU}&>$wK4d6{urHsYK@-3B>%%QH%42 z&E7BHqLCYB!nQvE3r36xBu)5T)Xp~cY)_9ZFg+=&# zyxMqq8THi~$M>GOQjlS_g-KjK=i?6nOx6nQM!e_oC4UXCM4Xm6Q)83(lx7sWONZ)6 zoLr)LDplI^1->AkLK20(Yi7T?OPh!>xJEtNX2~IqRM=|YxUXW7g7#6h-{HyPL!Dp; zPB>zB>f4+=uVHC}Sv?Lz$OC7uGj=Tf>JFnW#e>w}k+Oyg5cGSx3o(ib&^G@kuPuK6 zV*5|2&T1LH8Y%;c*J6;pHJPiwM&c(Lm&7!-OC$?VQOefr;3O|T{QMSPo zg;Cf-o4{16;hRBUjK1_uKK6jA@^a{$OBC6Hr=V(Oyp_;9p?c_JW76Sa9EW6s_&r|$ z$!Q5t0*NFMAZxJer1(4lJglb|Ut%#ho^Rf8a@p7zJ3HB zbcKqb0;S;fw*XSiU7GDn52)_=4@J0!^SSMM7 
zSjj@hTyt*ER@W~))F9qmD2(XdD1k9Om?l>eEZJ@|!z|f%S~&iQb?Qm=3FU@iCdd#* z04FPk6OY9C%nbu>kK{q_DLJwOLMSYErfp|REP$uIaH+f83bbr+4FSynoZuc_Oaq&Z z^IzNbI$Dc^FZnN28TN{#Uj&@h{>RSf^QDcd?T(;idrBfYoj!XEaLi^!$(;iEAHBrQ z+}rgol=qqL&`4}kG{S(k``Rktv&Yy3&MesXT-OmQq9D>}4O18=?b5OfV>?=jig(F?To#3MFgPtu!WEu-#ie<-N=d4gXsrW1gdQn|ckr*8?J6IDt=liPZfoxLxa| zlDi%h3oc#;^I_PJErZUN{+GTG{kZ=Ga*}yft!l5)7K&SY_HL4iICPf@Yt3#{_?~t$ zsnvh+lgzKZmT#=n7r|%8LEgFCPj->E?6T`pd3JqK#^TI!mccHUEk*lm(Sk5+Ed7k^M?NUD$u)IfKX<=ougx{&b?+`KxM$FU?vX~{Oc*r zugi?36q9~?-+tLIR#|tacyu2cUwV-6&`yoY5?F~&<*jB2(LlF0>#qjIQF6ZET?qjS zb^O-jsw}$aOli0wz1EnVuF^aOjj-{4K=wzdq8Q7LwbXMu$=_c*>~(2*)kEWZ z(}@)DeN%X{skapLs`oScCT3w~(}L3%&!OCKzpa1K z@s9PwdczDr$RfHc>9^EI;|dEY=}+}BlIIL%356ox zyvxDjxYe+xqN{N=w>Jo01G#ul{i9oQP)q>jJ=jb#W_cFu&hkgY$9mkZSJZs%Viw zs=S!yXEGEt*I|FbZ*6f`h`H|yifh1k44=?)*w+D?!fOwk!mShk*jU%Z5pBC^<+2!ohHz5} z6La_5WVV;RbaIbr|IF9#sZvK)U`b^OBhqweWu}XLj#xouITuV6e$yS&Jd#;~@8vJdthkAQW=8S@)V$-2mW zR|LBX3+grkNp7|7CGAuWM!-ZYswL~$0Fs>(=;R+GZ+K{kIGp6&YP%-bEvWI)w9uY$ zWix3vHIZauuJvT8&U|5wF*yhdS_?2G{CTu4t zKThgt)U;Dm0;uW=id$B`f!N)1dN|=dimss zYwqo94c(^nTUU;h6f>q?M~ZQ$Y$H3d+0Sz)RPD!Cg`HkdW=?$^ZlPK`e%T?X%eq5d z-iPIxUvvD&^vVS5s=k-G;^m|XKP?y+dp98yLP@QKu}Xa}t-Tg-q?`UWeIEWjdhbJ3 z<0tOhlIKbz^u&s`i}^7STFRGmx!0Bp9WA^p3}RSO?TQ{?8$5&cYa zomWA@rVG{&n|1l=h&7vkvDYT3D)M4kLi?>mor(C)h^x`4X5%AXF7Teibo(Y{Kof;o z3CReWp8zHcha8wyG>E`iga5?<5P~v&4)mvP(^jdon%;NL=0^ z=jw1wW)#Iye%8luw##g8(t@kC)*p&(z6vf&Q@x&kEpl!x`$|mto!&}D#JPQ_u*-5M ze}t+8vU0r~5g+I_8}NMc;sW-7ff>&!GW6Bv}bLk)<~$AJLtL%7NT^2Fpb_Q;i8d~>iZQB&cI@ci3$w&39V8j z|4=aPER8MQn#L+$ps=th{W7h8b2^x&LhL=m-N7nzERKYh0dGD~q6CA~R~f=lc&GaY zEK3t=s-@(H*qc|GXc2M5JejTnIB!UCU|gF%tRK`2G22dZdUyB@gtXmFqKwkHya+r_mj}&xx@4i34bv(EoM2aMpm-M}v@Cuz zlfUa&6T_bsF6dF9kAjyEXOo5n##2Tns@#n_e5I`BU&$|6-yZ&Vmk85CVRN5B^AiKM z#R!+R9{=%aNQA()RT%);rsj_nmv<}yZ(jAf88)Rbbfn2pkM}$dwf^n6O-5IMSaOyD z7x64c@x&jDP!|~|zXWiB_&&DBAz^2V-7erHm@FjjAixt9M=aKKea43N+U{h%QM37x z4C973GzWkt3&@lIsN8}arzd~L{lxq@CwtUq@Jl_oBW5q;5`<`d3#O<^$KOQ8fuL zf?DP6F|3SW3uaXx@CQ6Sul0AW?QZ|(jh z#oguqF5Q?`0J^4HJjyv{wq|GbXhj1jtTj-(`WcaBb!8;jb^5pJL+b01kb58J(Rn7? 
z7F9aI9aYiR4=~!Q@e`|40@<`pWAVIHN#g~AzChX@(_Xe&|KqhDy)hk>;Y--tg^TQI zsq>%A`!T<9FWhEgC}yy!k*MryYsd-ppJjOL9|EL$BBtf)LxZl>6dy&wzpI zS^41Q)?&E=mzPNsY20cDS%c@PD5BqLe=Ao%_Zjer9yuhx(s0O^H575$Y&$%h_o~`v z_e-bGIE{DM{IIYc&Df0v@f@l*EXQTtRA-HiuJv?g^hc_@+1E3Xyf>Tfm6pa|lTRj= zROesPsOA}Y+5OKGiJK%iNWvN>I2puHt=Y|QDpwxe+o)L#NxIb3zUc}(QTvnDDX1dEo#v6-Bde;|EPpJAc zS$~|nB)W-ley^OmByQzLU@$`3I1HZ1=u3eV-V}0o=%e6nA1q=f7Wngnqp71h*`?*3 z#j`*8rGX*^zsqMPKPe?y1mM8-7g7Chq&&x`a+ufG2wlx=?HTHh@TmlSybVO(1rEs` zXVi2zF>{W6qZ2?OBX*}1J)g$xz6cc>bW-WJ#-YjV^Y(l%h$pAoM<*&=%c@SCQXn>p zY!N$;lJMVxd2KXro4l)-$ba`AdPPpJH8zu+#ubJlCH=e(GxxV`YVq?yslS|$x7H;h?1UP?=k&ST3$J)4x2VU=F>p2oM^f=YiikL34klr^egG_)asRn@8#M_ zHJVZj4ke{Jn9rd91dj0@ttwj1N}Aq+`_nGk|9Kv?T(wrGr}Ats-WH8(-}#rG+g2G5 z)#LOyRoISQKK*M47SLRAu9%X7c;Io>B!EaJNw?+3Z(#SHfsEI4v&n#&m=2BQAo7l0sYm$9UThH@aLBzchJ6B9x}BmILI=V)MZ0n$#V zw9mytnfBZw{q|d-tquzC3#X;MiU9{$OV&pMA?+0S-l~s0n^r*{!hXGQVg`O=d`f^y zxHmESJ=LDQMO&6!@Z*(Z>kCA&6ejhg6-mxQ&Ic*7GaQB`IBa=}aKF2R!gcNg?B*HA z;gCFYI5dlQ02;a-FwpqQfy6&1aD0l1F>?l*LS0v%7hQl1s@2bjmyhOp5Vhfe-L{by zXsj|7`~^-(RjS5t{<*~>-%$ORla^#y-^&%0t=S^{_ zUQ6(C!)Lm!3ieA75k=1{k2_?oED`Wl0>?e7ph&0}06z_7T$Mx*Zo5Wd$^r3C(VpYa zG>^|vf&*d2=S@0i&?Yr_`L!o}__K`cPLIa*Zu!%s;rePBKLTW^E4F=K)Dy!J$-y>< zZO1j}J_Q;F^(wGcd`Rq&VT;j57#!zogHEWq3-?0JjsHzv0ZEv~6rh%EQRL++0*Nqh zsXQ0sc<0Ag7hYT_O^5a#Zw9pRV=kHiSn3c4>RW#fZ1sC=Md4a1KX-_NxRRl^ECJi7 zydmflXcNei0LZgV8MV9XI7MgaXVo-}I@2=B-vW#mRN6@8|DMW5c?D2BL?V9|$PB?+ z=3{1&taczBs(1`i@fL=^n{gEL63Pnom{!};6QXqRevsOKcaFctfNbp81_|{1mko!h zq<;MRpvnk^hxuc8pQo_D<>^g4s0heT^0DdxiTsp~oHj;5S8sLQ;-5O3?U>(7x|$@h zOI5oN_%&%Y;WP+(ZwfqPgtp|}|r>kZSrSiRP z8l1RbXiv4TEDOuyG#uwN{Yj!~9uQohJwG#^=eyhb~cU8aCic0!3 zT;OSFzQzem6-`*%BIxaoI+S-H5tI-IOT7NnR+)dr(r%@QvPW|jzHIX7`A!iwh z@BT-?IXf2*XsFALuyFRB9t03LH7Gu%nSU|vB18va*RJXf7@@=Ew}>6asa(Q%L%@nCB3)nC@8BA)cWUET```e}IA@9u-Z@6n7z zw^#K=>av>#?Z_DvD>qqW1CMMyJEUa)erxT?unYcT&|)$uIpyga(Y^&xe?XTo*nBKL zwp|ZRl)JS}A!2?OL*XE#Rb!Y__PP(k~Z4yhT_Gt)thq>t};z}<~hh9FbPE7x7y(J2Jb(C zb$L8Y9#`I404l3;6Q{RqK8O-3kbqnDB=PW>zY%EKr7(vYt($ z9@BYC2niovlKPMHAHv{w#y;M=$1Mk@kDuEW8W)kpu|MrR@&clG3$Ia1vPlxe&wFy+ zw3YJw5(`jdm9q#QL-xetCv{tw<`*1a$#uYmMOE`p#-Vap^Sve3eILGNUIA)7_yBTE zfLI{1j_^@z52>|RVN?2=UBk;Ub{J4flSV!pz~UthOha+e*X2{z{P%zVqS^878qEh+ z2cB%2j@z@_*ADA+Cm&4*U%fvY^1LV?@VswsR3Jp~%+W$n#+`!;zYH%okjx4#Rm5=q z3mf5kWWTyEkM_->!^iHoV3VQx=<*+Re|07RKR7}+FKf-1%Jhxg9pkiOup3?M$x?YN~$NgKNSnI z)pPSuE2O=r0*TnTsD7Kf8!RS7M6(N~Cjq^t!E4%Ltmj+wW z_%6XS2cMih0(buYQCj|(+4Em1yBhe)2)NN6B{J$Rt^X|-?av3p^sG@Boa#vbsC(6! 
z2=GP>vW@8y__M2K1skbpzNj`v^pm54pMO`g#!R^oUHI+ryqf}9lnzE85dAANXopgT zn@2J-%uQ@>YUZnYf`;I;-!9SdPs>C7=QHCz=)Mg(63!)lWX%Km9PIb_fzp@oL+hMb z$opIY7w^Bew|3R9DR%Tz*1J9L(Iju`4@lN9vN|M8>t>mLG}SN`vAmM^uj9ev1KV<1 z;aN&hjQ*OUf>t&w>~#$XgivbDn%D?}9rcjZADTF7c7~eM2fAz!_->O9Xi}~Iv)e`D z0JQJx=R&e1r_$gdv;$)J8WWZYePwRByv*Bky*CJZzjwhXbd7T2(RT8bj)mBD4KhBR zd!D-({@eBY4)y+1pm}0}!`v|NrL8RKV~I=?lF$Z#_s>n{*~G8{A@m~EWuUC?x%=(x z@9~{!RF8vs9dft4dcYu_d23Xs+>oR(m3O@0DUdMhzvp@gktf%X7^v$-aBxle!|I@6 zTu>}fkPca>fHQkrh=6VD$T_^~YK@Jl3pFD5;&ef_?0!DC4fg76Luf7vu2fnWuM^dW zE2K~K*ZW<990jpu7L`L^=8?t$@i5a%VCLid@kt=Ob@@~atwenj{O2c@-mw(;4(02P z`<;piL1{pj+!*&^2RPw5+7uZFOsL=I-F1v;d;%e!+M0?t$&z{8_uX#0G({e;h3HLE z^q$d!lSM5=!F0l48Vp*msmxa0^l5Q7X|k)Ia6GU&d_c}hs{r^dwMYfr+UM;Pd?S-91RXa37{$ zDl|8ahv`5g`3A&ARM{rB9ZLj!1mXoN+WEVdZ-zyoA`f6Rbwoa^=fDjtYKtBL0P^W7P*agM;twQ5M z#gzbSk9W0WQtEkdOD1E;&p}LxFF~8lEH(uepu0huQTIJKm)`18Bc|!#zJ7bLPwuYG z@B0R*`NQtPG_-hR7Q1(38{drhDj?#POEEvWIhD~fcz6}odp7$6;Wkif(Q1WB)~b2F zyP#~$A;^Zl>AsiA1bMxfic$1N{Y(b_Te3ZS{O`WnC9y{eg^?&NyR1P3OWpm!az6EK zGRYv}zq6=!@Av+l^`Q!=GBdvOgYRp1bVwHv^H|*DB=AG|cSRMcX^hr`CFbA8H(c@7 zZhAvc=PhS08{F1ToGh7D1BeH7=Z8jXa*SrblaEytMeenFhYDJ_m%Oc~Z`VyIsh9G9 zHuzbk*c?le`H@DU8ky5z zCP2E95O>@#j+*VlgHB@Wd+YiKkvx~S9@)$Z`TgH!L*>j&91TjZ+`x8BUU=j|Fs0Nz z0b?jTo0~=IgRa1fAGunDZoOvpE!qVP`+EipoNgaWGpDh;KhT2bOdlr0Ur&FPoq$tg zeWP49uK;Nkvi!_59(m(0tk)FA*lNn&{mNn~|2HGqK(bzy_niiI>_Sgm%JuxwdA;8E zH8bm`xPpM`Z&l+KrJ*WhqMZh)Lr2s~F!`l1DP=yV z$`dxeBTG*-ewwNA>{1EHT*$@nFi4Mv@LAu7BL%ji(nQc;V$v$DiNqpPnR{1Scfl5b*X z@�kwtRb|z%8M16!F}5u?{mh`BEgu4Qvz(B#Q(Hr~Wzi9w$|HHtI{r{G@}DCfB=* zEBiR2S5_t+r?*avUjG&KBsQ?FXngq?2B4~_T!?rlmPezw&I?Scic-&Eh z)wyR{Q^I>?Untwm{mboY86(gSgIf490P;dPZNM~jIJ1h%$I?}+GnN4PN`N~&MjXn-p(i0Eevs| z;fg*kcW%{5D`ty?Nd2fpOYB1WUQt@oL8xa?-FA${r1Xm1nyr<+HzwamI!kuz+muzB(R*F3WbVAjO3Cb_2tzyB5KwIcz6>hUQ z%IQD-8Q}0)fj3Jx9dSlZji}f|p1}jM>r~ zu!iA#!p-&XJU>B7tRKuoUrz@4O7XYIVn-h)1=_E6yGJC)1(6J1)pv8fz+T;gAt~w; zFz42lT9~DD+6Y<5L45oDDuxiRTIE7Y1s| zVh;oV9ex@@-(Nh4sXO_r1p1LC!eIA5GhN|2YydBg_1CI&z0c^NE}N&*FD$j8WAj*0 z=G%Zr*dk^=MiJbcwR^)vM054~Bkq-)ADHGnj*e3R1XtrM-x@MP^~cIvj@5)F?Q7^4 zY^fBZXJ&%{F((P4Mb5^XZ+`Fcs!ia*6D~cA-Qef0VJH>6mzof7^esSUue5i{j}TgX z-UX0~-bdo<*1gLA*IceAdp&0WyMSz@*z zo9Vhbq{OBZY`Fg3;tXV)_WtoZQQP z41^<3&WBM%NcP8j_~P~|0;7nr0%eh^k?;vwo}f|P|F(ZY($h@f)J>Z@RT68}2$4;l zy8zLR%IKecj0ta$Ri?y`c{lY>9Td)qA?5mw_oBW#44^H<{TSA;X%1|6gfiwcs%R#$ zUd`h#LW)#CC(}T`EAfcv6Gd~PUm$C!%9ZhC68~tp zbEQ5X#R0}9y;GZWa)=bVvoAF8JDG|zPDwOZ;w@5RzDA~qOSr-5Y4&q|+l8Oi7tY${ zCEMX86&M(8ot>|Hz9_^pd6_N#hogD!FZ!FFEuG>ryhf#>YY5BeV;Xf52%!2mwoXJb^2PHA=`OQte&v1> zXkvjl7pgp?j3*$heu*ABp(-U~LnhyGy~y{=d4)ZgGF}-5We0EhXfCs(Q-HfH(yFZ8 z^GkDeR{$it)Zda@z5w%sC1f|Y{K_j$E$)Zh64H5(z;Eqs z&(>0FCDis`r5JJU88?AI|*rzNmn)FI<&hL(xaU^ad?HIwkPs z*4-XemjKH>eGp?k>Us%ajK3zr z_V&DupU1zMG2HOoo5}`8U#_k|5Bol&n;Zy34#ml6cf5fwe@;Tr&Mx(^cA_3l6+Sn1AQ)NLm}Y%cH|muT|FPn`5G^S;93=j2x8dZU#~-W%`RA(L3rC}^(X z26TGPj4|LI@g)TO(7zj@ntOM-*WKm!94rj9-SAMi{Wm0?Bn=eLkpw3L(MU}0T$6w2 zIN;Ug&Thmg0KaMUHiZ)UmllS^28nI-*k9su(pPkk9b4?m-dTbtJfMla$YG21Pvm72BVMpjlSIZ z3?HWRkujGtNlv7>jyug$4`d_+Y#CadFYiI^t2Fwm{n1g{{mI6X6ke%g`hKIcQ6QC> z;5m88c;MW1DTR&Y+%G*ns@hhuWyL@`Xc|4Fg$2yKsFHMBud%(Tq<5Qb5?vDyM8-m1 z%eII_JQE)(IDj=;B<;e__#xbAUSL!mg7R<3Q~}KXiS7!uZxax$~e?L1ugYY(n5XrZC>lnp{mdK-)*{X;O zSNqy-bRTd=#kcA0Eh&NuQU;M)CENRdM{I}Cx+OA(e(}6RtV#z&-Xfy62N9qGghh@s z%ofE@7gwoSn9;9uMK(YE^>Uk$b+_Wm)0u4K_H6WM_%Rqvc8NJqZ91dDntzK9?IX{2 zfZE;=Nn}pJ$wOOx&CcyDV-gQWOD9-)V$caIzI(R()-EoJTHhn@1XNb~5Li^p}yRcBYCc`(AGt(EE(BQb?_!z+oJ9&IF9v5^|Th9s}yXv+5|0nXlD73M@|s zUW`I_qc4);Uqameygz%ikNeJjY4N^REF1P|f%#~}eD1QQUlIGymH>zajETM)68kRZ z*^?CHi^d(-0VYH(*ZI=xN#;T!C%4o;fkmAC&CDk` 
zD|h1iu|+S7Wb%RZ_D<@k$h?lMZ^TH7M~|YHWyiWdtot)>rN>%WO#R|yjxrAhUq%k@ z%Ieim+p-fw(?skl$*K+G`~PKUtqrOFn{mi}t*(P)k1t})j+8X~nKeRBY(twxq8B+} zy)lv1STS5TE0p%7ds?P%G4#>s+Xu(*XmtI|`%00=i88SqgSuqq+{3O)5=r!oH$9KP znA%BwuKK^H^O5ea$P=qI!n^$M5q91)=ZWRCYJIQCOP}k}T-t^{J1{?*A|MInszX=| zCX~KK^5Yled>^E?pAyBaN@0!K&L7v3kT`X}*lbk_m7%X(ld4p4UZcYqC*&dvW(vb2 z3-~G`^dpjHI3r|68Rtiw@(PRxeH*InV~^X z>7NJ@1`va(#)2OBP08|7%32pKbjb1W=n^V=yh{ze6K@@V5>dS;&RWSeUHN#{H?Zv7q$0bCk(D+LrG8kCJpu1w#P%) zDF(ux5~`nv#QSuNiH~>v7zFK8wyNUXO;jEVLk{KjE=qoi^1r2(<-T)tv7~PC2kRqB zi|VsS5c*Fz1D@+Gy#Nw(!iA72oK)5!Tw}Mf=_FNE7do&1!+SL!^<2I&_A$zn*H!$% z&^35CuA}eA*Hc1O&QPYv4);||U2@v9+n26M(NW^C2=>x{d!3m!)a>yL$-zN|&`v&y4qP`!dL` z+B0{~bla*i>8Pa%W24~Zg*!iRf2>QSoE}Ba`!K8fHl@`a9V7asp$2SSMIHoxR~QkJ zWYnw{A-3~nq+${nE84qM{5&mPKozz679nMBcuW~MKXrSVL@y=gxj`~4<$r32_tq4W?rrhUV~h|k>5~#p5SvAt`*O3_Z0pJDeya}ng7onM@W@g= z&}8%QJ6V$YMbY432>41Ch)KJLoWjtFQtV&5?~EYG8JLXYed|fhmmVa30bKbxe+E6hd|A!jE^YO)HKY%0Z;?5cCpRJmge z1UFUdVUz~NRL%r{swuk7(DdOVk*(tBFaXhM?tkU<+)^9<6~#(zUC3}72bHhM7f{X@ zo}%cu*DMxK_|qnjZ`E~E#@BbZ&zw!ym6rG z2nW(g@i^d+Vrg<|&OSCcZYz({Ho&%Oc=2~~>n0(}1of03m1#?R{ou+t!I>5A&Ns0| z3M9>#Vc)ZptXuK1KKYRhot_@7XS&e&x8*n~O5)u>^kC7T`V#Ti`Z(|UR;c!9u5)YJ zKQOzkQiesdB^`25Nr-kVrpYVKpsJ7idW;X&b+fcI{(fhUh(eysfqNcdka=UDk|e#{ z`x{E5ABne&of;3>m#O;`tF)^ll~ysZ#Ba-JqoHQBcj0dG`AIRA)%A7tl=L=1U8eu( zN@c-+mK%flox;g&I(QqQj?{jZYIBpx=I&~O3jJr$3q{mStw>zeue2u;qYLLazDJT7 z6^3Li6-^^ppxT_?DYx144kfm@AElF_uk=WoGStfvu5fQJR8Ltdsj}l$gBVi7PPw?* zo&?elFetHgLGjeAZ*^tP1J6^v&trp!>*%HH;5AK?ofGaJ+rl+grrBSra|#a0a-NaA z6_X=zZ+Va)@hq(^Rk@gXVZ=V1VS+NA$CK#^i6}6JQDSUgkLyy#mUPgn@TpM&%9GXB zlTSlJV0Ea2u0`2Kab7cF{E*viDq3snO7E_#qL=?RyyOhV#1WrUs8EYMfXM=LbBw&x z2r70GGW^LN8HAj%MED?Um5Y{;eziPY5;=CBe8Kkxv(u5j-2pXP}nIIGDSffYO>+ z&jV-C9I}zP$HW3`i&>qWt#=Eg4GHP15%DZZMJ%m|QXH}jcRFYinU(i{MY4aZEXn{x z$eHC>(6?P=@%R3f98JIj-2wpwYwdSCX5CQd?%5r;yTBNs!T^UXcJQ-u$X)r(swX5w zrNvxx2KDPAmMCbSNA|(ltpjx74!INsSC>cw>ac~t^mk&2a$}k4L7*r#W4hRX1GUI& zW@%bJgY9>06zK&$iLBpOKH^I(DL!KBy`uT|QeNQjzeZgWchQ`=TJx5S)-;Djg&19) z!(Em6=g<_vZko64DWC5$j2aV3%*iWFsXGiuwO?-4Tsqk-7aoXm+Jp@@j_iUI>gg#X zs?kl45Hd19DXvV3(Mx zpRRn|bs1*7*{bQSn9;`MtW;?a3}rVf--V_d`SEGVm$hn;m$jOq^b0#LRNroiB7~sg zR3UwEqHaI^U!kJBC+`M3OKXVpQpu*dQB)P&E6r-Vf0M%+`R;m;w*d@0R@bvAjK&_u zEkTy;9?zBa+$@d0ig>o@DRhaPXR&OY!Sp8OC0CTbNq5Yz#f}`d(Skcmp(-~A&0(Iz zEFL`Pu@MqQqu6V4r}}v=>=v1->fHC|gT`Am)-x(b<8YuL7U1j4?^^*AX(4SAy8~y$ z?$s~t4=J2I%j_b3(NRy~lK+^s4RKIQDy73z@cUnrC)TwonvuT_qVOp~U`a3yxZDT6 z?7Mu%RI0kZWD=F%#gHMcY|>V#-+3r6)${>;hfW$EJjLXfn&+S0Z!5QHZBU2Ux#QOI zCIOkt?>6RI-88VAxbz4WB8Q?Jwpe{1Qk8B=!G=3OthGGmy%O2p!adI`p4(B3s^L|& zuHlf_`A`l(sa>o;u$M}pIHER1poEaMnXM5(l7zc&Osi2u*jvJtaE7x~QyjL#beUh| z6Z#^XtLl2x$6KZFRWteTHO9Y76-=q2Zxr4UWa}=y0{+%9@c%hfFR3F#it=rdDpb=@ z7zyp1UpBjIqlMCV$)@40`YmGIMFn|Nm$~h+4ttaI)g8fRLY7Q-!HO<#D>Xx$!y+z+ z>)#rQwe~7|K$whG?MO8w8s;R(!Wm%he*zPO@vBxAcQqs4@%g>FSb61mKA2_pHKUIk zAW2VkV-n1)g(8H>HjoGXi`e2smx^SElDQjH-ck5>nFT*Pm@S(%(&OMY-=9_ewp9`} zeQwL6p^#=$>IM7_ZcAPECO{PbIJ^8@(K&3z? z_oIcP@eshO+npB3)HR^f8#R@F1G<#6mNfGfDoshVnu#(0kz-pW)Md~4J;kly+Yf1* z%uB{_i`jO08^~rTj)Sbj)QLAGC>SAg>NOPwcw9~!ocz1PzOZiUd-;q7D$;Epu#{eD z+IHh$Y85~aTb)l^tZ#YE{3(T>&r?O7PaWS6^quE~g)f~y_Y}+0AcrdS`PW7l;J?P? 
zEd@i~vl$;Evt7$3hvB3Xi#E4FN+`QRajWOl=;oL;x{Ki5D2fL6zPcVA1t;6WB*vEB z{5%UG3LH^%A}b$sUEBwYZ}vmxR&I^p9q$AK0Vx_7&yD_v-iqu}2@-Wq7yWmSGw7X1 z&P8ZLF>X>uy6jl6@m&&)QKLb5(i=0cuU`3OI2lD0L?cM|E zr4I#k`Q62^LnGlQC=uo(E-%Ra&)=lGuu#4774pHA>m_3;LlLONpI1kLcADL0QC{&D z#73I+ifF@c`Hbn9*)7~8*=0#CMYGgQxp-JiKC)2XeI|nV$KSSjUE)m^lo$9P;^rk{ zSv^JWgX}fw4@}z9ejZ^#F^)&G+L?9l}_`UDd zM#+bD!{*zj(G^!pNNQz6ChFbR+5IAd))|X8_E~d7YvZ2kP6vYMQs*g_EgTt#hxErP zs_Fko+2k`U5La~s+p@W0k!f_f7X+qgVtlnU=4}kZkt8io%2Cye*x)Mr7kv1 zx|*R<9ir{?wH|&LKAF$KpjcyjmGi51xwo`5&NR)`-iJ~Em6C-q9??Hub>^JuMl!h? zbcFv}M{T~5{8%fKbT-cNx3OKUOIOpaNQ;`wo$zED?72!-Ikv zA`wb||5ZtT2>&b=4;%Kp4r9C)ZynY{tzT%_SM_@?{)%9lps`H$tV2zQNR%kE1~WuI zVp6kja$M_v-E?O_6wuEBRxOQ8@JBnbYlNO0%UZ#)Kt~1*1vDMQVy!3#7X71L zz4KiSIF;W2^&I2Ug^9VZRGd47mtnW$Tb%pSzK1hwr9?d$pE731m&;bQ5y%VlyPy3h z2{ttt?TX%=`qA?jeX>_D6>_TIG_jZ=Mi(#@WT#>u_Rs_j>KB3Cwiafnw?c1op@EoT zb&oUjq4jKOvVAASxIo6K;fQC36Lkwh4dP7LouJpL2T1-9ZQ-Kc+0D&U9IFrk(a^V?l~^mcroRgTWh zDCUGswv3ngJ`Pu@q3-yU#dzB^2gN_};#X@q=M!kXZsIDZk1=8<&$}DsMTW~?Qk9Du zhj;(ut)_9^9H1)=D~ohets}Yt^Z}zaFE?Da+gr6#a{d< zj+N~dA7MKA6Zr*$J;S&TD}6HD3|s-#Yh95qE?~>Qx&;>3s5E|WVW*26K9UPHR{yh+ zs9ZFz=(U_t)&-9)HPtj*w)p+W$cZJ~(Xc{x?^OA;?^~b7b==Dij5EJir9LNmc1zyy zre4H#A>AOb^i_G(AW4Db*jE>XC+i7zXjRKKi7UqkQnRi46oH7?lLlWV2~Jd+hL?Q{ zCV1uR@9$ZI*$pKn9z4Ai0ftzx*B>HSp8XuL5AxDuq(AWR2wZOf%ebsb_Q)sa32^KV ztJtFs+hR!zyJH}0@r?g|OoJleFr)b4`a+SEpIkkbvVPk<$ox*con3kDkCHS{FVL?m zmS5Ugzt~RB_Yy$a1DU4|nVlGPBVPOQ1dW)$yI;UBsOrn`&{PVh$@qfyHMwFQ3yt{Qb5HOLbnQzhdDZc+L0)YI*fhx) zZBI)jK^vZd_0Fu9OQ%xg3aWVi@aZ?k@&1B|0;Dn}nUH87z-A;f4rT9S%>`Z)Vrk!8 zG%2048VnasFVIWV3)Qy_<$CIa756GGX~k%KX0cMxCh}((ld1B5J*Z)q#DZeu;;O3z0&C4C@QaUo_pHake;pk;i!@I_MI zROo?Zf!oVyiHvCGx5eK1IAnqE(tAE1NQjn)9EWl?)uBdl*Xsgw}?qN*~mXGS;G0K3SCQr?gr&#nw@8zea$0-f! zka|<~30_`?F9=i~ZyMZO(_)@j<`p=NYY4mJ^)6rTVy)G%P?Rna7ST#kr zY0CSUeTFiQ6V3mH$9;(x0sJR1s;y6tMRT%C65HuL0#T-I7&vqB$NPG}igkkfR6$$j z+HlbN8#zSCJuPxb{qRBGuS%$@8@F0umW|BLU}xcDgiG0*sDXkCOddlW--qYPU*ZQk zdo%GLoW;7#pyk~aOPYxdQ7hx&^V?G9kqqB<1(8qXDukO5ykPDcP@sTudNuXn84K8s zmxFT`#a!D8Baw0feCox=xUQbRoZWcqn*`;>lPEyApnU5t*wDl%!#!V_VrQuS^^yaYl8bv@_VkEDelO?qQapkEMJgm+^HN<^6wK?AH zRp!0fq(07o&#|y?7GDO`;XcO;p=GV&0^kp|#-~qBr23kjY*5a|RQq4!*~AC4{Tx?P zMwh2^g8Ot{%pL(`9x|zzGO~yf_0ll``Ps5;xHcUG5p4hYzja zoLhg?67&24ljmVVAk%8eB(Y0$tE)jTzHR>Z8t?Qb1VxdIx9;vkdZMvk`csnsl)a;8 z`v!w4?twj=-uDuLzqGlv{x8tB=FZDM&K2VqzFgy8mr<1miTS6RN-CuW1Sesv`|DyJ zYu|_X`RKina9UB1rSXH=8Il`V?H|HF37c@#g3q@pgmHZj>S9bOQ2`{m`Rnh@d6Fy+#u5=PFuk%N5 ztN?T@q7|=JuG3IM5GnIbk6LRag#*%Hf+kC&S_pYvCh=lfHnOUHsZ^h0rVb+ckAjD* zvJhEC4%Q-2Zel64IHi~~Umq`%_=+}d2_B+$JYMSPFK_*j@=w`h5oDQrtB>lbCv9J0ZaRp6~$*WW~fZ4u<({AyiJOLN7p z3KzgeGu{q~>WT~LTnmMr$$+UyFwa=C6KB|K+4rQusbrL~MN2en;Tni*)c7Eb7L3PS z_&b#AeH$a{6nl{*ymJuu+sM*c)FTtQ=1;Ot8nFJ1l?ms!!OZmA#jDpY2~KsS%CAY) zohQY?lM;Q&cjh!J%U?Bmhh;EXkVt={`{fkj=mSOL&pz8a{h^u~W#y;=sWHMF&^ZV1 zxa%-wM7NWYIH~GPh&UCT=QUZ%rD1)u{nH*E4R-D(^C2@bb63q?ll!5Q??qRYK+!FD zefc+k8c&XnU8VKA6+|X#ttEc!{eI3x{Xf%V1xs?zZZ#0zid)>T4}+{C52c|Bup5L0 zVw?N6_<}(8E!+b1wD7&VCdmu`72XVot@|E2C|v%V{;ek}CPUX&^fQ>-R%{rl<(5+* zLeEvo)iV9|kvNDEIQu^LG4+#2XicXFB}r5^V}s6PRafFv3MmK6Y9>mrgLfz5Hgsk} z42-K;TtuLMlY-^$x-FLSW!i;{=}!jIfS)jNEl4k3yK+}q8c_Rz;rqCBrGrII)ri?i zQ+A7cgAe7+J3cQ~T$g1ev-BKiwQ4Y)CbgMMQw+BFi3> zD~;~aizDw<9hW5eVxJ+nhX-N+qkgqTdvsonKe~b2miVmgShi2ZunV~CGnT&7=u@THdDg=l&`}t zR0(xbI3+O}U;pM6Bd`KH@=d2vro!u}$G1IYl5dqgHO^~RKBz|%tq@W}9}Za2F`9*b zNUN+`Q_O}$tTAq=&ZenWsfZ8BC58S#M&xtb=H0K_C`V2ldI9(eI;Xtwq}|(F^H5tl zwkV_-?^Sgjg($oQ6SL{gkUyh6X8JKCjNUwE2Z{g%pJjO1!|2G_zEyvm%+f3VBK?{h zO|p{psQ2iPplm)E3r4G=uHIZC13siT1g+oN`?ZnSU}QGcqdGZ~00@&(BAN3g#&VO6 z@*KXm+&@=EjR3;IxFrz9hp3Z7mq;9v}t(iH>6E7 
zfgn!{sb>_W`dcRn=j-^uniHDK^gg42D$xdibSpCwc`pujmUn+FLLG+-025Q^u5Oo) zw8t=zwZKVdUi=w-*_E{)ftS(1t3n*qU!6l4`bBMp5LZ)scS|@J!76_u)o*T`> z^RAh2l-Ca165r~i{99$@m>;;iP`fG|e@C|+N`z83-TtxzoeJKarID}EJy{!^DWBvM zHl*XW=QHLmo5(wIVpu@eZ`>_lfISBuEo(1*`zqOO5tzN)RXt6H;Ax8Lh4n;1()S6X zQawetG=~cqYHVW7-5RYVDBa}w;x8}$4=?5k)gEr-bWsj_4ESsZu!k2@?{m|lO9*eZ z>EQiZ?T8$XT5~Kte0CeJ6Wmp`MaCs(1YOu~Sg)}2hNi7r zRB7c|)oplQ|CXXbls64f2E0AY#4fz>@5_xi_G^XydhKi4cb2yT&v&;}g}Jm`VBFZX zsa{$}wt9vv+`$XgB7;#;*zI@9PbT%d82qcoKzzUB#+rVpS*fW#6K+k_yC0&yBFOpzD@e<<0z&48 z?}{A$pQ9BE2o56t^T{DRTD+b6wfFo~-VZh*2=>~_(+f^;2;m-b)yjVh7P|OzyoW<5 zmMq>HvUKF0lZ?PrY&wptd-&jNOhCRJpuHDsP$4g8dOK)f>v|D;w zyW`KQl#6Rx=LdxYb6Ey|Lb+G&YmP-0VoWCPtDWxaQG}AIaMC|5pQh`N5y~7Ht4`ou9}yu6=gtlve@+jt!-*!gQk9BC zI?1z_`=ll0gGAPASO?Qs)(KnRFa#*t+B7h?lk>*aX6V%U(Hq4BpJcmUu^PuGX08l7 z&G}d1&BTMKZt(i&#-FxQVh?c5&q+^3q@NS%xwhyYMb%qxC4lA{EGu?@s59onGZZMS zxnhEi_y_)l2Y7D zixqdb;-2E}4n>1|&;&yA<$2foer2uv$jO9;+<=L(f&_RWPHca-Ty!?`h^K+o zUkaZW?7h{w{z!I2#mqfa;6ipZpXX@l)*bQT*$-n6ExMtRb_pd^*Wy48+!2u$+ha;U zz;5N7BpVGJXG6^P{E#(KmT~3LyoUYs+X62rw=R(+Rx%!5y^4s`>`q|)X}|j!vn!pB zn_euMK!?<*O0!m~D+|)3P~+$H?Br7QG>se_kM_F0s{sAc4LQDvnfeLY8jK)F;Xc)q zgv?ec?JP^mnZaCg25VLrBDdT};)6LMqufe-^$0xrLL3F{N`Z^&k&vm|jtmJ`yeFB% zp=vsuOf_)rYnirni1%VOp?bg)no(rh?29Ej_4F}KTdftnu5@dRCw$%s9IWah9oy2* zxfN2QCmUsR{MQ2glOTn$FW2C)h#Pc-_NH8kii=JuZ8RcSL6@wPAf$`OcZ~h09HXlz zM)h9f=<>?_adjsy=LD1wG-g2)5?&(PDeUtrP9%QTUjMCfK6>mtgt_>yssKmMLo2O? zfBowGk6d_A5RZvxlXdfKUDw-jpCVw~EP^&DECYh%4Q=Iuy~?`C1tB6pFET3T7CX)w z)x~dgHrUyP#Zz6tr5Gc5ln#jRBOg=g=GPlq$KE``qKz12?)+^KM`P+8X_SrT9i3>9 z50m}@s~Z2T2@MBxg3pF_yD(m~#yMNMQ*rCr*Cs_z;b#Hvl&P3yMGsHkbTLtsQfNt( z&1@$b%Nu+&faVF9VfACZ@3Z*JF1_cMbMmrv4zrN^sGBROKQ{fg_3OtED!|omH6jZz zhlj#k!gwjD1zYbfr=zh9Tqh%i0IWdwzN%sXF}8h&Nb7wDW!Z#5U% z>K18yh{`zP@nrc+AN_(e1y({=U?KWzwaNQ*C;h70yYpDZ^ztVTr3g(eK;3JKTC0P+ z0dYR)0$aRRwD9~>g72Du6(uNF-hOd!;67aaD4{;xz5zkVP#LjV`6xuMs1X|Y0^ob^Pdl*@e zfw_aW;^7|=d33O39P;030w@jVD|MJ1gjDyLIcb+4+CvV7QSU|g%>8@@(Z~0L%p-WW zZ3Kce7xmwSLnZt5zakpp;LDo$zj=@Z3*udBKK`kDZ%xE+e%nUt7m)hRI(XZ7=7i%? 
[GIT binary patch payload (base85-encoded binary file data) omitted]
zmKer(c`;EW0JM{m&Rkpu2fy0yVmF-;@26ADPBMX!Xb(X(kBEcz)UM&KG|xEDcaa1W zCVNH7B$bEsb2%O|(zaWBeLwXK-4G%CgZ#c!rxY{m*Gf4Ul@PZEAp!Xs*Y2dNo__Yqm_k~%;XTZRFfYiK<<#B zEyMrw=8|LbE0G5(zacaTWuD!qp!p)X<8!YK-nNJpxP}haxf_Z_?u_nxs%riMcaT}aZ{Duk zioDLXkPj1o8kNin@4+X55GV7anXom4c4vkA{Z&vZ^V979-qGDVLZ(Sm zdxFF`)~WZiOcvs=tyF4Bu00Is4s#W{GN-gt;E(n>6o{iBN@ks~Jp`-*hME2mMNwt{ z10II8J$Ne21l$fWJiTM+0{N>bHMPjPmLuQ$(e^l=>Kmfg%l=JiZe=rhTbKXz5S?T1 z#~QBibVjPienEDMou6BKKQv##WKwgi%t%qchFEAS5;^G=b;}Y6v6O8_e2IRKtikzA zDBOp)1KW9;g*$+1ePOS#lr27QpS!f80Js{Q%5}haZS@)Y)K44UK@Do33wq(lGqklcenQz7;N^ZA+mZLMswK9{&X@Y#ldtRT;*m;~wHLT|~tUGSkh_N$Hc0wLG_}__)Q~PI}FU5K0 z;ISDKszsIQcB2y?DoBH`-61uFVyB%dpjPjX4r=?vr6gbhdDaZ&Dze}=_`bRsl$Yt6 zM|J~kAfe>Xx&?>Wl9=Y<5IEIxR5f*{A|<7sfv=Jmo^Dg(fSo6F*)S4$NOdWqRV0-j?18Nh1&bZD7M_w z7%tVa=|v#HxgEpQ_V4Iz^;=d=I&BKbCK38A?kJ{+z$zYV`+1B0!DMCG@y+$9g>cVR zUP3}az{2F5-*HQy|FJiD5d{gl@9QQkSAuWBJ^ljEF!Oe(T0W;(z1{vT&@I z^KO)#X8vlf9GSxrLvEy0(^o~lCEOtxoo18Ey3qOqu^@{28@VucaJl;YcLx)yr!{N5 zK5aX40&FO=IfBLT_fo93T=(wv<2?Ri$L8OkQz}KdUnVu_G&xv&AP*~W1wg~jm#06S zwifEiJ#WtIdo&R3;j9CYk#}Kpc`k%ADV+wFs=fbnjRZJrl`7qp9_h(GA{ zZ!HCYHuQ9#Y1LUh>o;nOPz z+dS|5?p=OmOkH8Hmptcs(9?uS*(Mw4Whv_hs+vvua{VTE)b_&%gftQ{%`Anc?doTQ zXa-?utupd&3WBB#D6_#_h9lHPtb>5}{v^$ZDu1*zwk@%I!tfOK)2`V9xRvDO-Pj1{ z_KjZgl^3Fs0Jim!$G@3>fU0%CqSI25`UfJ_ZvXH^=;dLPR1WKW^$0ncpn1SYxsnWG z6(OKpdGS7PvQ}D*ZijPBiF#!q2 z@^{|qpeXxkAj0R});3KZ_II`0QaMwP|G7}-x@lZ6tzw&3&HN>8UN3=g2k%DHsBl2r zZ{dunWVSc9bWyZHppqyWl^#2Zf2L(te1Iur=jX|_)$DM3 zuqeVDwa0u2Q&BF;x2xW+p6hmTfxkIB-V^l3+$KtNL?kn`o}vTPEOkUpB=dYh3?%KN zw@vc8VR-%CefI?^;U5OXuX@AERcXAw7g<`<&BZo(QS%MlO~z9O`630eqlq8%-nHc` zR&AD-{3XnXTtf zE$_^aQ`K=uIX-}Nk9|Js)OW2*wDkm2AFHC<>b^Vpr@>XXH|8^Q#Klo*!SL}Jwu%Q; zMIBkk#$4^TU6Z$iZ%Ss%p0FKc_L2mZJm}b}=JR|6S)=D&IZt5B>ZkPuzp)&wbG*R6 za?!jL6<>W4-v!XPvtdaJY({ zHTB8fa;Ftcw%Mtg&rK$b4#FU?{XPAWm#h=K!SbST$qO2lE+GcipALO6iBJ-<* z&s9P`ygO08kK&}5v{fN=1WM!WU`u?Y%v>5enNv!~DF}UoM2_$$up$==fbBhrWB83R%h?@j);k|L{l5d#-qRHoMQ|ITCQ;nP_<2)Zx@ zg-0#D@7qY}$B^0Z3>_3T?jHuxu!SkETfKJ{JjqUGGBq0yKpLlDNX`*Ion*3VZ=cZ; z??ZPK7lR^%wl-XNq7@SR#u;Xr`SpSsnAh-N@5Dr9keDEj=eB67X z<=VZ+SWj-+=%-^-$`QrdSi(@-y#@FCxkKmuKmvZ(`&is zsLU*W2oa*Jhe%W-FmDm@l{H#^bSKp@!1Vd8t}k7q+YrH*x20e6+u6ky|C(~>FvgbN zhS*O!(jC1q>&P$}m8Z{tyyWx!2s@xiz5o8I->pf5@A*b;=9V#-fj`$GarRPA2&I(w zYc+?swKW;|2}Qz04Wg$@T-uY4h{)sk*U>C856H4wlRb@?5;-?Mle2@==lzDuXEZN& z?wxM|^zyM>=|_}pAD2>7)jGy5vjnbeUr|ibkroe--oqlt&9n5XX_ekr2U18~3wIY* z!2}$B=nU(KRt2IM5ZB+7_X&t=L|qUme7wnRGavZ7$?<;xEkV-0KG5DAM`FEQ9<*Qo z`ZlT<>=On@=0cT!1iovp;J{J))dFZ#6XEOJ$rKw!K@Xu)FfHXa zY4D36Jo%-ZtVs!w3pgPR;fH;g&Z=dCi$W&lJ_$gATmFHsd_+&Y5$H6#h0vh`9KEuk9B- zg+rs)+SLZIg=I{s$Sz*F|OeM~mPoeQ-I z<)JCCsQ(sfLa{SJYQMeY0(4o+816Wl{y`er@hdg<>qZS!wN)3WfP!hAoH~euC3M37 zaJBvuR7ync*A1t~KjeG!D_aI-M8&*Lwz4up=shC*na_GAtA3u#@$$RrtlV*zk)BzT zaD&ERjWR}e@~g{)mHOs`_C>5*sm)~;gFKfB2pf6)l1ncBkFWka8gV3HquJ)0ZH7PX zuWf_6Tgeln>`U0~MK5~nWDe@*9f+QhV&6;hfw0YI=IrIM-j-U4K0lQ~4Sn z+kX2$g8~dc@ySmfdE}8SWAPOet`~H1EM;Tt{9tFIv#x4@0eLCk-FM%~{09Vb8smZs z3wia=8ncA29$w*$IC6?5F9D@W;PIV-v5zAPz4GN;B=r`^Jzla09|j@gqOc1{O^2@mlL`#le z44*4kJod3$A#m(*$6bH@^;i<6%ubyl71Vu8`K%o80i1=k`4$a^BHP%9t6E-?!#gGTTnqxyZ|G ze+un4%9$@^Q?d09F>1;r^AqJAQ#zBwa1<2#WC0<-)1xmK2JJSOEkYKPhin88V#_y78+<=yFc#1VI zF<0qSW=-lr*#=*YR#dCF&YEo zMM44g3%x+>w%dzOJ@qu;N%O@oesR}bc74SwU*Qe&l66znkO469%K;+Nk2x9_3r<9%LQ&vFh>cG&w5Wy>`!Y5B4h3m&#M zdlI97t=CUU!$-MHTFJi_${+IWW@b~x*-?=dR##=uX0>Qi*q^wiLM z`qQ7T`~bORa=0MhAe>d*E=($T*&=KAO6UPPdwoS z0GNa^h$=f%r!d^Im4G9{3@QvrgaeL+U4$wELP~MjBCZw$4m;&p!d>|qq)R=VqzxIg zzl#Gy%KkP*DG=i~jWf%+#Y#VtvSG&{EIL9`LBW4GauWPbd=2pna$}zo)c%5IC$1y- 
zt2K72Lv%#@4H03Lb|6Fed?=@(QDch5vwv8F_8ZylH9*(j|>a@a6;B;B}~&<@zY$^O0YU>^k@w8qg(S@RnB-Y zX9}P0*>|6P>6Exz4#sO=``QgQ*x;jln~1vw)28y`Q8xEI^O?^KYlkgL;S^An5vtv8 zED_9Pv*?d?pIL(l?iatj{=WN`GM|7DOkTa$UIfPcl8i8EyWz&`xSe1)wPHEhY|vg2 z6OkNc{aS=+pGlL{v`RkIf7aL$^>bADR{7#_`L0>}rF$2JlCu5%gSFqb{+Iy#HFv4z z4EtyMYABAdPvpe*ul!fhe);CLI!RJ#e77sb!3pcs^XARfy?s5e#O)t13wq}`v5`!w z#8tlt5XeYpuOz4v1kn|PpY<~Wtx(3-Bu+_Pd*pGAb(nJz_yV>TMZ2CUfPcofAm;G} zk<}hnP+mZZ-@w*~c22o~i@+xP;`~zR=i+x)D zPM}Eshkb&+8Kr%c_ddQ&Sn}y#<^fUvy<;@(N~Q9R1f8sPE5%^NK56})jpl3&^6p1s)TrSwIhNHw@&GRgfo^9@~dI!uS-zz?Wy&175#IhrAK96@xFr( z=6M2=8e9l&HrsSl{6rzZIv^8!Qgek8Hi;z?t&*gb0*^_Kb|aFEK1r7z?wZ&p z_VJG&J%8RjmWQ~~A*U?Fk$`LOc!@ldC6=7n-a*=U-iv{TSJ!d9gP7L!bj{DorgjKo zQ0G21Mu}i%5Lr9J5mECPu`UNKS)o1s?9Z4sjWt0GP*#>jsmRN^wf=At|7;J5#rPwk zrhH3P5{B5sZ}qv8cGv`uh@JWW&ZN*>=%GkDs3w#U8NkjwMoq5CZ?$>cS&M%vGY!8jU%sH3dyOb~KeeJ7X z*b{lXmOI{Op*M{K|5HbLzb3xc2boUOXy+$VgA@ihHmDE$k6j}b7k)AIZ<|kGK7Bd zMIH>Uw8~ZIBA2y(27W2YPApQ1)i6XS)kY%e%+**C8Bngc{M1x-d9buhRwd~}A^+qL5fTuN=T-0%{unj%6sr{egGfb?1ja z`VrQ=*fg& z$3r@V9Pz;;DAT37sWriC7nFi4LL#P&v0MICuYBdnU;bAO$;%$tWtW}LI{Pec?=xP~ zqFBUar{1%k^{nfzyAA==ZN1IbjIX?!SMAM;VE`#GZU3;T3JLtVbNvPoWn-MrMKO3V zD$|9?0%K#Z*mKW+KK7sK(3dZJfX^V@e#h->|AwbC#J!)X|NZ0t{`s}nLK8TiywZTc=TeF(H&%)DB<1 z$!sD38C8gxFP#J2gb=L|i9tkMh%ARZRfv*=7{?-pZs*DPq8aA45>614b|qCrV=+PD ztT;0&P|2nu#fqsE^J`uvQTQ9c^2I(?J5gyagC(X&Hyj*gbQ6%?5%|_30c8XUYL8vy zOOUo+RvAePN568?Ors-Xl-`Po9cO9J@9O&7uFx;xL&_M8JO%uv!jotv->0Y+oXPMu$kUC~55@dP*K?Lju^$orx2uScy%H zt+)RARx8yWc(~6`M8uD+;1Y28iV1el6QV#>Evw@MFoj!QMF}r+i*17oBMgxQZQH}>oa0N%AiW2w4FX#OB@Fj_I37U`ni~-bPZ08B6(X=#aj>ZlCf$sq$ z^d*1UN=n2Q7NiLLyy0d31laiS0Jkms7MICAJR}!?# z-pSv2h+3f>3P=N9@E3&BDWz5mKs1PmnVbae*z7}}gO}$ad`k~G;lxb>9?np@#tB$t z6e?InxqR&lA|Z}iv(`b(5#V1CQHTJ{$|-fGu&B2RGEoQXz<)_(u;N>MMcNATm=+e1 zSIX^+@PaQSt3`>S!;+GB2yqtZAA*}yjB*j1`9h%2U$HWrfs?^6|b=pL1Ct)g!Ut0B!w!8owQ4_+i%fMOq$Dr!BF^t zDLi9HpZ(o=@agasLn;gQ>=Sk)sPHe!Nn0@^vD3){$v+c0fix9u4ZaY<#2HIPAOp}S zY9xS^?BC@_KBI}0J$Je@V~2HEqKdoF)q?7sW&KjDNExYj_Dj+ci&+3n@)M@Z18jXjJh zJzw@%rq}i@UbOhwV~^$WUKSHi`NpY0!IeL6D^LYvrGP#`+9!DCiQVO5DGp&5#NGc0 z-`xW5UPH^>ci#1eH@xBAB};ixGP4M9n5AgEz)czF1;>N{#Y0VOTGj)be97(pWmDL; zthbYCp8*pa^s-rjJ!$HoWR|p#lq;<|d^$@-@E1f#EF*DyV=5aYF#x;mSkls-K0?PQ zbPa_F6mzXd-Mn(GJZ1e+yj3ZqMCi=f75~g+bkbf%QA)@A$HK~+AVf>##>Q^F_13o? 
z_|{o$ja;$Zi+T=rX)6o-=N(Y`YOwyuJN&ULtG&Fi9U$;I@mih-3KxNtfaTr&R+746 zXNK6Yuh6>!<%1Imgd{HypkP~GFC6wE#yQo)URcC)fT^ zZXXJptV+!FlZv2`P#YxgzWZ)gb){WQ{VSr1?VM=gutM6ObI_{Uoi#~VPD)x|#y;5X zm*Ri{#zC*@=Wjdyink{w%26}_Aq&n4N}T)PuS|Q6WLswaN&AyrOZia|oX9oW_^xQ^ zk!iFXsMm-ILQNASB0O8wX2 zt8!$~N(tHK)J$@I=%$tJEBL!k73xvy&P>#`un*1GP~thvKkyNd#(w*yJ#lAZ>3w`n zLGj89Kjg*4tVh;XE)>EK|BF~KEBuwtGKLaPIB?C5VCOz;Qe#a^37yvE5%Q@;sr`pG zWwK}|oL+#llyiFdZ2-lndm~;fYj7jryeMY%j*JJt%Bi!$YVgHI$%BLm4ePT&2~NJG zYqFZzDl-06)|}f+&If#tKI$l~VNt4lhj-ciZ#(cnK0<%Y$B()EiYpc^UVP#SC-Iik zLk~ZUeKCv$T=8O`k00}KeG0(t5s%n(zkT;(J0acHvIkb2c*2Q$y=pHyV)W1#pMKhD zkKAMvMzZ_vy_aDLf^T{2fxOfqGYA6L4l1Q!7eC6lvLA;mF9(GE{k?gDU;N_T5Wzn5 zst-W@eqM4pW1DRr&!<#01gO!FEiG#v^uB|I#@(&y(=WQ{q8*>UBlnTmEzixaTW`5_ z=bd)G;l^Jw66-0jm1EC+&hrpfPQ9nk%(_FVKl->c9OllQCz@QutXy&3_1Asi10Udt zp#VcR zYxdrU-iBV_xZ{pJ{fuwwHZMcx$`!{RcdXv&vvTFXedQ}O9$Fd48*jK_kC(p8dvzQB zB1(JJz|BqFpZ)A-zx?G5x^0UqHL5LA+HaVEmlbe#A=)qf5AP@9k(L=VXVG4+yYAZO zzwm{8c#g#a0qY^4`Q}4{kdrsmTmHdel}9mmon0mWN$?dsKtE?Ajn2;gT;uWO zdboFK(r5fg!}W#+a=Lo{tI8n4h(jtu<+@)&74CMxkCHjENcg-9312Iv5GBVEtQd}Q z-smAf|5k}_)D~L&<94C&l$XReMT%cxBluSgld+i6clasL$Ct45$aTNJ0e(&B)a4+x zw$GaL51_h5sIC7lUrL(N)MI6otLslx0S0a2FS+>QOE12psXTbQ`lA#0sKV+;uEmeI z5Qz3={18+=!dH$X+m|EP7(b;8qJRIu?KkWrTRQub@NY~riG6AwD*qC)O8ci!r@N%; z`sXO$65jn5`wPF%zmw^euY)5ejfB$TYWa@jzdHQRJ_WhsA0ZXL2%uMOZNUD+hOm5x zU-)i4M>$Q;_23kbENDOA+ursz zo;^C|6QAJaMS!nZzLLwIB}=*dVMal38LzpJk9kPtU&}Y_!!-+cE?@rAJ@&ZcjyvwY>#kdG z{nf&STX0dM^3{4}XP+!S1_^Iey!^7uuekEcYuEt4coA;#qQzVB%6Q3-KW?kXak^;H zHPke`Lu%oxFSlwA_H_p#5azO_Oc1NC+9yI@_@WoxbI%g4 zqqs~WB>PJt+4-Eq$zMMC=Rf~B+cn4wj~KH^XME&D26ZlZsl~r9BMC{u8r z46-4djhVc5hDT5Q^l^X)j2(Pb1J=Eig5a>hSS(E<(B(yuo=s?J7Dw8p_uakZo_p_I za_`c6DJi+z@3<`vck1WPJMO&m&O7*cohFd1{0LqLQuE&^LJAl7p;ATM*|cj?M7DBd3ibSr8s8(wt~}u_HYnC`wbEoncHxAq z6ig*3e&L6cfd2VBCL&QeLpXx%H}oX<1SCKC`LRgZ0~nu7At=&Evo7;zSe0|Dw^ ze8Y-`HqbcghJ$X=kXRjAU{GA)VH2MubyceYgXD}uQ~;L1D%;Evh2v(OY8harP341D z;J@`PZ{eM|Y;>o!((UtV+kfyoR&~iOwR)dAs#$zH^aF=J``OQCp^9Cj;87Dk@ySn| za@whU@!hr^|Ms`P%?&haNj%caiXorYg+JWR|Iv>gw$@rCfjazkyNso6OcA$e7z69f zn+kv!Uol?6Ses8SQ!Tj$11B6>c$WDq|MnGaKlD3*vD`~uvOBPm->?;<=9UY$s<(Kg_t0)3?5n@9ELEsC6VgD>9HYj=U zSbzQX*M8XAeCL!?4D!JbeK1}e%O~v@E?kJ^SQ>cz;~&rMM=Z&c2(NtAE7=vu^n&|r z`cfFLH=Z_i)|@%defG0=-gze%=mgsv{cdA_N|guA!Y0}^)pL}k%Ts~mSEsHa7RtKO zOVB^55~Xe^+L#0w$VZu)H&H6g+En|p1}cFCIoJI`%exM!NEIum$neD=Ty7YIE&-=N zkxJo2)0t(uc4tIMN4S8h--$Ea{s+DNQXCU2S5n+82^9vWUie3?Q|;BAv6!`V_K8@L zcLv1KqG|+qX%*#8p`$X_QmT~ttA-&-qLE=W764R6SOYhV@8EJ!v|J!4t-9R6kEA;4 zyx7+Kgw8xEwgY>d5C{$&5-?HaWu!PNFlkrvJ`}&Mf=VO-GqiZB*PuF#dZdZ(tUwP$KJot~kxNouVzWco31ux)x;!NY7^rRD5P;-H!WIj+_}QXsxPa$5jxT)i3%m*%1s7g; z;kJ+87JTOV6gT%dnfZl%=#GK`kN<4E$;R9UV3RTh{hjap=N4ORK@{DuxP`RuNn#L4 zZnEJ<`@DAFPY)j3v1|DD?}MJ!F$*Ltd-T!&hz%j)TB zAVYNtt9%GYb(VbLUvbRc2o_~7GG#2@@ub}iu@LNAQ6bn#N<0wt>yvUIAn;X23XFJ- zMq$aGcBR#8zp2I=GO#RPg;O>>fd|&W%RUO?d^-p9x=8v zAs?U(Q{NJg`}vA5_6hqFAcKf`kdI@Wr9Uhaxl?K<5w`QI`?wL+$lWQ5kE zl+*J_iL3 z+b3mggkkxl7$sf|tFj-kf0Zlc>-62hmQXshbokhF^7f0E3}~Cf zt{Zsm8M43Xn8vPQof<9@M6<;qJTdUVXpVjwmJ}Ox3u`(4@%kQ)ZIG zMF>b)56oYaE_+z+;$m&;#8lQgQ4Jr)s~9zA&6;gi>clpA#3nMCYZ6=m+;F3fSfN8B z^;26IyeNPQn^sj;AQNME+c+4dE%!!rvK$Y?zwjJP(U7d>7H^zwEt)C9pKCl zDGPGPKgyz$Qk|8fF)@~Hw9$sF;Yxus$n_V2Ag!0NE-(^V)+eL{#kn!ii18QY*g%i| zWL*ms&O$pHQtqR>TK?t3*MTfH4WEk)hD1rGGZ1 zf~FW)@INy~>`4BWla18)j2HZ^EAb6#f6Z?MnU70T=M_JB+6aBl31Vr)RV@5TkUtTW zlTedWhxKADXKj5xkvOu^QfI;oDEA%Wa#eN-hZeGKeLd}tC0IHlk1A7_s-Y^|%w5D7 zCx8=;XyGf3@>gIpcL+uPBTXCyA{PkLWQa*VkETlyo1@Ka;opI=tp@46 zUiGR?Hr|-sE_dB^m+n*9>8)5*vA2;=n=#|06Hk2M3ty-!F0G5nTJnwYCfc7q_c_mD 
zxee0<0#yV3j9eOQyterDuYdjV+diIrt1niM%FEaFA`C_^`b0cgiY^N5BbkXY@}hnp z9r~|zPyH14>b>_m`O7D9{T~&Pth3*zL zN?dTk1$?Rud<_rd<1D{|poK^;%Q9Wq>6y7ReO?knmtvzK3SsE+91 zAzrpljar?j<=>M9Q>e;g&l$heAJzYXSQ#;b3MHL2elU&Q;E&eS10FaLUV|C2qSC1! zNYOiO`D0}jW-s0+#!n?fx30TjIv+B3t$nhdD1i%vf^2I}o+N+q1gJQg9u&9-PUVc&+7@7x zFRyw*oJpHrz0V}TIgGYhU?%+qpN@sOF%3a#RY01Ahm|5nk=TBTFe{{g0iW9gYScXY zzib(A9X;ZR50ZT5tl41Dd%Wq5Z#>~&{^hdEE^|d0JNV#(`Dh(;-$4G{=RWtM7x79U z8t5|S*P2Yra%7$MI(5c$LiEvC!nsdCpUf1k=Hc9P&)sgjCouCyoPLiP8wp`eM+Dr0 zwHGiez4FQ{>7Q4OX>;+CyVdqFBS5lp$yFM@;;zmU>12Wdr^+>|{aUQ8|GjippJ;`W z1RDREwBOa{LU;{kaA@mikJZpg%YRkvS7m3HKZ8J`FsYFcR>}16hwKxXC|bt3;M--F zU8n#*{NWF6f8*yt+Np%X?OLeL;{;+)%JBoj?PhjiFfRiWGxhPLV`w*>D z@rrWq62gk@JI^(OG_zWGW=4IU83*>aG-^K`t@@(|6%(K;0rdO zTQ!5q%lR7cM>KoCzNVIcf%7kzf4uhFV>0Sw&^}2StGN!1O45cE`(ulK`|1e^EeoM7 zi$+Gl{rcHN;vK?Y^Y)eYH+~*E?83hZfr`ldzS?iU*4QkzNf^e;lT6#b6x+AYWbLcV ziG3}g0zyqjDfc~rI9KzN1uQjq7~o%{131wBknMNfb=GA>W^LuK|N0#_-+UAI?75G_ zfFAs#d^P`{GQMc>VwR8?^f%vpGj8eAicQ{CvlvFX;}NU-@7I(--;&ac_L)d99B4g= zBGFYCJ^%QXTFL?g*`G_0~yKgz*Er%U`n4&_lDifyEqM|aE z^xZ-Uq{b!^F)^DRXv8obqdDk8>{^lGue`_?lt!tEj3){*otJy{>oD*=mXm9Q7w) zc%;#%uh=TiIKkEOFurIq1b90eT`gC|NOEgPn>l0M0v>i!rQXIrI*I{~t76Lcc zX{W2NzMA)9Y`Nu@m;xyHlP&EO<3oG1XaDWH-+lE}S3U1}&*T0Kp!9r7uig*u_=2_8 z`pHEqQMext@)bbQl3-Tg$GMB4%|9fAzw`m21AwAh9I#B}XrmM| zzivTn5VUihiL5ZEAfe|IY4Vh`N|D6H4@!yw@W@F=32VzYVlAPG8jV(d-NciNa5GKn zR*6FKFZR*B3Mw$d^f|O&Y$c4~W5eNPLoUg+AzQf6>0+Pl^BFhdNZ={Bza)A?|}i9SGWKM)k4y(h|XYag2-3*POtSEixM;q+JH z-nu|D5gV-Jv6IOv85GoGlx{oi9`*_OC<~Q=ibeJh7l{=R_-WCqq-a%nDRJxC{!PjA zzd8^8d|uQznE?_)dZYMC)NuXRMY3S>{_6lW%fg&^lz>GRgSfe*gx2QHQz9P z0r8nnq5`xCGILsbc>E0jmqQYB(jI=4UE!^&=zL}SivFRk|H4Gbg>BzM?RV1li?w&& zd1qdLe#jy3qe(JMtHbd<8w}{iiV4+ET)|C5Z_C(bn{B!LQN?m&Y3=XowvssmJtxCD z?;zr#UA3q2v6-`Gs>w5@;lWj$p9-S6X?B6rvnI1BSA4ACk%WhrnP9N!gN$Yxbl2L% zrmxeo8*$G)_nbR_9+y|_T42!*s;Cw$DOKCFd4k__&pCICEwapXPXxYwQp`ySh8I6fB|nnLJ9S zYgH_1B+Y-|9P?YFrx2po{OlT{xI;=QgpmnM82zgKqB1Bk)k76$bwogdG60%M)`4$- z`>Z*$&i($m8$Tj914ed0CSH?9sa06pz6ep#hND|XK-NxL^#yqs+RCZpgB#777zmwb;n_6m^%zCiFu3@d zMK|2=OFDNJ#i$L)ufM?tya$)nOD;gBP0{Tv`dSt{(aUSRkiq2Wg1^u(M6o}k*QJ+U z3I|NURvx{LbtUh(T?Qo+*MST98*jXkBzz%6hH?ZoR+r~E+n>s(e%xyx3%A(f%=m%llTqKv=T z2UBWr9Up(q-miJxYhMQ}OE4a814&2HlG&^%8Ek_!^(TUzbp$0|T&;ivn12!CT*!0@ zYVgDO1z_m6;1AdTo+!W~&S8WT|A=Yh8`>k%B>y%+BQ@NB`t9@YV%l6(^<8xJ5f`E%B5e)2XkJcApjxY*%JFhuA=T;Ye8Q*A?!DJu`|Yz2x5uN0q3sJ? 
zG^&O!4J`{6iX0m|=%9l*(tc@5n$5Yfs}*~iP9tJzp7*oRFqyEyAAEsN)n|Wb-B11V zKfnB^KfQa&5}G+_*Iuwdy(lk2eal<+-+zB~UqI8X=n7xcyGnv7;KLsNu;Y(A{+OeV z`t|L%@ns>Rco2pii@?H-o|D-yH|p6JFl+W~?li=P$_8U}{V7Ff`=-9O0gbMoquOsJ zJHpeJFZSy))|a*QSJ!??+>}!A@58ib=0Ax(vFf5^pYE*1kixK!>QCh#{%oTJJ28if zIgC#vz{6Tb;+sOx{uK0_*>kvc6zux=;7WPmR~0&+%oWZ~BwE$30;;h?$tyT9fj^qR z;(GQU!0+?#?BC+|mG|u5MHLbizl$2>Yw;s#U#b+~5dSJR%0A*>?O!Fs)3;9>*|SfL zSQS2TO+J%9)3)&`JS#v?b^iJ1BT`(i{tzf+k`qkIMy|+@{_XQG_8F2jueMFK5~Is^ zl>K|DRKq6WIa&Q_(}#a@{-f>FrRr3#dHJ^X8N_e7_4#)(ozBv}y13R?EfYqR^)o3y zt0>=L{L%IitofG&1NeRZqszDDOF@#v6|f&X#J?Xscvk2rJd*Y$iv5T9SFus{S@ZI( zFxHQjmXISE!H+!hNXB=j1k4|}7(t3x_(jSPXZCRIwb!x^!~6la>86j2d-(_p7dn-{ zJQ>10Kfc7sQck?iX!e}hT2wMPk4e4vz3*i{zyja*zV|)m9%yH3#Dt2S3cxWf`S*YO zw=EVf)LwxpQ}onA*(9;1SZ2&{hn zoY1xERaaca+a1q7>+H~FV0nemxq#`;6F&k-V`&cjLo|-~xa-ZLF*SruGnSQL3dBzb z8S%*;F&wxd89oq-P)F7a>bNH2#+Im9<9OC+s*pzNCCaN0YU`U%sotkEEmUyO_2* zMA3C|t&g}6E#+J5|ET(-lwKAj)o>McXU0Jneu_4fu1E;Ob!sDR1Z4}ARdAZrML!lY z;#RZbAg4lu1`H)B_)3l6{dlYdAsN@kBEsDowJKqyUMG)!pAapX(eL{%Q^@fAo>Vuq zE54ekG9{hiOlRs0ZG@R%J&S)lVW%jGMuub(Zi*3>tlYUfIZ(PQWxrl{vFT=;IjI!T zi|O@7sZ!Qp@F2X(150xD$BArm!aiN`u>3%HD5YA(rZhZMeM-EGsj?58un+z)C8909QszUi@=^OcZHki%c~LQb1V#jbz4kT4zZ35J60;2q zGa1(^t9^**;)rkgf$LhgUs`_{lj$?U!}=2;qouwBImY z_95;}Kva}WsYQPYPnojT{Q11sQ-lIfzf3@91CDUMce{y6D5@c zeBJfdBiJgCLgfw`sqTG%#C#SWF?Me&TaI390UwX`v@`b!<$X z14sr7{#0AbIH&^|@U?t0s!I?MS^qoy0@32*hmZ|PH2h^uXzE0lWt2}LY)QvusHBvk zk^p~Gc@jlHt7*Sno6%{bxDIMI!RYo23tO>+7gsyeHuVznptoO@ukBDYqSrq&;U zXQpdq2w2Iw{s=t}+>!cXZRY{g!VLdG_9K1!O0+HYFoe@*+P zhhJ~K^`mxkgAjVO0VW1n`tBcCWclVd&(NkeVy8@b#VcP)4^R7LX90D<&4_pVOkpvR zJ$fwG2*)*e&g|Kb-D)c^vzEH*@~N@dMa~$()zH>kZyoaxFn@B#XBorOC(Hp^RzgXFep$)iy`}rbKF}lYJn*63Shh1W!3#ff;bM*ARlv;Y zGZ34>Cn$OMi(V#}4`pM(OhD$GmKp&JF6`4zAqdnKERBy|>~sHS%40m1!j&vmT`{rz zWiNjjkGPR1gt^)UNBns>ovkx^=^>@5QiJ~uH{5vrFMdHWkw?={#j>`>Y*7m~F0?wn zn6e`p8EH{%w#!tddSqz*`JLM@NYXcqZ$c)Xc5V5G{tQWL3n{V!BK(z!C;;*jKq2Ws zSVEuT7iOPFv3*HX%Fn* zqJhK|Q=vG_S?m)wB+NEb^5~8-#HALRTnb89R67_Yu0JlNNR;C7W8mZ(AyK}B^Jh5K z5KzMvub^^Sutasg4PIQY2@FThEEGBKtd-b}D{&i^VWc}~D}P~@Ys;PAvHLc}a3Ke<_^Yt=c{m{ER8ONkz}dutDE7yoa$d z27=CX<=okSn8E7pBW}h1Z62y&4O;9{ep^ipLM&21y9T!QZ}E$Ou=T$bTCq=wU)8TF zMvI@aopJ&u!&lj%vvHk$iaERTm7x05RochNCg)dl0KU zDM^1b`y`PxTxXvW-r1)pPyCW0`jp7?aNdXQBaGFRZ^*dzWR{9e%db?s{JMZ7H=6(N z(tfiVr>5aNTCrnYn9iTbb<~P|l4A3-di$)VeoktiMv1+pe(I#Pe~Vwz!2Gvvzk~RR z*%`z7N8{VJnWIS8*{7JZD_@uY8nxf#Fyg1fFZP+Fe4VyOEzpMAZ(K(dRl;9$SMVpZ z|7iOJe#_tEZ>l8?Mcn5h{H&it{QGQ&?W6d%{)9Ypp&3(WeCylaZgva0iDs5Oi!C~x zB>47uDWWUiK-ReuCQt+p983EeZ69gsR9j{sJ*O_GhuUu#X1IJy>nrh-x8LFVkM#lM z>W>T8wtQ8hYc~Fd)=|gd`Z-DYI`N1)G+h6CLc{fQ6pIp8l0GQyH^HL*_jx?{?YCdP z%8zwKBaJg>(#^!ai0boFg!m56{~v0<)s)z%ZC`)n@i)pc=Wki+gt78-`e~=Lsh>xh zws`cTw|Mjxd>hsaPiF7x!txd?V9WIWrQ8!x>c3Xm^Z+j?t{^;Ld;008|AX?E#oB? 
zXZV8WJzpXr*`GhlYLV5r6k?@pir&2R4+kCe7yIqU#yZM{lq8Mjrz)gBxiBTj@Wejs zWPJ6jUp;ds4}j}6S-0PLJN>iPQ%P*qN)vsce7I5V*Y^a`P*a4c*A|^7_=`}kqlQ2b zR3`b$)WQ#k5hxw}EUq4M$RT)SYo4&yXY@?O_pGr51Z`>Ii#8i;RJD-RvvFYa-f9Y0 zsq9SExhnQgLlpsjBcagb3a9cLq3uz5C~h+RE}ra~SR{)1iHc}){E}9`Eq;dg5(;}5 zUkV-CKq=q0e$8T6zRJ&P?Xw!j9e%M-SH2C6^mZ1kaZ&f8{!GHZ%C6Q^@P9M=1Z>NH zc>MCBv`lFiR^~{Ef}(u+#T;sPYQG_@0E@X4xIEKOQyRfn_V2r{d?Q@v zkQr>dy+py^ub1laR8D$=&XmIxu6D_{H|m<@_CCA0$=2{FxnO!iI_LnDESh{ab@*uKB&~|S*uPSh$AMx0Kq+}| zq$-9GjZdVq9QaZm1c-Hckl^>;ex)c3|EJS_F`5k}`-FY!)iK5{)@BGyZY?j{vs|Az-Jdq)|VWWvdA5rEoDti!U>~ ztyK+yQ%*D5MI%00{|U}~Oxk`e4S2WL%_NnOzt9q}YBgP6i+#l3Of%{?Xun*yJmVS9 zuyLG^CQHd**n(H3t9f$(+8p>$`?cxHA?@bEQKA0tZ@;QPxWW3#)oFYRAS;C%g>t7- zcZ#Nd`rO$~UsL}Hc2&_K(v50gC0+At{7mBJ2Lh^tPuN*Va;l#SiIY+I1`uL(nXf!2 zLe0C@@mh|`ulWz-JHuwF)cOmVM0QO01D4n{os=f7DT#lH`VHD|%}aVJhAd)y>k0cL z6N`$889X%p4%jg5md3+orBWehld0UI(iKZ8MnNilM>Ww6x{$*eM?x)7skAl4WaFnnA|dh8s8%*@%d z^mZdzNw*xn^{sF5kTT0mM;vkZNhg1q7c1R!&)r^NVk6|N*Sz}GoBjXnod=*MMV0;E zo6ImnPBNejVO1muh-+NeMMa`Y5CH?AxFWEMA{lg7Q3M3V2(GTmE{J3h1O!#I&{&D2+g<#+XPcTmki zlEJgR(cK<=@Ii{W4UZcM4?FxYoa0aZ(iS4}+0TBK*Q292W5$eq_kFi;oO90EXMgUqpXHOsmwf9IKKqN$Gk``h$xsR`j{p`ifjR!}yYFINmC07>0thdS z!Cji>kOth;ZnCI5A9<8p_wu4|pSVGeHAcYx; z%bHf;^7OafK}j1h=^65xP4-jj%0`5FM{VSR5U?itdec31;VwvCRzoA&#l}aI1(W=X zCAPMla=a0+in5q?n{-IZD0yDFTzpxyalVQwrK;9)X%Bu<350N5#t}vhRP(p|p#Ga- zL@`%cq?5a#n)8+GCKH}BB~$p@KP?YbJCcn0rh4a{4m6$hTho6V#T7&Zq)SS=OG=uF z(p{rrARU6FfWV}s8w8}grJKhz=Bv6LrQ8qd~95d`~oQ{54Fva#c&+<{bwi( zD#0~gj{Bs=+*un~;Bo56Ld|6pE^gXLyEAJ-4DK6B8jz$!iGhBBDXy%9K~=0PXJgD+ z_JYcC`BXKcTXL@gI0$(WWqOXT*wq9eqN1(1xnEUcLy{&-%6>gsxwe`orkVlBr2Ep{9CPQzqFw_xoEODhcYPq=S*M5LYU(ZE zgRMVi)2e813fy56TkGp5o~OLxf%sbIVTF(Z2wd^EdD=_J4aADGG9F!TEGiKwCfXy% zdC84vpi3?P*O=;LBT79~&M{+5{^{mWFBY_at4=tO?kS3NLN%Uq@kO72Qk{psOH}B$ znO|tR%TegWMWLQQxO&T;HIoC(P*PrxEi%ZCT@}e$+eSX$$Fx3>1UZTcdeEc;{MmSZ z1aDM(D}ftW3RZ58{IG5EoaX`^3M|VzpY~qThuz6tK`*%L1ZP;hVm34|zShFDxaf}{ zv4wuRjzhzzxg@z=zgDJU$>$G|JDvInrP=`h@kayZKYR8+&V6fQL2jVO6$7UCf<>Bm z86Z4}k{*uL@-JOJWw=}5zZ@v{r+eouR=R#Ugx$lWxZ~BORT?Gr8yRyw5rSNT9#9f8 zi<;G-mNp(2RiUH#8C9L|zyf5SZ$E}N+SGq*W(eWG?L}@$Zqzaf3ZOlJ_^+Z8?pmDEb}6sR4n)t#@@*Y{})9AV~aLXyC)iq>2s;{Lk&0kFd|F)LzJzm)crE z;8tX=1%6)`y6qGl){N8Dd|W}R?Il7Iq@LTuwIUvDiQ|uBcRBUp5MwPJa~+`V!CMD94Rxr)n6P5SOu>?q>n%{46gVpi(be23}KLggEe zV5Lxp$O6_RmiaXEXQ6!h6cctBE;DoGs+Um;)+wy{35A#8VsoA$`5wvOImAsz0aHNVR)oO>54NPEZKe~hed`y z`*X4Q8>|~f2hiqi(sP+P_h__V1h|y##LhhMk0{JLj=~ypn|R^DZ(<7G6K54&yrl#G z5i3T%u>*wr!NsI*J; zEC*{E*)19x1CjjiH7D*2prDpoc{PFwU?ZJCY=01KUiWif<)Ja$a#T6JaRjg?@XtX$ z`EI@4o43LaGb)jM-9ebWFOwLX8l7?2(#PDDBl6W$=8wifZRToESO>Lcxa@0yAy9k0 zzu!Mb-QNw1c*$GOk|oj(*bMyYuv!H@Oc6OVj45C=XQXbG1;dYPIhLBMZjQ z`|VU{Ko3@r1~sWI(U__EgRhe-toMC*QikZ%kR&M=NZY|1v6R_TpQ2xE^u3_}i0H{Rnv-X%yEhM&| zeIx}fy46fA7Dw8@t7%@WWB+OVqAYK8Ot$dnG4Kw+%R5%=%b|wP3{Bt8ihHX~1Rqj< zF-6SmD=WF;Gwf1lyX5@uc_7TR(XT$6LuH5m5@B*DZki>y1rMPwz%LKDh!kx~!{Qhu zzweJm*l$N1Oi7-_@I*gW02`x8NbrWa*ORoL2d8O%W26$VON)uQ{(}|k&&V?6r3*YH zx9okzvpTQAIHSR)9pPF~q>jPImQ9J(#|O*65&}0XDYN9#+Zg*q0yN!IRj7rh1+UUT$AT@kjR6-BK59{cSRT3Ozt~+ApzUtg{T@*>W zrSp{8zw50=E`18`^z(`_wZB2ACSAx4s5HA7T{n{rM&9wS`FpH`heO&g_xcmg{uYc$ z`^jcfwN%z3pK30mZLeK#IyMlOZ_kMVl* zDU6rNG@*QIGj70&c+uFjgH@m|C-jO zzpnqLR`|xUeQb;yFw80 zBl~&me)kuYqU%?El$}IpxgHD49jDEeub;F$JS%u|5nc(#(2Ik=iYM7SV-a$J1G*9P zYU_eCkrJ5UIM4cn<A!FPKA*#nwQUE6}^OIg@0WqS*!alr+Dfm5=fADhy26qH( zn0eITSmSXT^$g$A$V09}=I!ZEF|gY$_jd9Vg)vsfTF=jMY6jf9FXkn%YUT)HV%WNp z;*0)NBk@Ws)CE-~FsjQ7Vi=zdT=M5V7qYkrhxg!eSI8&Sl>I>#Y!$O^|Foca zJ)Xpv?Nv~P4JfQ5fA1b%&DN!2W=(3@t0XF)n8cGICz&Bf7ZI8C77OZvKW}ZgRKBc4 
z@x1xF?LOitxU5Nf+dMok-Ff&eabLKA!`_BwORDTYCmg5>ODApwQI&AS}QkHYId^b)6IaU zNh?fq3(EJpG5vh?gr;vj>KUYJHgb9cdJ1YLZ#`{N>VVWSpo6Jpl$TrXNhMt>0)ccWet z9qfX-j!w8;^B|na4ZQz{)sPP8Pvdou`%S?Y%W(b^Fy_WW z$(2ihn+)3-SH+jyj`mR{$qSV6uPxy=!w_=%dpy;$d+H?BLBs@)$)$rQV27h;Ev+ zyoI8k1dZqsoHbm+0K*Y2?X@wIGYUVdXVd9_#B`vcs_!wLksLTWI@=%QqpK zxTUD^UNhNZW;(3IL8XLxUc+42RramBTWgdWfyn`fXaJ$dCFPsM3OiR;f^Qb;OMg5j znqXaoImgWeP}0rTuCo2}vCizc#b28t8B$-Rgd3M_v+B&>*!7lb+|QxQ@wwDwo6{{` z`)=kQ-im1Ipmy%jqr$Zx88|IZe#CUcx1wj_B4191u271}dQrXJTlS{TrP4IIxqFu5 zK(R32xVw{^NyRvH@c|5ZZghSQ(au7h`2V!wGh0J%N}4w8h9w7JMt((IXE2K5c5J)#r++52HVn zOG5Z{Hk}U1*s}`cG43i{WQj~A-(3{U4&IKdCA#OW0+Wa>uLlO+riT!z?kF02C(hCR zt{*~Im9rGSFCCO?^(HgPwkgn(kim1T%=Cd~g?_i*Y3~cl{nne%{H%}3V0vj3iK9^8 zQ=TGnH|AI$c9f}7Qj>o3;SJsw7%8@)4b^^VPKEZx2hpMdP*DScvSe653%_Lb*Um{z z3u7*Z_ZtkR}BaCpQ-LEg5o0ec*0!Jy&E6BA+lN*VR4A);1!`eQ!F|gy&qiMQ>cd za;JUIU$e)0VIJ)K#*MlQJ?mVl-EQbr?1rnl!=RMC5nxonD%e% z4QnA;_=~$8dpQKl4d@qMf@E;3f6(J3Fi0V$mCXMd6TqVp4NqFqO3`A-b> zSnY#oO%U`aNqr+UE&5~j!se%2yr{A!q>BNV0~V6^EX@r3oWtbMik0{y3scJKC>?IJg8U z8b7y=Kj7r`8K6duHPuo0u4K3@LqpS!eRZ&gud<{l;yRf=(pwpsqi;z2hg!d9A8g{c z53>BZZYj|b?!|6zN2#-EMXA4y<9yv#8lqKo3}?tn^C4H6mu_8iW)UMu~6i6P= zVP1D^?`47O<`-(aZgakFE4+C#EpaZXI|O`W@)LJr9ig^;ABPUS=A_WjfDzkHUzRKtyZw&8(sQEI?E_2C4q$9@ZDDFqld;%{H}j>EyU2XYkMXaxW(6`lC;PZ?U3zdx=vBRH z7!Xku~t19*!R)A*%**^buUQtL&b|2E}6oM7?jP~1o z^lLz+#Afd2vVKmfv`5tb!(-M%SQ&9AuH7VRTR_!f~>owZ!%@Ev0g$qkmO5?Igk$x~y^_^2q3=k};Cud($*wf7H~PEEEoVQAM57*_-X7Sai|(jV!3S$Dha-Xdx4BLeiUaBtp4gU$N{ z?#7^%bd(%+!t6al*roFAWnv{pCD&YUf}jDUGyt!eBmjZFd6>Mu(13wEMkW_Z=T~%W z&d}?7@Fxl96JuY8Qc$ZlncEHe&hfSGM`}K8T^pVwKwQM|CUTmiY{5wUHwmgtDGv2) zuRd7QvOlj+b@XW%4g`e4ZNvppy~f`6OT)Fo%*%mu>ZIKCpGDHcsu2DBOne)cS#~Rp z|F{mf#`Dz1G=|cPtVf;VP>F4DuXB1y)Z3hTnt9)=T9*^1+2rc=o4Xbzu0)L1)O2sZ zZ}r~;e<=%)v-`K4JrMfP6F$lD9~))lJ~l^iJ?ns1nCkMHaGIL#;g;sPIDd~7ZqkGQ zU94CKS03v;7Lmh1=bL}&-!MEUdmc}M$|FbJ$~J(1LN_%P3X-@}Tk6pd*I0lbO2mm_ zXbLhDcx~)beyiI)j0#{eHj+l3Q35`p7V$F^H?-}XZeiCT#+@-YA6wg_*pB1A&X>A@ zv=jfCx%PetQX2ZM=+^N;{(B z+xsVj?g^~=)fe@X^T}`mR77CHGw1YQ|6nx5WQMe2gDBDhb`7}0A$>YQq1|{`QJM6w z!Vdd~5TGr?w&2~@_7G=;P)^-_&ev1le$?7Vxq!6K_{Cq$zqhHifnLLOU)FYGJ0=XR z6!Z#*QU9f+3F*F8Nq5dD(5V0Zlq|&~rTM3@ZSQRXzx%Jy!C8nFV$^FPSTy$u}AocB=W48|{l!0$B z$B6|~S)1)_Mt;m_dCzFKzt=+oR#EwZ#&52^g(9$8(_a*3AkQBFv~D|SAzECvo9430 zt~yL=_Zt^kL6=R zWv*N*z{xr8ub(W~4Pe;tBABMCCqBR}^^Pyl0W=tgWYBt( znIfXvO-dG*4USelcW$kxr96?ciFnyT%ybH|z8#1!ItI7saf}YCBd!KYVabpFf${6@ z;M5!Sq(Wyw@Kc(fk-h}Dkz~FqSgc&dxtB&-UJ-R;EPF4aynZliaKs;vxEGLoUSSMk=SIRaWG$u(L_#9eDE6$`nRdpdZ3C*c{rDNXLo{^x|-F=x9qzV#|Ob*R^n(!d1V<20j|HT|6d1MMd?@4}Ul$`%3eXgiA`<8apM zdn%|NQzd*@upw@+b57iV3gPO#IIMm4Y$yn5{R&hy@KU?>; zIkGf>oOq4B_LgagH_uQH|6aY9NyWL1gd4X&g1gLl5aF87cIo<~hRquoJ5cQX;Jb3ZPyG31Y2&bC<0}W~=R>F-YMz(M zM`~*b-->_i9}1RQd%k*;qN`DdqJv#H8dC!lE0=mxMsQ zgO-wlus`U-@8+$`(2OId7x2?%(lEP7U~^oItAy1Nl3b!36#DS9m^SeSB!et^s&vp8 zKTWLod?O@IUl?hKn*19@k)*or>Sn&GKDZ38jU48m# zJ~tVEvR0xD`0-^EuRocc&V8h}G#BYbecl0mFUZICo$fe;p@v}~k zvyL;Fl4LdqJ_ngw4?3TXAlI*}s|8@o4M63B7+jT47YQNZlKR(KTRa=HVZyaXdJbJ_RHbnd@D zzp8nhX#$|BSOntmgxFjNOya2OF)6*%`te%SXQ3gd4PEf^&vBvEl1t+~s5x5<*7!$b zank8tT0Z+ct}j)DHery-UvGKSEz7xm^bB=AtW0{xb-IHlUWT=?=eQ|Y48_bl5V7r@ z|6OalY!y8($#QugW>oF9V1`cV!lPg>pvKLsVYD#XnK*Luxw2($X+VelHI-A9fR&eF zP_1HWz%VWUqK9VEN1l8WT~|hS z*~Rze-u`jEB%)LoA1y?hDYqgT?uZp%Q}J``{QY2ygF1pBkMLcWo~DlKUb)Cga$KJm zJ&#nX)5nF#S}c{C2Q=CKQ5R)x{QbC1kMdb_8_xE>;1s6cCVR~eM*A=($t)#g%09N& zD9@k|iol+y*xv?uHs7u~Uj|RUz1(HT`Nl&Lt?)vGc#+qrEP=e38iTRnv;9+%U0q(? 
zw9NOsdXQ4YZS6{%ml81i$M*}XV~8B370ufF^kuJ>^@Fy?7r)Tjo@mSD0$d_K`>VG7ob5lzkwnGU*P6Bdtf_4% zB%lix+wps$DBa9^xnWvH^0#1#8jP}uALhU>^*9B|yp6MUyi@#yV?-zI=0WlQJ2D*hiig~Na#o>Av`pR{b>#H!*l2z{Wr}*Jx zQpw^V`h-JP&S6pe60Ob3H<IjmBdtYVZ{ zlj#{PoSI#qYl=7>?&#H9=HTQxLMe(6f09(!h!2M$#oWky1IZws-bmTTU@Sf9dRhSf z2Rw4r9FL>W5H>3pE9w=M+Xo#T;fEnR9XVC5ST7s+&I6b0| zCMkfzs66t(&Spw1I>i!?+*Nb=S(Z&A?_W_%2R`3gvZ44c{aT+#!n9ri2hOKVi0}m3*t%9j~1x zZ;&5y_dW0RT+VtOa@GC2H2YcVhxq8qo5`)sb8~SgLCKzX?R0=oozE&b{z;?8;JUM5 zy4<*Yz2a^dXZZ`32=l*xyx*Y&QM8dbD_Mivi?f(8X83BselMs^Z?TExHtO_5kGz(+f-9P8n?0UVw{1xW#_;j0@(M%3t@77)~PWQN#yz~ z$k86lerjU8kZn7Lz>s+rue+A0WSa}>Ly8y3M9rBIN$B0wf7F{eDr(~|U0!#gO5%Y@ zLIEpRHt8m5k{O5<0vWp<>zzuX1zi{jG zUeJ-pv|AqkS{a{p3T|#9cLl zQ;0URL1ohzZ7LPQWKbJ({{3}L)BwPbV_6Q4zX;L<6kxgWx1^o6Vm7oSX~GqL30(yo zKHfs(Bi+GOw)Pn|X`H`&fT+U|!#H}($Pla@8E?4$1kk`t-u-Z{eB$pccH}&oa)orN zX$$9j^W`t1$*x8nJIGuhKk6G{6z84Khu0bAG?2N5vpZJKy>_gaN79|==PjaK)GBTWny)jJ>+)6#YP8>@|) z!cyKc_X-?U;Y4cF)HE5^ZwGe99}5K?>={*A&57cIe)5j_V3&_( zyEtSLT2R8;yYVThDj0n8(sl(t)adxpf(QE-QDdlj#vXVImZL8Mres4k9<1Vux9QhS z=fo+B007t1t?v%M*)N|gYUBvBgu^i0_JrB43bo?p zqowM@ddI%MWMaH2e`nh5Ob4xZZIU-QOht8R#?2pJf6x6b*m@oFbBo_tdxC*DxJWnI z$heH{hYIPa0t=SPP>bO?zYms{0z7ww+;p! zqcY%~=bI725l9-=yIqp7R?*XC(S7UMWi5}i0vK@IDYk)49$PM+iloz=BpEIn)Ft8V z+VIw~_D`L}PJ1HjzpRh-`3c_h4?IJ79zbq1cIo9a4_KiN>GEGU|L%;Q+mT5)jo!Q3 zV+V5FUV+G$Q8T79B66DQ`GewV-yh$$ghCxa%K1ASIQ}OI<%f0$Pp)zfvo5vkKR2f< zE4^V-nbMNzq?mZvplRqwxfY-lpaZI6b`K{l+x>K6Im?v|Q&LXo#npu3c)3W}wJ&{U ziTsW|*_s|}6o@Of;9j|SWm9t2f(oQf&XDq3lzHfc5AuK9ige%}Fu$))Uxzp3p0xIB zsPc-4tQe@PW*y-*jFf%4L1TX%{*>HR7iP|Ev`+nG`1C!}rbH9GjgDt}G!-luTNDP^ z9)$a{uMf0z?($j-0?Nc?!(zMJVBNP}sP-cyR54!WJ`L^nC@YNfLtj#{gm0g`PjY3Y zQTS4D(l-2hBCNV~D@qZ3Wk217pY3;8uJ+j?n5u`YdJbCn%-Nm!^?tfVu!IO!l-Y}r zcn^JZ-(6Bov7kfUvALHSk|&MZxZE}0D2nM38`z6xuF_#96;TJcrq?ra6{lHO_lV0W zklWoZhJt2e%>G=}&&2Lq>m! zd$g&?MAA$zz7!I;TH$TE`Y`71%JbXWJZbc!%S-?1uh2_$DK$9D8Q~8N{qZ$#$CPrA z@Sh&{%StL?6rSPGC54)tLl`>3jNt*t7V&Ob4eU_m@ABMD)u-rA*dbOdZcg};lk@V8 z`G%b}kbqyseAoI2;!g5Hk@MnOBpa3te0R~57Rf#r0aA$GVBU=gEJx9PwR>Bk#C_F)vyqtV1eII(m5|bDUTqpknK0q<2pBA z25bT<_YO(V18>-(R?WyQct5bx!k z>cA^ENS#ih^BV%$AL-hONzW>oNR?KAiO$MQ#AGK$9C&=?Ub+gI1q6Y_8OE<+ zn(2_x1_pmY;In=sX;^X{eKe|}=0jdDeGjOp-XN-w8d;pdlkBi67Z27E4sF36&@5T# z;0*_A4QdLPsola$L+D}9PuZE^dYw&0#LGU{I>Fr-D13Ms4FxUjbHxxGpP%`0ag5r? 
zEe=JAu7ewxGjq0n9)nLBKMJU<`bxvZ8IV{z&6pokwlxhKC+E83;vg%?Xw5Q)>r~e$ z@bTVYwFJ!d``}L~Gjg8(t6y*&dqFQfr;0~>9N#Q0w0)SqMJu|Ro=q^%{M#U08NjZ_ zFQ0~!P`D_iI{;6Lk_m)eV;;00Rgj@Ik!?#KZZ7P>PM5=E~=O=MbNCX zCG?B7U$^~sT{L=1GP0C%f6ByZ&lp`kMbkPO*PQ?KZs$~d|7Qnmh(@Tz2kjSdMW-XN zpo&{$>Mt}h%NF}#A0sqr!3YXe;~-%mynofDBX&O|!(^v~Jx6jcgdNSRyhQJ3)ot;# z?+iC>rIm)s@F03uRI0E8Ai)m$IZwFE}37QH&UrKgvt0dzY3oms(DEv}%O6ExXoSM9UC?PtT< zfOp{%-!Lg7D%ZBUi6^C^O?EZNUZAw&V|o~`#3+LSUin~0rR0h z_T6D+&GN@+gVV-sjfqM@?qSoc7>wlxJs4zOe|62{R$A)b|E%lQ%{Tg=(q?VrtZkO- zTQ>|*^L(j3Jlv-n17m9(qykdhlUJ`igvSaDNkMxAV0x=oD}w2~zN+6oX3|!Ny5wsc zzDM*P6T@wG$#y@blaeH&OJc*XW#gmy+g}W~<0za!;uOe|yJa(hrw}Qt(a3Aw zl-PC$>T&&3^ZK6I*@|5>FLKnQ*Uyr5+8!IswJ9MNwn0K}S-EGQz)TL8L570M0&eQu z6RB^^O3ijXkx2s4My7v{j1#{v-GMg~&Qq67?hfH4${8~7NBpL0gH0$`4>jG>Otmv~ zme*z2AMq**=ehZ*@1%$Usvm&Bw0s56z&?&}g*-OP1z6i(q>N4=Y!Q55@s<<%%CVA^ z9}gz27RVh=)qg^QFCmC9{J)q)@J7y-mzRE2U4yLrW-V+8p1k~zL;Q>hRJM%CNuo_D z;r8%!+j_Ne+emuyQTm}*B7rhEaOhYz$y2fk>{m%%mZ_x?^XGheq4Z$D zyEjzFTgkeP)y8ERDcTS8H*QZjuL(2GOD2}G}Z=cnGoRcU0A5q9hfcWNI4axaOUmaL#8`uBRz50sb_$PCM_5Am9q* zxOQOQ0PH_KC(WeWwqerz^Yy%e#{f~vs_5o$x9@XzGu9so83-$qY8_nk2ICMv2GH1h zRkqS{v^j!v7o0M%G>08W&~2iiuE)fbWhpSGXJ<6M4mmIS<-#uv6NX(p9>J zY9SF=m+6`HWL7SGgAcytb126<*5Tm|s{=?(zYhl9q~uwvM|IP})*#>QWG1=mt#)n2 ziuC!Bl06;09`qwb_bBku;G00+We|g3i-mK#B{$%`&pHW3#`Yg40^l0~x|F-74;V6aSqp5v4XQuIwlwg6e-?oR zQh_$<8ea{MhoTs#E9j! z@(`h_zj$Jd8T^hxiR9p%g>M9Dlmx@^)BbYzG>;b03-Hi&*-#|M5VtNWVx_Y;hNYYI z3XZZef{sdrJ7Yfz*9taXn01tJlrkYF^H_Dt+05>2+~D@)$S~k*0DH2CyRASEVE#^& zrPxFsBg3zSty8bVa*u~it!S8YgIVT?s|lVZSEHX~&0&ZmxzxzAs zTqo^hl7+@=1FyN+_H3)fKVa(vzbW&RNS|zDFxgq}e1V{(8_2(`xnX~qW;IAVwAoM6 zzDmZTI)^Kaq`5uoQlSS-8;!ONqXuxI)sAAlNKoQ zyq{1gZ1^li3@l%cx7r?6tY7S8-G6I>rNyPqz;={+T=P)V%<##2dX|NhckvLOXAAB)^UoajyA|X1mRFr(V9VgRZa?#EH<)@}ek8 z^mldsD>4CAvLtJM)19w+ zlzi5WAu7~nJf=P3;mW=%qEgcvc4ufftrO@Gw~%SJH0Qo)-Fua8>Ay11Ns)yYh z5W0RK=Y209Hz0Qz?_94r2cuFp;KlRC+WdMji!;TA#Z1bKJab3qKlM&kSS9Fg%%ua8(YMuw_WCy6 zyH@J4=%R3^8&%D)vrf_|1iK=M)Z5VcQE5R^;V`O*Y*Qvqpz1u?ZerqpSf(z_HJ;9; z>dEItdDOo1uGWZW>+Dq=DdH_Dto8x781b3aY+GBLRA%d+jeT`AT&X@^G8AHl$pL?4 zQh2E(*QhDIyP$}7Oe)Q9pyphz&znNmj@}5`_O+1f8sum14J2)hmpze<|DB3`zs0UH zWUwBVo7 zxn`N%1Fl^3uO%{+Vl~G$c#d`Fbr#ss|FXGmt-F|y$pYoXOGho?PlEIcQ^)bUa>S;=xpkNI}$O|v{htCrjIRx-<^ubSwe z+U`(C_s7U~*U{IXkD3-9e{P%S$ac z#{+#Jx>PT|W@`oYGG~O|NUX2W=Hf*Xwdioj@+kMHu6};E&My#tx#{@)N#!qX1L8b` z75l9uKl|p)&SP*=-7A)}mwgc+!-l*+f?C6xlM8#g{%qXjs3Aj1a#*|pZeR-DUHLHt zsLn`)3`%Ai#RH&b@fC^HI9Ksg9w;>@CP8mpb=vEc5Dh6UE|hD-N!pbcbF+!MR8vfY zfF_iOCF;}pa2kyuQ#pjCt#GrFDbhVz2a^}=2)aqd47H+s$x;yf8Z{T^sr|~kNkVK8 zppMW(eCUh@-PTS_!^!ONZYezOWPh1+`R|sl1tEAHx&Z~ z5(ng_PO_?QVVavDM2O3zzUNcRR7!1OuA6it+xtcbRS&J5CkHY16J4$sFx5Gs>?$?+C%AH ztf}3!kE;;y1SlH~wk!^fGTDcUESOpmV-ijUOH_xJzAxMa5#8TIQf}1ekPh3TTWAhx zjYfW;i1iK|N|;FxO<-zRb04R`zUno=7cbW6Q>-#D^CF+hX$wdONBMoqXQxB*XVm9X zQZ56ACYL9h&(A(V735fs*P%QK4J$aD8gO(VH`1QZoS$k1oanC{%>KKg`Z z==Bxy9N6InzXn}l$p=4{O8NzR)5k0#77-97{B`6s?hIE0c~Rb>KN<_0!!>H1Q= zWDtjoodQuCNd%-Rj8ue$cTIeTrK&U146(HswTpPf9o`>sqQ#QUH<`8Zn9w}?J2*< z^waKguwtGYi*@a`wVt05ADk{A>F2=LtXtl(YrozW1QSaOAnW1-c*5IeY%Oty>l5Pk zcrp^pSVt4{R+8%M#kii|d{_&goLPEi`t^?|*UT@zHb;+QUM}w&tEE}vSmpAr0sra=6FEhrbF_#m2pJKoL1p`8{Ci%p5CIPeYd+{kjkLQVK=rR_r zKS7DMq(S*0=tpGh(CJO7TRr%AeW#?%7|zJ`nk?8b_3g}vm*CClxi&+^Djf8Hw0Y_N z4Ug;JtETLl$3okqb4?E5Lpbm}f)!dp-7!R*_L8CO=yl4* z(2icZ2=@i2x4jCBI!m#8Ryj-^u#BJa%hayC-V#sDG!@4DIT;)G>7TJfha+5ZlCm;W zWf{3|s(8bcHcv$fq*Y&wx_MVuhEb=K|1p2Su?iIqUE5CT z?~AM}^gRB$UbUtqLE1LYg>pi<)}w}CMu|rkHtNHJTEY1-4LtF%|trIRn3LXZOzF=x27YVB;1apRK9Z# zM{H!&7X$`3h`RI0m-qiEF+~sGU&HC|&Lfo3(fiFo10xbXhQlX)`9!)~dK9H0z|kXR z_yMEW+zxOZPq+aku1)Bbxx=yPlx 
z%?u1clczoyeqnGYm|gPx&hLH^N~ZZmU@2f!71WNd*mEcRH_8qUy7h-%RDxU)2z(M_$;ElUfcK+Kf zEr0#?&AtfIw9ZCxS9eWGpjC(?B=QWE6X_s6@OPHbPQ6S}L>!f7;&Qhz2yNd4yKIDG zxTG0*r2<D4*G8K08Z zCt1d8wlDvbqMlPWG3Qq$f+YO%vOZ#1#Z51z`tP|?`wnkKc|0hp(Cv8$jn1JJwic__j<&lZAogA4|)V8h`_!^A^)Sf~NEM_NX_79Gecq z7Qu8~s1jt7QHC>qcwm_8#>g+NCEbhBV+ie||FxW=@%%)@bA3lXn668k3o~B{`SA34 zw~x~drw*XEUQP2VjY-Og@}cr8)$!qqO6 zO40r=>TmC_LD@fMXPejiIVPJy$*#{oC>I%6TUUX?*w(So3VCz5sc_#>){^L((TqGR+ z_sj?T9|(Hk)V*=cdvw3wqT9yJgj-pd>v82D!ra5t*?YU}gv*M4+Wj712( z9;S*&NWN3h`67=QPldU}>!dRy@u%{;RAK>_S(I;7EQv5!bL^L0IzIU(d^p4XCH&NX zXc2x$B?3 zwNyCAG2<=a3VT|+?%J?v{w+VhQ07XOEyve@HQN*rd6vX1A<61e zhD4#5^Idx3yE!wWo5*~Q*+e;Hx=?be>B$?)5vrb#6097=UMYrrZ;hYh;S?5C^<1nJ z>17)VzUzw*HjZu#?Jru?rpC8mZlkDqP8`22HKBgD946*1fDmBTA4P4jkX9`y;NjkVXiruDzEC2etolpN9yx2mkZ(SJoVbe~s&Su@Lz{J&hSO`EJR{j@&>9Ah{9y2hxyR-!=WSRC)BXZ&@dzeto`C+*G@zBQt*3}07_{^i8j}o zZ<{+Cp*0vU^oQ%*_~e*m*A8JUGfM?UNDg{E5cKh@#DcjFbXHinkcFFeVSNCbU-YVM zDPQiqBg{ocHskswbYIeriFrMg)xt-&<`*(o=x4I!CO8HMeuf78X;gfQ)4hDPS=)!Z zxJms`WpoG!#;u5w@sM9II^K0wJSJaOC4PA(c-U9WA6N79-*bmA{;92VnjR8V zD!3q}w7gHE^nWy+Wmr_**TxM(8tF#58>AT}rMr>t?q-liq&o%a?oR0j0i}Bg>Ba$u zng4mN_j;>c>y7eAxdQcI^ z;pa;8fz{bEJ%tG@1R->Y!;`*xvtNk=#jDw!_tZWJtMk~pTd~{A!C0QlH8iI0K&nN4 zJh7{nQt`ZlZyY1aK5WN~6HDc@KWi4y(|mZ^M1V&?MoJ>E`oRYU96U7*t_6=FsjcU8 z9~`0W7oFsNp<6u{y-c~T=9N(mf%^^3U+}gA`tTVB2LV7(2-o*(nNF$$pgidC0gRvgP1edge@bq=PS6(Os?Dxl5ce*1zQb(r8xX<+=%dQz3F^PK zqkb5R3Wg3+M>vfL2l_-b;M2d1dI07RgQ9{U@aZV7QR{SO_w&)Uqp)?K!ms#<1EPEXKo_`1hU60Wb z;Lip9)$X{dEJ@=cEu&k}qhKI}mfN{0+gqiz-caL!F|yPqK?-~x%WDi39LjR22_r3f zTrETm_vX&yIO>fgaDuK0BAv?NdV*HrW+!`>#EtjaNSg88*_GN)pvfG8{jg_9X?hpM ztGM7tS4B>*uWW{+37Jw1GVw14kI?!sh=nQPUgTNnCm9evmWB`))YTTJmTX(Kfb|Fz z1G|NE9^1c>FSrcSk)OW}%4#uat0oexRLMp}wZEZ(l}Q(4o~7{(g$H+jl$;+>4LvPP z`Ul`8386drKY3zV)98eEMlo$M`Z81im=8PyDm7~fRqqCd(RWEj3E1jsjbDl;&}t*7 zC5vjwVcs(P5&c(oIBSlVQ;d>|$j?L`4jmpmu+LTkaYWjU&M~F+(=^2=ckjBK`1yHz zyIIw&SR4yGBRcdoC6+;6F#3XQwY1u+LEO%9xcL=Zu1@1%-}H|iYA?*s9S=Yexe|S4q}8QC^1_M7<=-aS zcKQAo3OFdrehyf&7f}6qNARTrqU!nG(^8H5NB%wI$eYQL?j9b^R zd6u&7Lz7J@4kTNInkx9;HeJ1+)U;0>?Bb*jF+RUg)}VL*l-xPW?^mEna@OBAx%(2C z?Ean-6GYZwB723WH@1!_dop=iUD^!1nHFO{i*hK%&A{;WOEgYy zPuwy9)V8WVK;VH(?iE%kL5Nd&CnC9O(@@}R_FPtTQRB1Rm^u#!i=6E-_F(s2zG8S3 zrGL4*c|&jI=a~d_x{DKjy$!`(=nqa<)vSrSl@d#;qh=zW{i6$pNdzq&-yb~3*9fj{ zm;);_Pg}bhjPV3Kzn2JQAb-b9svsC{kKhVi&CiF#4pFI*w^`upVAsrEf;cgrdE>} z_oRQZ3}?bpD7hBjwwVzF3_qvsC2;gTa>pTz>WcKRB$lh53>32FnE3`aQcF(;PajWp zTpkl1b?vXwVtJuuCm5@sZDkduw9%`s_Ox|0Dghpmrckt~d^7RhQFKR0(Bc|rk6DmI zT>O{u08>Axq$2V{Qw1)ie<`_V5(JZnzEbs4UEWMHfG`q87X95rgNWPRbol+2_a`26;pOJACr`*t)yESa)o?h#G-_& zE1$IeD_qgQ90kU0EQ<2)A(zgOke-pkw33IdY&`xNPuyUiyaW>6%kMhNXI**SWQht*< z$F@pWB&MBnM|vto7+2HUP=c4u{tHC4(8&;52}V38h+H$r6pTI#Qa^4-wFiCzL$zlQ zBvHY_1KW)ceUGnZg;hnwf_SEUtPf=h@U zoM6-IBvk3|2-qQn_9vyHZM3wV?zoCQ4DEu}rlCZ7If@_>~VjHd=x2A#8J0*BQ|(_x5S*OzcNVq48|VPP;jj z4J_yaCH;04?Wowl0tPM}(C-s7(}xK$;5!+)Fz2v;o4SRL+jyzcb zmIxxSZTLS1$kMyZkPH8|h`QDmT9 zz8xe4r)^zT;rxBjeQ?ZdZLNm^TveH6dIVGwLR3>7mLorfa28)mT+g{S3nU==F_UNai2C~ zqKmkbUx|8E=XuX2!3_6Hy)t0+ z`WgF9XlQb_Uxr=)@EK6=F5x;}drN-dA1rJ-Ix*d+l3)ZLNRkBghTL6jtH{!|x(&)< zse+qJ>M-SdmTm|nn#{I8x30}d9Oj}@`My}h{J;e}h={cXHqj>n(r*)^VkVF_*rn&? 
zGmcn+2}|d=3^kyS{-|2}*<%{#e#6J4kyNmd*P|vY@Y3zEadO@D`EKiJ^WUx^ZC}To zAObp}YYapSU5(_|71~48#oc{Rdsaj9 z3$*A4donF;`-u~XDu+dltP)E?FO%{?(wRbEze{}nHowix`$*G?3pQ^8kvyn;7V9)W z0SX)=jp3)y@GB52dwF>P%9)yn?3m?FZEz>d5c>?rV88Xr1u*_7Lpk7>&vHx~IDHdR zUH6>|tT4H?V0>3MEvIsmC+}yId}^sK=PeP5wdGZZ2N0oRS9#XA_M#-8WH61?I;@R@jedkgu@OIzrC?q8kKd0iPI(#9UqY(Am>L8aM+ zA_VuGy#|`fE%Ll462nq7q{_j)82k-q4PF81^TPC1cP%8AgJUOUbNN7dy!bw4P zFcr~@q2_l2*FDYt9%BG7l5K&6YKLqmpG4HQawY?BtNEmsY|VF5Wj!+we=u7{RQYX6 zbgZsIplo;VR7Wf2b(`S9Dat&VHaSSOMrw~(7Eljq}rttVru47qO= zQsJaX*B#`wk;zT)$62c}ey`*CrO!{0s=5yU{=$gB0H^y?x&N$hj9722-;HKoHvHq# z#?0Jgc&y7koF9!hc@2tq)>W0}<P;xy{!#)q|NeKe$ShvA7car_XH-Z zHl``Djf!Hqw`v^z4wtTTYLmsN@u%$cnkl&b0RMrzS+ADz(to&boM}yQpxScG4oc2P zZ+KE{y%PIcR*~_Lyoi|jMRK=Ks3aQ*cXeIzo$$KMp66Ms`zz2d{{_Xpd?JG$y5??^ zG7sa~#gUZIVY=6`#{2XuIeDxU_33t3I2>$*23Qdqt82Y4E&CJgJXb41e=Q+eob#~f zLsG9D3g)9%(pZ&5p6Y~*5Y^{alU z)Sv!!r1P{Gsw{GnmOBCpjXu3!{bgJdIDUt(9GWL03ognnZ-dI&Edeu`e)`P}bD8B4 zJOda1uP{xsmXy-u#$!*z%F`8JbBC+3u{19Cj+Y|N4iBRf9B@*VFNAsu>GPQRpUFR@ zqi5AR?-7Le!t1~k*(3+vuc5Z~+W$GOc zGiN7X(rAmxNK!Q^eoDy@^s1ZIhLzgN$p6SHUH{-v@R{OG6{kPIn|QzKOUJlfs*)E& zl2GVM`mh~_V;%d6@mTE3pxiC)0jFdZIqA!gry1DB*%gP!&wpGQg^YFcy%ezYUt zhym?KX|J6D49@l!@OcUlb|xT^Lh>{HL3ua)7f50;9C70|Tc64v4H>qC`Np~v@6@!x zWo_>>%|^mJuE&|8T=|hM|h^jd2&`>lX;monW#D$5pPs;hEIwquR(?OI9cDsc?zcMVh>D8XL#H-S9UHM3yAm7P5lm z2%#a1Ig4iqH+ObcJiDhdf;&d`Y~B-3!+0JvND83X7l@Rt_VZEDGnw0`uF`7tPvu~3nPiKA9i=>;}6b1o#UcUPn zItMd3li3c1Vu}4PMhHf3URhb$mv=;aT1rxVbA?E{j2Ho4Jk;w}qElY6Ff5pu3uh$}gmJnZHulI39>8Oq2{3qVmB~BVR&yH=fibPe) zl=<4;`IZeyISm^oTDh1}jr(#i+97`hURAJtX;aT%bG0e3KPUe@2ahs2A76Ev)e0-= zk(gJmf-4n zerU2mI?CkFq+kM!=Z)_`?;@0Eazk&pGS2N!BMH&@$1T5xyY z8|F(ETB5Xrt-`nGPY||DEuwEfc^7=1RclSm9thhj5_R1yu96jd8L+`5q@VjJmC^2ZVGqVv-PSo6OJYW`L?zvLe>Itcl8Hs4$bEh07Y&_;`KcM9>}bT}|G=Z0 z*;uzfDM@2@P%x^jvXJ&RG`?Ka4`O!@xqj2_vCna8Gm}(*B}Pqv^ZSxv*Y8Hhz>Z-g z%yKYM;QFcCf!Z0o?wsS~XF|hR%J(cXNUAr(he}9(&qYiR7bq~Jo!t< zEk0OEXEX|~5CpL~g&D?E20il?xD7z;LvjsZtsrp8(!g=63!OfiNMbggMu!aQh^jao zO<9_4jL3i(M2rfqSw=0oSJl9k8MPM521X1D#EqPN3j9|lQGG?V_5m%IEpYQYZ<9idtwBnc;1uvOTm2R z0fxiU8%L{>4Z2_N9qdP5U2eRr zFZb4F_B(#js~G-}-wjUYETZ$qiYkI+nvxV5?CuOXX3dlbXW;HSwF?1R*L5Ld(B1cc z-5E)$odRAa#fk5XzFuWmp#pEjO^XP& zO_V(B7)SjvwieflhwG&-t0_WF0p%bjy}0S5(R$Nb>HanQWm;>NvEdKht62iAM7EmF z5#G+=MpW)(Z9KHIC*nNz3id1Fsz{C2dG4~`1@~0)7h0^!XCj?5wwDe_WgeoD2);Ya zlvXvup1sFZ4QmFIUs}~gVaBewkDnqeoqB5B2Luh?n}^BP*A9?1$67dbVNRLOo&3Dy zvHHQxs*Ipjb>-9mf5x}+foxHY6*qCLsk~$58)mo5oe~y8*O#tjfC}D^K61@8hz{s> z40{Ett=7)MRg=f_oEHc%H^VcHR4T6n`|i`KInqlLRU}pDn?Z}gZjrfZ*^*S~s0(5$ zlXmKQEvDy@G!Tdsf61z(e?ytH!FQ_XrQ33DNWM~a4;Fj}FHP+2bA}tql0^r~cmc=L zXz%n7ijc4B$d+iQtne@kf-J*AYT;O&yE9fc-fYY5H!iY?bIIz&r#wGKkSMn>%~ll; zywr0K{r>dwynPRJ@&7YjY(3>6Vlk!UjWwj+3a#5f8#8)Nfz(@*Wh;05wb=KAVJJ&; zcj2mm=8v~WZ@ysSFZ$Qeq&SG4xlXW{?BR*E_Fl#DZQZwu7YHYiV-wp)63CY-Yz6#$ zN$%3Fewm6 UAz^(lCwq6K7{w)%(jwtU1BWv?+J*KyvKAi{MDcxTF`p?V8tzs%@ z%}woRDY2z#jXQ-E8~8k4OCkb^<;$F)pY~V`tjCFr4eR&>??VI>lEoDkj(h%PzHLRx z71|8OzjD+29Y2@T=o@mS(o>g@>yR{%%0+u2G@iIxFyNJ{_sJ=C5vno4sdv)1CT0_k z*FV~Z^XX5`h}jrkS+@;63tkp6MUoyCla-8kk0JL~?I$LlS$>}1|COY@@V;H3rtWWO zVIx3~m}ERw(S^~84k1ZZHVSkG50L)Qyg@q#_(p+jqot*ZQ3L(_2@B39ZTR5yf@|Pg zg8oeL+1p?RZHwj8e#0DKZ%)@G{GA?;@)hd%j?VyU;*{d?s_#~Mr#)DoUO=zeWjp19 zk>ct38)(nhkIpGYmySu8yNlaEb7nGcSXiDZcQMIaYh(eZ z>2PKznaQVSI@@dORQNHGU5jM1`}xd8mk$d^HLsJP2pbR6)YB%PmX&g6&|gh1^i}>= z+LzZw#CXLd&<-6IG8Wg4Vv;EEv1Yy)ESfHJ`$p6?st-D>>5KMG7Pm1|*1g!d_sIIZ z#^Uxh`c%gog$kTqg7{p3Be#AMg(pRq6drpBe--+zt(*cNX2<4zQj^IHnjt90VeZyk5kBvazQei*S#A7rs zMp@S5xFzeWl}k`-%DKV_mgE9!(JqXP)`~3vy&eaiidsZdsKWXM9=I*Y0rB(YFcpH0 
z$zM4#IgyMtLkrhZTT%5XMoK$7nPK&~@RZt61v=q?`}pq93+2uw1p3AI2uy9+ZxE)H zeqS~TwiB9Dubsa3`5q%R zL($Wje8K#T^9|1hWuGlC;+yEb_HlmAiXZO@Ny*w=;gXw7P#|Zu?OK72`spg+Kf4M5 z<9TD=trR6^jV*dw4|w#P8it9u{EigiM`?)W&8-^2ZZ*}{$k=GctwBKE8bG26Q#-gH zPmy@2m<>MnFp4>JbKKmVevAM*>8bgZBcF}5d;kv-V*hOacPLOSMp93RQ09BsuE^|E z=^$z*r0yz?`|KM`=L2B6?1TmkSGG?so*AXJ!#rowBNuVhY>U!?>pw#%5zSY!D;}wu zoBIT)YMTFmEyl?P_C;LEKQ6qRj4ID zv&87*#X=FHl+S{nC$$qRJHaJkA2xZ@Du&j-5o*+JxnrcNYx!dE1Qsu}`^kJ8!vrcu zB+#taY&%S-tM>EC%vp8ZE^&91j<%3FRNi=AX#Q$EoVGjHTHSL!Ytl~E1cLc_OI3?3wwYZ730ekTLJH@f9;Pp(nUq7-5m4s6ABYw zWlc9bqoan&K(sXYC@lS!&4uWh#j$Kmdg3Bw z`yg;QJ&Sr*dnu(c2FFECGvy9oR%F&S*kGXloPOz6?vVD0h;S!8&$%#lJoz#-u8o_h zgR_c^H$bZLWqvy$mu1l-Aw9p(Wu?Im2@7Oi0b}ht*uC*sJ%g4iguz#HX=ASE%7SGp zy!wPtUC>Pp9Fxylil|erE<9i;> z|M%{fb#1~Q^0$Nm-#DNFs1XJ~LSK-p%a0Zwbn8(QhM7J=kLX&lKfp>LxM&C0^~0u@ z0*1W@U;a^kDa3BOd$5vja_N4_y(z%FcmMFO9TIP4kQj0bd+2o*_uU zJWM#nZ2a=H+s`AZpYtEyb^6bySrPJkzC}FcJ3zZZ7u@7xpruRVjqnDnLQa z<|9?d&)iLbaja#T`*xMLxc@Dx$Ib@0TO{)H^Ur@95vdxQ#MXmjk@3xB66zG{2GR|9 z!eKLg=7YsdD8urtlK+l^;c;(2G;3&XC2Ke>|B$7mCd~R|X)}@6@dhWubLOOr@=JpI z_aUb$2Cbd7rRHiR6f%b$L5<=^2$Mq()Cup;<_nm@E8^13#9%V?ma&4<^eN_+)1;I0 z$dyiIjVEszQDF{vh36IgC<;_?-7-u+4+7i6wkc zL8!NN27M9v=s?i~`2j2-H28z}>)qeU@^{SI*#Ix0>gx|Mrlv9L2{o`F^jk5iEDra| zOfGR=-b*F=9G1sG2xJpA_^%lw5_Q(|o2=YtK<`?`zq%dRzbu6xOcewbpPWD4CWi0B zl2uCp9Ttj~$Gq>yDzKuZo-UNtQpL=k&6eLY?yv83JS4?tD~=}j1B zM*DDrY8Ll|?@UvE&mgwThsdi06lpLYuLYH?5hl>$*GcDROvT@YI>Kp4UsFL9t3^Pq zyL=OPh-SXk(*68e)5bpp1Q6Awhbpvgx5&s`e*I~pZ>U4`Zg%e9H69((N4}<(ZxE^y zEnurK(K}=@-w`UbL<^{0i~EMe8Pu3+N?6g-eJsO&xxrYXwC0s+r|R1Y;f9I|O*0UV zR}Z{%6y^%mpe=3=uT^CE(vypyMPu~Xx@-_an8abl zrN0F2UwJfB=VwSijzeysErJfcsz5)Gx9cyHMKclUAd(>?p}FTw#rPA*f^pSMjal=9 z?CAXCS0w3G-!fjjh#IeL;_s)S0ASDUYFG98^>?$f4X3$7TLAI@OND zJqitT{noc%#Q$KTjZxkJ&)MY@o5yc+e&rKJfVL`Rtr6H6bUFcik;y5dTY_2J?yi~` z_S-L6Pk^5WE>i?k=&fH&eMAaADlZIHwMQW5_fyA>OKUfmN*Z~j*p~~fLXilFvvV}5 zG)jqS)d^YBAvP3f{l8!8V>G#A?5;Cylp0H65+C{YKFB8r(uUJDYjol{vt|1l4&gJb zIrZf^pAVL1py4yEgP)wH@`6GFP~=GDzfm!S^r_)=7nI?Us^+|>WDH~+GX9JtAAa6` z1N4NMrXXHXbW+`f0oQvh%)AB+yl<_`MVub_j|YJ=o^Q&^b*NM|XC|x%^A~A{G5l<@ zj9oy+2SbB$qqVjR`Q?vPGy&Oup*!JOKWyP&rr>KCF8{e;6F-Ku9>7*S146(qOzxgd z;JX#})t-Pb-wsDK3|Dl7BYl#|OvOt%xdE1p0e zILK=0+Z(VMc%TH_nAbo%gQbXFeU1*oQv~D8=uF=@-+ACP^u5?YL9DBG`2A2BIzg%W zUQwY{Zq-%J#Jn4$KFgtxKo|tnOn311eY&eANWD>?IRRp586hM*Z?EN4B@Sn=C#wEO zcKHbzyd!TNPr;)dWwZ-FImQsUx^*+<4Dz=0-K7t56c+<_+C-5!;(9Gbm3{?9MXjej zaqvfuKorf!Z#s~N?VpboI~UN5r=0t@C|%CKK2>}-b#LDLbuk!=1%;Xjq{0UC6W5V4 zXH<8h6Jkl0CN{t3R`y8ph~dWW;Y-m@y5;a;jXK3|Qv71NdVF)kM7`|^PNH!SaxN^* zweVa>A!3x~zu>99;6PHn8t)69Tt^k_=;nWE!^-Vd38}9NQTSh zqhan_^-~c$1~Pfe=qx&CW~sm(7_w1wVQR)D5qyWxk4qz7FI<={315?&b9f!{{EYFg zo@WxDNIBI}u#sdrSV6ipl?O472ZpAelRewcKV{yn}J%~h^DK#O-~GW%hZvLEjzjQ4~m z*LPcwHXmGINMu3FE7zq&7n4|6dDIs|R&xCDt(`ilcO?|)YyLBEQew1Y-fbujPg)OV zN#>{t`r@}bWg<{FQ*oT4pUg1vd$gsF;jvirFhir#E+Mh%qA;3TfQ>fnYB1rPb*B|+ zIV`_$&S-G_`D&x^<=d#ZpDqpjKhhh%?@ob^DpfcT#h4BeQ^}XAwj72->es4yl+cis zfs7@Y_!1~nr@qdGKp~%ubPVh#8 z#~hjD1Vi%8h5WthYa%cZB2Bhw(n9AD~9r z;|*jLL2>xYC)vOdE)Qek$5tcTc_lW}*wrU8dL1 zk1IXC;~#5z`SkqRD8;F(Z3!Z8re`?w9|_(@wZ`b%3T~QxdT#q#P7ojb_d%B~NCe~_ zm-x3b@hks?jjDGad0v9xGhTWBcQH8{?WJ(bAUY9|Aq*yzC07>Mgl;eIXNlUZb<;tt zi{3-$TCyx5QAn&Rz(_+t+egocTnEyGK%mq0*}6vngY8oL)bqn^!*>Gv>t51k8rv-r zk1iMoe2Hx&kRpeh@InfOjC2Ec*X^?)&Sfl4T>l+s9V{&Qn<up;(RCJoB57Ax5AhQgSEa$ID2 z>B_|D_W5{s`%4_1gt)lfaHw`)FnoF@>2Ts9FV|=z%O7eY9aFRk30sx@;2bC9Dvt_I zZ+;K6{qxhaH`KEtLjQ(JpkH^FN&Sf37&w;{9#w$%r& zH7bMV^1Kf-D~pwMIO_wn9w!UX0v+n@h2w17^0g(pR_7uC_JG@ODKgEeZC{7d(!5#7 z!_iV07I-8)D4r)Zk+Fy&9@#KSM!rxbg~vAhF?`%E@4)v{c^xxw@*5Uj%gnY#-nD_; 
zyfbf?5+gU9PZ2m6e8dF2m70THT&{0H`EQ1*e`w#D$lA?dhzv9TA{~^|sWs|X{m4#Y zgY^W(T&$`4U-Aw!<$T zPwYN$g3Be1n!i{cefh*6&W2f{EREUs6@1aP6#(dxdl#g99+%}*`woqR#hj+;0o#YR zDU$F|^W8NeOmaLb@@{AQKlTw|l1H()V|{k~`f0OXNl7XaV12>VWEy0NHZ4g$Mw4kH z8d+o9kP9q&G@6>C%s+~==Jv?neB^d@N8LRit!Mzm!Hem9n6AHs}TWzpS28O>5X z%r($*%0CVHmHFEbo6+D|j(2WrNvtltNF>M%b$O6Sl;ivzE6RnDk1x}(OY`rq{3Zh} zHD>Vs&Gd9|$E#KB8PKtvO1|}}p|@l)qt8||6;CN&W+`~fT+^aH18=BG)ikHpU`gY_ zjp1Ou(Sfp9Q7Tn5{+#`@S;gs!nKdV=@{PBN_<2L^fQwdmV!ndI?z>x2=kgy41x`^z*-c+49}d#vv^jt|L)0!AKPsMTE2#GH}x)j(l}Q(e++wcc{Lx8gH)BDq=0Y-?F#9Z zH!7T`{riF7eLc#*0ED>3u{_lw9s8f-{OzMEfN9?2nqbV$E@C@kP94gf?q-;CQpmrz z;wCElyX#|ndD?`4umPH;`4UAnt4)*}@NE$S{8y?OT~$zWkPe`pZS4*#CI_G5wW&|^ z{6*&aZ7~UJoIZqlVj)LXZUU+}GXmY}J-~1xZyRpP>6C?>!rA#EgBPY(gSF+54u6eG z>8a*P!^oiOCs_Bgutp2lZ2-TwnpEFqhQb&&3K`6CY(z7fQoO#;oo-#uwQ)L_&F4Dl z`5tirD zV9pmcG=0^Fr`NlXVRqaPU0dsZWd!9j$yR|VxNSeP`K1-96`hjBlSZji#;^q<@{XeF zP*E?xx>oLSV?;AbEV%ufsbwknsTYmZAAUL4!JA>gRg!_nfq6S@9*c&M zbbfYXU^q4AIdA>dEKH^tCv4IS`NN>vj}q3)6_oUQw|eYn!5zK37ka%$7u@@!X?n+O zQ}iqFB!BDcry-&F$(GFA1<%({-8(537XKVUKjso%W{n_yYY60i^@H!$CZhN-1~=6Rk2{EpbJ2^2wdvg z^csv496Hs!Pr%Z|-x`HaUUFX8nl2)tm%T#?Vqg3fb>_f$0h|83WbAQ<19t5XP!&(p z?cK$FbE)Oc;?hKE2HvX$u*1idCV|fmCMl~7xI#HlVwM@M-1@|bEOzp(fAUY4yerS& zyQv4eRJ@Hl>V~hKJ)LrG1t+s%Gyi&T{*H=08g7YP%xtY63DdkKKqQVb0c!GDV5)uS z)Of@gI1=LZmq0`U%MvjJz3}m_rUD#x=t%I4zwB*`CJz+(duBX^dU-yG>(HQw;&;m> z6=6T6RamQf31gEhER{bH8Ezk*(o$-J7a ztNQyEhO1cfnD?&hkt=%~TuA_89ybvZq8^Kz$T9JZ&*#=cjU-5_UfLAJf1krFg^AFl zKo(I-CNOXQW`Y8ln%|h!-9W@rzjMGtUY1<#Z4vwXE@O>%o0dg!HZv55N$G|z zJ$HZVz%9c^Q8p5Dlc*xTN1LTNjw8e&`bzzBV&5OCZLT5SbGef$BWSe(9gjT_L7&{m z&3{zSrGm;z?1$&yzS%j)Y`P7Ig72FU`w|oXLQ+%%Ru}xVP~Fq7(WLt*iYi-0l1sDI zhGhN&7jD7Q+g!Dk|Fq3My?0p!UxQUuvT-iCmxjtDyigJ@+@4S&t!vJc-cJu}SB4@P zZAR!GI6)g;OU`}I_nj{@98h$s4aQI2J_wlInqN55qGcwaW3>cPo#_EON)m>qK z6bnBvdVO>AIA1cU$2V}tANqdXRm!zg**VL0>-*uBEe28*&5Aq^{1Qj7RvhJ7Vc7t} zhx~;nOx0ad_RZvBMtNL~?c?EAYw&tZOG>Iq6@pI+0m~6jT2IGbVvwlXuKCD|D_-T9 zeEdlN$q*{R{EYF+tJU~wi>Rp;PHt1Yd;k}l?Kq`-MTG8KDzkIR%<7F=L3?0B4ZB%pSvkyrOV$y@?m1^Q$Sus zvA|!RcBXX_b@3mvL>(!Ifk(No??GRZ;^eXV#y?vBMnkY0H(Dkj2XMbJaUW-Zxi$iK zk*RzTb?5F~_NJe($owrFhisANx)h1Xlke+<(*E_=uZGUO4}yIiH(dbC`A%^2abaM^)oE@HmF3d=!3dEhuk0NLDjZ)Y$@sS)=$f!ojKoO&yo*>ph2kCmA&dz~f~Q+Aa{Bnj zcT;b5&$i|Qx(M8zLnKj_c;r9G8E7r4rwqx-@>souGZdxRr(_^{>%#Nri@9LoyV+cU z6oDVolFFaAkwnHo&j|j>7g!nk|Bl#x@w|pjJ*M4MEyyFYt zyfcOt5<9n!mx^ik(}Q$P05$P>?~@O61YMNeXeN^-w_dmHToOQzpO1|4qv4$l@k^xb zIgOCu*4p^Pem18z#bsV}qf@#>WO9%GicWZ-8HcoN7WdE6Au2fBU zNn+wc=Z{4!YWmT>2PdU@)=P+K-enau_G#r{zj(um!ItAI^+|} zgC#M(Gh4A4_>gv}V>1(=MI2Ay{C?aOpChtipd@pCOQQSqnA=VQudXUdrk0s(}>4s>p+GrmegwJ)h!1t1V|2^oog!}~~hH;+ZitM<&@n7FeM5jj=M%U`VZlCIm z@N3bTT1KW;UBs`Ceb0M>XW%DSVc#UUKB%3D{-Hx72)@0ki)7?VvRKJp-@f_stS-uj zY+e&9=i+d5RWno;12FUcP&s@YksS@CdTL(|TIy=A56p-Z$BJj@pGbPaZhOg>|G?h% zfdXW=71Uu-oGZBG*zzaL_d>w)T|WFO!A*DY;#c?VVVHpV{Wrv7auu6^M@m2|Q9flq^!L*x8Yj*nRtSdLx}v;Na|B{z4jc zamqGxghNL&p#}>JKx59-1@5y++|MPLkneNd)3z3`d-UtC6rQ9aqm#!M{=Qi<}7%#hQ z!u9UEGT9U|%+I0D)oY+&QB3DVj`A1>{)LG)qn8v^q?KjJgLYLqb+Ai0iK%slKJKgD zRrS7;xJXkwM%uAYnmzH}u>xs6h?Vq^>Th{=#*Yw{Z%UixIJSK^r*Us+S$0o*80nafmlVdlx3|Nwb@5qvit1c7ff_VhkwuA5(iyGma z6Z_bi?D`Xli#>>I)BY3VeJQVlEoV{VkDo3V3YwRx zFod0Z)@emN#AHNKpn{n0vY! 
z-xro*@ehJea<+_u2mSCT8OuMndX7W9gDI%xAuV5U{QgQ=$Ik94kG^|nd(o5c3czt^ z^FhILJ>NLHV2wadu>aidu#te9{&1$-B!{4h+c2R&run8A3RXH=q=i6iwZX6lJy}TZ z-wmy|;M5x;l~M0-Sx04hgEkLLxP5|Fj+*-k5@U;1xQB##*~oqc!E$(6anAwljKsTQ zDw*I*mY|IhPDU5%6)6R)^7scCt{o<)il1d>k_Dt7dZvxow|?qGfgVSn-{1bp#81sU zr5%%usXo(SzU4*Mw@c!W`@HzccLdY4HE*03ub1J^CxKp+i~5?^g##JcubK&I*moc$ z6|rNkAqk(_mp%u3C^~Y%3=p`GNG0eG)4z_xWtWYw#|9-G!wJw;BB&de(HDOSfG0tW z+D=^081cM4z_EdE6@B$F0T?Gs@+zYvJn@lSr`JZ>;C6K9t-)m7RM4~;@!c}B&t*n9 zFt6Pc<=-2bW>#rH^xR%<3fIjfx_;}pCg-}Ye zcOagC9qcrkx?`jEA7dPFeym-)wll`$UfipG1Fwc*BUBZ;?kA3(h$mR}4=trvWg0q8 z;8xfEeB$7NbdqP5{U8cELkJ@2(ZvY+GESJ2utQ&=e8`PKJR+b7wetZMFMm$?@r9fZ ztO;Gp=baeUFlAAlMtkG_R*I8 z8fHdTkwsDW^W$$#)n|Zh+|Rx_lr13Fd;RHr@nGt^_$hTsNDLc;Go`*3G|weCgL)QH zxzsED+1Wg*UCM%!3rR7!@2BDCT)}?6B;sNnbg$4;N|)eKVH2?^83vdN_P`qA&P1P~ zKh0^~+C#Nut$TbNcnGfq-=V>dLaPWg7fi9g1vU=@&WTTJbP0ikP@}Dz|Iu`o4N*1h z7JgJhKpLdGyFpsIMY`Qu{DPSed*AzxwXP*d z7DK@Qp@*XEJI^#-xJ3H8FKa4ybz3P{QMBurNw%$jv?| zg;5eogPGZ8vABQ?M7j^<@uyQX$jD@Chu(}^8WZv|^48l**?d&&{fCqxGF9X@0?`a= zP2JX!U)=lkttoOEC6!$NoEc8@_r29@UsAI^sV8o;GB4Z2oxuXmCUeI*=v#9^*iB9n zc4R5_>J%qNZp9DYsSq9~0rs-AdUsDPvbN+nO&xmsoy;eFX zDhDRV{KEQ`PR!&)^n>T48XGs%E`Nhr&5 zSC>lb)NY;>kL1lvC`&X#d?>u;*EAPz+pwle@`|i=qxqhuoDBC+o4!^^eiRqS%>~^0 z>yq?9E0@bhOMk~W5AL5KZ6s1S_@hK{{9#8V2O#;ASft~dXgmU(&7(-olz$c;1Yvpg z|9tWTKsN_GwfUZWDHY(~9C_ay#N}6bV848wlavJAxCpJI`y&r98+^ zFTM@68WiORCeN^P`$Al$95enBWDMP`ZbIfgw-WiH*(hh^bmwRBS!US3=D+O}?Gc*y zVwLs({u~2Fh|s03ZGJIiW&M1htGq-tg`XGj; zY;93-e>H>)3S~XOFZOn|^7wp~f6X;d-h+bSzoZ~8V(bEVrJ(zY&{U|KYYY;V+>_4m zpQ`a^$~>*I<3q^o$&AKv?rfj*gOM-cuPS{vt$c45#2Q`c`o+CG325J(zXS>iifKe;5$8gWnA~qf>S@z zl#`Z5qzoSkZk8#FMDC>pDaoAGR;*)?QA>MhfX-0!*@R_9znr~QoWwZ5005}jv%<XrTMr)BRO_*X z;ih#?m;g78Z1Y>jxjU78#&+gCOQc@j@|@YpwtcGtN*oaX|5D{wCE>6INuK_%xBQG_nokmdhD16J?klLdaqwVou;DR-R+=pW^TLUyNCcgdGc-*Hos)} zM-<`9td{Sf(=Y)aFG2Fb+*5Q5fP^BNey0u5VgWseM@k);TIGvWZD%SWYHsia}%^>feM}w3zu_FIS+cs)6HvJfe@LF8u!)p4aK#E_7ARkPry$$2a zXJdZ9WS!OhbLpg-l6;ibVRXO@UP;i*_+v3kDd3~jb@x^gm*~qo;7UI=4o^Xh|IB~E zWG zauRF8-t-YQ7+&t6Sxm_p3Rrr2o-|r}EBiHT-_PJ;PvuF5lj$*Yka;0laQ&0Wkh*vz z^%LAH&d(#@1G_(*I4pkn1N{BlqE9jKWanL|>3cWO4z}yI`Y(Pg5;)DYTN5?1{s>=N zjq|pKOvg~ak>WtV*lw_!gdBf30z~LtmHO-cg1e189*gR1_g}QGUmBrPN4RFt(|sk) z`o$j$JeQqfX!zNr8c)lc=27%j6pB}Wn4lZWpbD)(l98N`XaWb+G9tcqhv}s>ujL@U zef3tKv|@*Di)I+~lpJJ8+2|0Fn*O_oCN9(mC||vb!;I~mXr=Vb%f$tVt3@XcaF1UA zs5wgsVekr0ffcduZe3mYf%;vC%7nRgydv7XgV0gqOW;v0NXlcnrfIzE&AL-l?N7U3 z%&mGqO#|KbXYY|(i6c8%)5_AjKmsOeRw?Lpzwe*d{1($y$3j57xM@<$Z`)Df>?Xxn z)OA+d7T*f2BYr5s- zQIRKFVQ1j}O-T_a&FJ`eW#FY!b}V|~f5r9wUd{}o6joed?f$Qqqg6}+s%D(>UI8
%U&#@5{mgjbIkNlmNfn zJ;Z+@E^Q&6>oQR*6nuWHBhf&rc1wddN`VWSEwx|)$A8^I`S;WErLy{-t@T*vSLD)$ z$e1@?1%&D6-SQoWhNVI2-gGL{>n1o;U}I0qRfy2M#tuT;tgSigZxb+7C|1Lzf8w8q zv>DWM&u7ni{_J5A&h}bB3k(Dy#zfr5J>fiX4;!;xk0{8=4UnSawn9A6?~wjd{Jt( zcd#l?Wfd6=cQe?`-adu04svaMTHIlsrMV5*u$FW>%Iyd_;I@wDYW;p@;#)~W;?4PW z9ur5R37Hr2zsc$ONCYmhef`bLcPEsDd&N>_qw^+%LiW3wq(^8t5^ajqfliCxWG2Xo z8T>_xPPfpd?+Wji^M~u_O#8y9A&5fUfVW>vGo-A_b89erZa!_~bp$a!vXCJ5v$vqk zXDIc5AJ&-E(!Z%pUeJ)-{kUu)p&?PU<#tJ&8Wiz~tPTs?^uL=OMy4bo`Ck&6?~CK9 zznU~7Q&okB(j;j>!=vS0KRTV;?p@(YgL)r|@}Jbp2yrDYn&bU%lJl+bF&__Va!uQxU|Hg7_-8`gOC9W zAa+)Orl#Yxz6xPeECwwWzQ_k2P_`gLMmaPqSp2T&6hxV#)?oHjFRs~a1?_y)Hvjn! z&FAB=x2?T%jNv7x2oHk<1dH+nY2rvcicX7O6w_D^VC)RA%aPzw>!3f)5V{T!b^-|6b2n7WN<~IiHVpx5`g$2`<;g=7gA5pW+XDbYcfM}!T68CHX z*o}eBlgA&6{b_i|HoEi|Fg`u$k5zso&(%hhgqDk0knQL(DHP#DMVRuggEi(SN!K>b zyB_9GEJ5Y`A9?eaTSsyG`+2_aV1aS5ijG{{sK$q$0<4$LSAU{2MJ4W_Q+zWt3|+pn z#9_gzjhmkqFzok75$wWC%g<$%5y52p%AH!j9_?ZmvAn&@r!ibGouz`TA9La9jlLff zB9KCSi=~=XkT_f*R7C=(&uj~7<*{YV>kMXzo^4NYV1tXtYwOYzkBzi<8y?e>q;DpMd*vZd zb3M>i<-?aiXfX}g_`SLMD1GMVz`|_(&^?Eii6>vIMEScbn` zO?%u)G);H%h^+eyq^#PP=O!q#&It(Tw7t@a`sxG;@9u(muQNop2(n|Mqz} zp7=VU3Y`0S0Fbh1w&06Rq^E5}gjyPGD5u1%qV23n9$TwmxQr6`W=$<>hQH!E#Ds}o z@vwOT^X+#`sq-byJtB9@l@b?!6{gDg@PXEpirQAAn#cnOuj3>KXd19=QJRx$$d;ib z0ZajtOUP!3R8=%aj3|Zb>2RcgimXN#AX*vtyzE_x_T~4QT(j>6Z+kaQ5Hk~A4qufo zepAEi!A)V>JU^sPo2u4y^oTzZ=99^)V=Kw%yXh_{>>y^OS?!qNi-H{ze+uG?mShRO zK7btLzL%X`AWS*B7rOOy4O%b&?W%os4s1IWzn3;$p{{8$JZpC8zwlaGh_$p+qss^4 zt&xpx_#;y0p{b!asHt1>kjgU;_};#G2vPZ5qz0yiZmZwH;3oHXpe6V?PlW`pn&1W! zu6BDgeYATb;sH~T|tMt32^%e;|NtmgQ3k%DjL>b37x`tVh zOXDlzr_vb&_yonEgtm#RIEOlZMNZO0RJGPneALPvW%h~nFChw# zo8!A!F8$y8LNPkk7q?mF0ucva|0ZhX{oM0`x*(=ACDCbaUUQ1%ID7uchb^%v!)8E&>Z+hwALIjJ=f+X!9L3%q9*>O{`>cBT{!O zdCO!g8AAJ_)C{}53OEP3m#!Jq)nyNak0L6&wt!TTzVlI<{{6V`V%GEiHtN85*+c_r zSCJ+?K}iwb9t(%SQC`^#CrmL|Jq;bSRX;Fc8~x3ckwokW(a%fuSNC5gFi?q@vkRr9|Golx0b?riUJC)xwlzJ0RBDjmt-wQyU8qbOP~EjrS6v$ zod^u&7D{_}_np4bF3Pl)OU_Blu`AdW5P`2m`IT>I9Go0H1oMIUQgE6zqNIM3hFL0g z$q$@u0t0a4_GP-cg{0zcvPch>a|hMZd9`HC?Sz+1E;+-vp*x{nK36n==lGrc8WsAQ z2sXDY%9%zbkTO2w@&Ayh)c0A-+xk-=e`sc3Nd7J5ru2v{Yb&w{Bn;Ba43a>(2gN*@KgyuTMv^w$OxUi@x^!Ly9;6!3JND-NF#QI9QeN?xMol;MBlARx z17{JiEs{yn+*f=WIi6T=DI!E67r#3tAI3Q9toj}i$P3`AEdrf@h>^27{m9ypN_)2Z z!J8Y{^9TsWyd@5gOLh1g#@2XH` zpf>rt#VQ%H!GG(VOK-wLXzTRu@c3w#XgyRqlhmslR4P5pwFc*8#}=REJ7x&Gv6nuc zgH0Ein1)nUE8T}63ZYgW(E~Sp#l5asWI-wlGU}DU-suNB0CDTS5PIKIKBA*NyL?{S zZR1OXnQ<6=mX}?N-}@LJ*6xTY{!7jXWf@W$Ab$PkZ;SKJaw|oBfvxYFK`3?#Rue+> zb8`Ax%L|AOK;r+(MA2d#a;Wj?{0^wEBYOmzsg~P`c>|GkCZNR4M1$EqYa(9P&uRmqUF$x97NN)%%SSDRp zxuk;~v*@cB7|ws?r&RqUaNZVLW+4LwRo&;qq1h!uHT&dH);jPR*r?S~r!6>A!+0Ou z(faQ@vB6Ni{p&sx9VtBI!^X3KDFmlen!k1Iwv>B*oCbM8M|3m*$9I|onlH~XI78z? z*#YLRYpE)V8nl@O3lPyjWT$)tsaaiztWsYT!yMP9x*7<(Q)s4Ab1 zYq*vK>x6v@1!@_AAclBH*f90167}J#bK2Ep)~Mhv-n6;qDWB~tu}E=K`>j>!V|6g& z*0<|jCD5ezx9xr;Z3PSQG4N>|Fm%TM2?1dVM82f)TaG-FJvaUpEU$bCJPz@KGcXp^ zd=v3Lg+}*hEV|PTCaY8Nx64J(uhhv*E_NX_wgNklu>ILrMH<4J5DLUSwqchtHpyE} zy{8i8+2CPG9Bg&zldGNJEGd((yPt!cR5bh*XWxJj{8cFsT~C^pWgaK6> zKe6$Xt7^~b3M}f$jIC33D0R|U=3}lBxJ>=(Z8YilkW@gPFSl2%Kg#aQ(?!ZB@2bFt z3;j8=8%mXuqj>GF<5Ecl`Kne6>`Q-6+TcTwuPtRTV?}ZyP~xyPs&s|phK z37ck*C?k4s|GiQ^M`fwv;%vtJt6+bj=h{J83hcvNDVQWse;c?iHctvrR#dBsuAi4? 
zG4P(qPAC0CI<94brbhX|aW=0KQqLAL$hiYOITxdn@RFLtZc8S>x_y3gv7A)_&~uQ0m5SnjPT0utV~PG}XmpX9W2T#dfCF|?;FqmfUTMo1X3uEAru5?vT5g*0b@6JOrF`9P{2+yRQP$`3A0&Em&NF)4oSWLLOvB$e z(F};97ntoH=Hcl2bNZ;XHfdfz9{cXA6V2?Q8%U@+)6%jKuAk@uTG#OnB__e#Ql+cG_GmDXnd_tS_@xENwSt9Av3vEZ_F32MgM|RhMr28<+c(+bVpKTuilhk-gHGVh zNMveD+c=EuXNi-lfZ}W^Q7a+rJBiEK#s5JX>O_sKlN+TjN=ZH_1W~e*^NvB)C*6uyN${07O`;x))+8$nc{_Ho4 zhu_2LsN?}Jkp`ld=5sB5yrf(?`-UaaG1erwdzSBKd9nR#WJ<=_TlQGiB`=9cY5KPd z!#&xxM<(bdWzzQ5R>z7Wvu!9t%T~qS7n*lj1qxv8d!YELgk{$cEN4<~T)g1(z&Dfj z>BndxnG;M#>eD+tO{IZ+n-nj01(5DxQpVD4eJALJ>XTpR6%TU(>!l-)Up`{5VQ#I( zOd?%-A$x>Wmx+Ur+=760#RwXufIq;r85gQ$w8-QWUV?mo2hsWC*X{cFu}=dI4mbty zF?Pv|v&5zvEpmU(pmzTp%CBE6Slh)c$d)h$=LB_5c>k_u_tNb zy6%I-QrX9tmn5SUI!%!w%hvFj215VU3R#kYEjWJ+!RVuYDwfNeP>u8$$!!qN(o$j>E6s?cF zp0ZHr2kyNkdPnQ`9SauuD4kJ@yMM!xmGhr+FFUk`^;?@hUb@;1Gm=i>2?fjrl8LaLX{D@Rhu?3++ zB{u_$j=mv`{Gunoj=<g z==NBpi((7AFh1~x_u+@AoIfkzzi+?K?Y8?3G2)4YsIuBduzdV)KV4^hZU;pYGyChL z?s?nI3mk_Fe0Ux`fEbxZf7{fSD(>D2x&Nwng3_s!$lo?d+2@T1oOw;yUX+WcYTB~Z zf8(>XYJM5d%f;nrmQOTJ+SRk~a#Wpa^YiKA794SsLh@hw@WmohdG`{RPK0bpi>g!2 zTD8ufBwhb3`%-b{4(Sg586N!X2)>|?xz*}m9+EW$As=p>H5KU;i#AxRJ3c+{;=e3Y z*%FMXxqTg{qV<3@IZErbu7=!YJ8Rh%GtehWIvbeg_RIyRIPid@@a{_golcnu0&6nC{ zUT!_NbLvGbxyjqz)Nid4+S-ry`y+6e&Ufe-;4DS;@uid}R?9+BL#(#m&#{dywA`}h3-W|#J-iL{%qpL?XT|*N@s+dtfMzs<90ar{B;T< zKV?lF`J;?;HQPuYLx{2QX8DZtipg3{+o5#kV5>itIv~@Duy$j}Tz!EVP7~tTP%FPw zQy_jOjxxJtkWTf?61e8k(fjC-GPxL z=wl{XMJL6FYB3yzI@|xyE$_rZ!abM3PmRjpGW`3MtLi-bkR<7>Srhi#loW1@5Xv0% zT#&nac2rIs2jQ5W@edI?jIhW16iD4fRX4bGo65HgRO~d2!-A6_KMMTHRi$6UaWCjb z2)<_Dgnk`wJG}xIf4x?M*95}klKYDvmo+r^>|u{@WxL^CR<^4AhgcZN!jk6`oas+t zIGYRX5ACkV;Tu(~*{{uR7C`i2Vo(^@={&zl_nwW=E{67<^#*L}QvAs)(Uok>9j1vg z^7>P7Hq()*?~%^$WAkZo#AQg=o<>qoB;Ua>bIce9wtrfITdOF-5a@YG)|rUY@Qnn+ znj(d<@b^!y{s12EEV6H$eXK|4)%Tr_NTLz9)~2U(n_O9?aL!$Cyii~NmLyO?&mpC= zUhZxIkwM8)2h;XhT?fZC+}HoOueS-5J$eXg7ty_QeK1|vy+IBn zCpd2%Wj0Yk-OC#CGK^-rDB1py3Tz(jnPg#p4B=BxOX7lM|H{tL&Ls?e8S; zK`LB+;Mc45ZG6O8bnqy@`}C{fk&V$VA0N%cuxfH@Y3)qEJ3!MHrz~l1I#c9$$#)us zL#DK+R-tt7zTy(@6qM$!_KU0Vr_DJF-=Nn?OBzZk*=U#OoP$JzHlA*Lubue<7)q;L zj3UrEt`re+M++CskruWo^jZp;)DrK8#m)QfIee9~(!wV7zB~%q@tsLh=ad(#%Z7IT zx;PbNO*fy$qd@?eBQtjHHJRu)~3r{cpB!BgiLSvzjR z_7sR3^jaB_R4-|_17F`$a>+vq0P){efK!7u7MCT{D_}yOZW!ps28cIe)>DFJ#b3;| zbVErz0*vbaN?30d-(WBuBCPYqB6b|(j8S{0-$9`{KN+~947J$(h z4)6t6+KoDm(*`<3zUm{4efYZc@}^!&V>@h+&t)b?Cpz-%Vxa^12r01sG=%Ut?H9_7 zUvk&Oe#q*NmB_xl8X!y*OoyWL0SH7gHwVXNl>F%1zw5Ye1Bi`3QA}LM8 z(k7MM`Wd+EECzyEtgu7GTWr?-tTL%^r1aC7T)VmPWd|lHn$vE$%C~kl!Dqeek9oWC z%0cHBYa3UszSwf#!P*(*H7Tl)ar1lzQTQJ&t~}pIp(h-B!WzgXrzOs@2Eg|-_nduX z_r>B)uW{jp3Nf)nbaZN4T-$#)#C(eXo^SyI_9;C^ci(eMl`*yLTrwMRC}5oeZ*(J) z#s6xQMBQ7@{0kgfc)v)@p4(YwIud}udSH1?AgQTxTXA zZp{j*r+Y&mS>&bc=+%Gv1pDFDwjo)F)9Wmi3GJ(9-!bTNRm+jnnlWS=%IhqIkUNZ4 zbrQXDtOkz&^dP&Cx#ZvzRQ{g>Y_|6lxWoHf5Yz$2>X&p^GF`vEp^;uGv!8zHLi6rIi?(7;KrgK&~Jfq;lv=itU^*t7e38*2eL zt;nk~eQXwF8N7gbXP(IWlwTZKSU&^5%pNaWWWRWR^SI=s>ZB43#EWa;Ny6NUcYI|F zk-*jBO%QR%s)yvaWd%ofoO^gKKx9Q5kCB}L$FPV@xsxsU^AOR(1KtPeT$QAfN zd(V>6$)2Js&d~-ztt_V^`<1=CvUe8#Xl?F8IMsd#1Fl;@^met^$=Y|^lf2eHm@h3` zpD`?e6*)D6WJq}3cE8@YGaYd*%MEkxtN`2`K`?93-!RySvZoh*Cz|VG28f8K0TCe8 z_`wn1cEm>jIvo)9DUEnc%-p7b(a3nhsulT!C&m(1hod ziy27>iD8l4H*+C9NX}78tSmC_x9z&Jv?#xPhCkmiqSrp(vuy=Sh5N$e-Y`RpM09&j z!Ibedw^lT7Ja`1MenhdER#B#QsQw6u^qRS9E&J59^HUCX`>}@+wWS;5M^>k>bav7P z3(bh9m2C1R5z2rZ7NtBx`#kV1+STrNbUL5FDo&OqS6oLtYs=is>o1Vl3Ly=7(?=@m zKLpIn8ztq|I@hhp-N~M6ck{ThVb*lZA81yfwOY4fl*04@^)h{sr~lKXw#n$yGrKuXXP z%JATs@oz|rpZ!szV7=#wHYRcU8CG{ZZ;?#Bsuc|=W;mi#YhtmJ_*k8SJBlUYGc8)- zWm8q3C=R{rXMax_xuBsu9%%J|z0mlUoYVyYmp0BL-fYZdgc@7+a;ui;%%1|{4(jUz 
zUvSvET$)#06oTyPp>XVF2?Nrx8|Q(hk?B%tUV10xLilhq*#@%Hc$0gw!mPVw-9ou# z3ZHpJtAcMx!tv#^0#>)FD@1+8YXK5t4CTJ39fYMjR`eSO@ZO zpN8${V1V)O&v9uETX9c(Ki`&13;iCkUkJwc}zriC?ft3+|10er)1;EH?ZmhkMdg4DmTKcU* z$m&jQWlVD3#vp?>Ur*&P@kgvC`R7VSmjl@Igx3sL|7fnR{MPiLRH8-&g7}3ds%yl~ z0nIcwj93Zrj1s$b8+d}i?u?~lO+|`MuFHKf<#3goqu)E7u09=TI|0M3fI)FEdV(s} z@E+vH_Fx-ePss3Q#ITP(^}nYuPW*h}^BSNHKhGU@#EtFaKKb+K4|f3+G0cvr+`L^R zz25DI?f22^RrJEC0dz_SU!O8i%L&pYSJV!RkW`)Css_$oC7_NE$|Nr93&H^Z>=_-TwQ5;iQzWL{_ygdj7oOGi}*VatF z?$kc^R4V}IvsPvp>NANSoFEJ1!nB#vV5Jx6x;_4UTb%^wtC;B6!LRxERPO~5po2Et zRWA=0h}W#P53OvIe1FUC87@KBLV!D$h*}f{T!5of{6!duIdBHTHTKmTbz(TcfK$VH z4MNT#TEX#h*sy@ABWx$=IMpSL3n@|~Av{qJ?zeZs3E17)3ma)uiGsK&ym!v@D1-_T zrd+|LMlB6Uo~CBX(CqK%YQOsIL+op{d-yA7$GS})O^Pn(yWUASD;usYm1EOD_H!s5yxcI zFaPX)X4`1|yGHcx%Nba}^+7L+cf|&o2Shus(+?&JJc7$b4A4jJ5i${jaoy2G7tj#a z->63M@WoR+4!ld{A32EP;CdW!6;Ko{c}jjx8k_`C4rglHz7?LtJ+WtTo1*}LI&4{q zJR>|^t0O8O4fpR*&#?0!`hx_BD4hnxA0W*)2nx0fF`plObYSCKtl(5`^#nd&uQ>Lz zKc798Q&)d_)^}eK+-}r?N(e94&CY~|a!o@2Uojui^JwfT=rp`O@?R$%g&dVXoSvID za*U>XaL1D&SzO_=F&lakM@W_seDB$x(Kk2v(?7oW-fLL$;#RW?!N#xs17CxzWLI_$ zXTPhk!_chgbixwcr1%BhY)t{Y+_J&CL&x@Jw&@=k++B#$yiQR>iEsKh0#BJ3m@>g_ zZfIbgI12Sc=K;r^JJC31aPU%%QJCHBx{~jB+dVM_2_a zK(oT2Kz>)`L0E9*HQi8D1(w20=oc3_9yjpQFJrVw1p2x+HZ^e%o zoZm|g$l;B_C3GV#^7lS!pQWiJIkIaRb!$EV%L~g#v81|32@5|c{*`iH!BBRKNT{E0 z-v$ytv)a$s+IsThjDbda(C?#hy?yL2m+@zc9#Q~@N@`YEV)L%jls?E8laqf*cTPA( z1R1`&E0%vVb-a>he=Zgy#XX;`4Tsn-=kV6bQZbY2xJb@_BCv-ti;dve?(}_eAnM8W z%~@48=P?kOUSY*!=4bcsQhPzoBwEU?8~3KiQ)}do5sg=Aj*UzxW8=7>`;CK$Cpsh& zR7Ft0Hsf?BnJe(2YAEdf7pSm*kun$NV~d@-wnHM zx5xf^3f*oTunL_8K3T0I4w}zRe96AV26Q#2R!mHUc`mQaET-g_+15i-A%wH;(az*~ zYft2K<~ZE2GEFzB+(@~vIX?I6(Oy@4%(s=$TKEcCo1gE^P61DYzp{z7OV#qTHA0Wh z{NlgX5wU{Rp%L4V$m=%nHNk-%wz#X-Hz5W+Spr;pC{e|d+J+^BTUgNBBnv*ANCura zLGWD%2oYk#qW3=`*b4k>hbRKA4thffT8B*tp8P55>NNB;9ESW!NML9JP$i_Gt>~Uz z-Jacv=HLH%{@n=Rum5%JaGu_h_c{ANG<|hcQ~&?JB1%h%fFPqJL) zfRv~slx8EOQxTCINGl;BAT_#SG$=7(bZ(5nHWt6VKi~5^XZvS6_w3$#?rYEI>+yWV zS@_A;KxEdReFXT#5qKO+#|m3HgXk8U)L&7$iWzm}o5uMFWs$@ zB{VQ6=#blY$2SQ^2X+7*3|m9;F8jWSQ)(Y44b#sneS#ehDNWM)usfgbJ$eSjoulLb z@M98eKT?QJw?k$88BTjej&pn`^C^#qOZ!^w3wvMd3WT2qkI*E8vQJOA(VofK^Fcpz z(Vs)qF1!l-+og;g++lj!{cPelq9?>4L!DZ7=B$@%Cc}@Wt(U@&q!g-$BG)I9`I?@9 zWKII^O}gLRw%%Tb=a~!a@6%yaFK;K84sgcNQqQ-!fZ)^U3;1a&_ zBTvyaK^|6Jw3DLhd1H}I7Oa}u`Uae0cr-|q$jBsEF@ANKbQ&dU!!oRvD;w#$v6>N7 z(bPRY`hMhjMc@c|ZwFzwnLC&RejA+6yhHB(QELaQw*?fF*u9I_k-zkQ=2b40(?4sP z4*#t2yb|$1tkTHR?VmpqZi6Z!{++J>>Ww8y;%e+o+4AWZC3jmMK~2{e-U@1=zWchB z<4q|Kp8p0MGA~#l*gkDt8V7jv3 z54=$8atxSMVz|601YWiuoC(~cjP36)g2-K<+_j-UH4T=LU{T@;jcVFMJNf{J4rZdr zy)bew#D5TA7mS~xCuV~aGvC8bd|^>fLzcPq%K{O{KPyj3l{6CIv_7|-V+o>19eKVxhz>dA<3p!!uy|@N#?dZd4AZ!Ot`3rAZ z8-|6P#{9S3?2f+kql?6#GK@%;#h{5{byUy>88tcfnq{w7mg?+#CIwE{wC-59;{SUg_z;spq@$LpzN=rs z`?Vap%SK?>o23>k_t^-aV0&KsXeqlo)mp4>#*uSo*w~AE*`+=~6hWEgi4hOI7Q|#v z{Yq#1OxlXzJsbqT+66NzSUPxay6`%_U|d(%j&X~-K1=Aa6~f9%w)1v={9VSbY1x#o zjX%=0W8Ne*wxF+TV1Eh96MspMEjf!d!hR1Q+kZFkb3!~S&iH1$6k}O+4U~GVX*U7= z?)=4{B?x(chgabbc7EMpz8;^g}>yLOu=-vQc_5+-Q4m}5}NJ7xyJ-~7g@k5Tdx;;Bik zbO+Y<7PAr|6icONZBr8PVY*lKt6C*u74gQMv3X1mVohl-J3(lX3>=$H2>{4Z$iKAcVlSg`mZCFyPe_$o?=lQWfzHG*hT&t(1Eyj=|$FpuI_`myE^(4sp zeuCMHOS3<+vfd8({oftsNuVQWGM#yXBOPuL(W&^eIYikr;{6E=^SQKUh5P!tL04ei zQUZm;kPDJNYJDQ>#^uL>a`QdRADnEwgkAe@so%lWqMJfbH8bX}>%CU7 z(0>$ue>Czf3hNlehgg}rgdC}Ad&lIR&!V9@_c)#}tB&kV?Pyx0RM3mSLQ|mh2wK_~ z2`20xP{E!PrboMuD;>*;w3|)4w{&`a_h6LDhKZ;o5!z-Fb#o%;SY>wklh)#3Q%Em6 z{dm~ve#=731YvBE60Gysk4oOBQvc`%IatAolo%SW7t)Nq0Chn{u(@_`e8Q$&MI6N; z2w?!i*(Ef(aTXogHc<>h8=tXn1Ke5EOq}~P6Lgd@)4KkDIgs?Chx&c4lu^Dx2Zo7BsrZIeMy|g 
zRW7Ndz>UE`(>nVRxB{A>v!(1~|Fi(in2M<0NO9|)}KYXc_ux(9ToEtQFWf?rO*($_w;#e;vespTgG(F zNT1`um{0IH)Jqcjn1f_8q?VN^F7-$?XBl7$FSu(v*Y>6ZJdW`4_|yKSr~@F{1Br0( z{qh|%X1k{Uh>vQqG#oNPhI|#WS5qt4c^Z7*JO$Qpb1S7pDAAJ20UIq|$)qh;a}uAg zTfMw`y;IzNedDvL?k*PBi^LGUxl|IVNi zS3jVv0Ky@jzZpU%aBMYxf&Wz#RAEn>hP&QG2NKBD$S*){K1mexl1U3!44&vgx3(N4 zvsOe6j5gY^4I!@uvqCYIcHyrtHiy#KVIl2CnJQt=$6@Zh(SYP|y$Qv^jSn|iQyp|V z=`3|~mpqMOnsc?MfI9tBETGQTNWj#MwIoyZRhQ2GjxR9hwBjwERw5t~xayJ)<|aN8 z{JiY(icW8|#LN)gyUYF~C6d##O>hXo|2tnTBnYL~mUDpgZ-1gRf|23bu3fGP*4w`7 zkQvL;{66atVirrU)uqmcQ!d;58#b`&X#KD1&pkn=CByj;mh2*8U_D!M=iMQgJ!xtIQ0lbyHu~R9C;ev5J&i0-+Uf<(c$_^N}pd2H>c)ze!@Dk#swp` za(WGRHElhOqwS@p=I!XCzT~+*J(y?ktCtA!)0n#hcyoKB+m}qnPhJ1|yrl9UC&1d7 zrr9f=YAD`*UFdjNY$tm0$*|k~&*HHX2ijUe(|vkoibDd5=f(gS#u|R}Uo2f=sIyd;JPx0Q%gfZZz;9(Uw7sjeB7VPhDNx8ih+ndH$&glKI(0o-)_!f$N@$i6opHPCry zsxE0KKQmnmV=o?|Yh`gW&612Kxl_uMoNA~~7oY1zS6rAfiK>1QN%b8j%2D?fDxE0e z1F{Ral>bguxqR0A_s`zzl`~*BrEKts_`bo9gPw|YFxsRyJEwz!*^K-mGQ5{PTrl+j0mE; z>OsS))dq3Q*?q1gugDhmyy$G*%Zv93r2D=D95>YEtlT(n6S{H-+lk666L%AqV-C4d zw8QeS%XprYZqX~hFR3;ux?Rdv%;%0F)+XF_zshCv_7$696P6aQcepuU| zdvw;x1Uy3+XH9(Od9hpiU<5^K8zl2Jzb|B*d(0Txen;iP<=qEMY?4E&The6yFh*!f zAzuOVRggMbh{#O3L_V)(z^DD0G!rQmePv<%rTGKOPmrv-X2QYcBuJIRCqf>w;2KC8 z{c}5p>h>?mm>lT7!I?|9y?QToCS3!X@f7(@5gA;EQZ?D?b&70RsX%2Fh;;KUm0~K|iU( zNfB)ji5b=_75#*FJ?iV+N3w+Dzs;2-tDj$i*MLGdSe1i+)HJTKqFvGx^Ybpie$ZTR z34&G4jsREv>bejwuEEa8Zvl$Xgp1U_N#8;45dBi;FtCt>Q)ksWXc>qE%<@x@?S_zN zL@)|BVbz%u7C^@)NxD>uztD+jLAA-E^1Uy$!B_5GgZ<;G8$zpORY0cxV130Xuc2z* z7wPn=Ylc(BTd0p$*h*nSbi%Pu=+{GUbGRq;2}N7r;U{={uLeZu2wsMK9cwR$-K!k| zZz_%z_y1*;r+x5rohpBH2s5Jn%G^wHXJ<-*lz1LRo`&Kdr#6f&KWsO_3@&XL_e4L% z6=Nasz52@sk=^7hf+9BYkXrlb%}yut)X413m@!WrP#@Y+jH%a4K!gD<*L7RjH&TCQ ze)y`iBzi0f^Y}+d8s$UukF!4P>L(01a??GbMvIpee%U{nh>Dm*{Hr6&5&^$vz%cXfZ~2UP#M-h4G!#<-mFQ*wcFm>JH+f zimGkfK}s&>9w+5HoWxJ-V8<4SLAMzJ2lsSy(18<~iy!BF8X)-OD4o?}7|8`Ai87j3 zGMPW1EFN@oCM_ZI+xKh_0%mBZ{15}S{T+LQ1ezs2j=tRJc#2v8Ln492gZz;h8p!C& z>C|t%O-EWz%aE!wL6=g!nlT}#D8Nl)(yLv!^)vZfSJfIEQDxTc#Dj@Q)QevIewxR( zb(fEKg(F@Q1t)GS>$Ki__vMAXna;xD1H(Hen3{zIJ-J`j8wzQ3VcqQQ4jRJ#udFT& zYaSlho7sHJiRsxxN*8Z(zCKH0S|i_NC0A<971iITiU0 zothNv7Tm2pAiw{DPwiO47_0LvPQM-89a(xMpI_!2cuLe>S`Dk#KwyOJoC~kktdNU; zNmjIX{Wt+6UW3-2jvn3lsZNH6>-;$&Ph{9m-5=AhJ4$9Zo9#jMR9S`8uCr`U#j)k< zZANKqN+U}RvbZl`g_wv}#KfcZHz8!#QL&@Kkx%enq11o+G*g1wExqSH9Hxd+Y!}Yn?gqWyJvVR`jB)!# zSC!G<1^>RmH4L)7-ee;(qVx5X)UPEE;^+ru1k0ehHxzCTi@YD7zB2qIUtT;Fp?&$O zKi$P>Fk3>QdEfZu!dP>=ThPdmx||IM8%l2ZO2y2EX1^k* zL=~X|5=@H|&gW|8asu$XDy+UyT$yyZjXQ~zyV*Oe z`t3iw`w*?9CHehR%gOKXPeDxRj*G1a2lQS_5ju5F{oo}GZK_Vh$8!Dz;gRe=;`cN0t?0(Ak@}OGnVM-x`Nl~ZewVU;5R6J1zA+q)z@@kvQlxk@)}Bz4 zE!?A>W@f@*#~Z>AP8je`LsQfe)PrRkK+X~=n_zgh`+cJSYkX=451#abK3dbbc_{52 z)yv?BD?y3NYw`fSgHuN?(+Vb8^)p58Kor z5r^!(L+`EemXpPwy&k?8zpd>2To!X2*oU)z)=NEX?(UFL}89#PgPEp zbrCr%F>!`J89gleg6gO$_kR50jAaN{+}GOtjeKZ7)8F$?96oM{XA3WHjM>156j`X% zxhoFVL%iyBN{GvTU)`bZ6r*4fZb|f5(UyUAzu4nmfNPf34@>a%0o9S;;dmJ*IJ7GTG%3md`|1pMWZWDvB=F9R z{kLCFq}u8g<(q12B3fd(&Ev0ErQ;{Tr9zKZwd^PLwogh>&w$xNpIl9N$pE5b=!ff^ zQ@KOLr(UzS-cHQN?BaQ~Y}Bg&v$e{4PJGhP9wnNdpdtQ!*$-HK|YTZR6JKRXY2|N zWM0*#!d@d6A8_ne4d}p zKdX4|@9HN8Gj%hKnSqeFi^&^>l2+Hk?H^Jj6*Yq}G^A=BcU2u2AfWJ7MZgxiB zRrK>lIR#ba>NyQQ2)X^QTBXvq#Ru1e2f_M{3HErxd(5>U(JQRd+x!yUV0FEw01k+2 z5Mfye*?79YVyLq9htII%S;*bqB-{?_8;9R(g6fTno?WOz!#s_$T+zydzu_azu-g-J zmuQZyu+{-T3}b!pJqAF)XZ?E_ds+F>p(#alJ^aw7;mgEKOEb+&u4*O;^VcTPJ)TD# zT(2x|N|AH5-iNr@%sboY^YuiIEi=3@leXvmd*s#WCev7~6WcZN@xON8 ze3f^G=;+*~s2hZSgbRev_omQ5G4JTI#w4IIZdZ0ca0 z_@p17H`eOY7!`lk3rhIc$C<@_HymlNsdiy=xaDpti@@YuP_xm*xSQA8Q?tP_%2K&U>+y|L!8BNK{T>e~o4$!=d)VJH|w@QxAakS7Y 
zlng&xKOXUg5ma?$=?$jxW(8*s(@5IJZp;rbG&oj&=7jpP+B2&7uIFv;#!j1P#lC!~ zkn{_bgLq24*4h_?8GQ6&KGb_nA@)aDn2r};S&Y-h+9GK}N<29o?m%v!#+t9Wcno%$ zBXA7ClL`vfH`AL%x4z84E=w4jO#Q0_7nY9LMHo^Xx>@xTPhGgjCe9J7=d+9(&pKQW z2v4x8aHFVK=@QFQKNfoike8aN4wr(XE3`Q{zIeDMW0rob#){wvIXCTENZs^p2qXwC zBoc%X({T@+00vG3(gsiFY4aQUSBw41?yolESAGB4Hj;5b8qTkW>Gsh|)sk;eukUSM z5EjGjn307oe@i=@fpYkahBDs_iBvz*%s2raiI$1B*EBPUgyd@ju=aq&;*ZHMPxoh< z*R2t2*7H5B(_9Tb_1nvzPzjmpzpNglVZL$VN;%oU?M*a|+X2P0ooir4}S5andv}9ayHwT z)?N(#k--yKXg*fEb8h#g%QBZTiayir~GOcr2U=)O!Wi6JA zzJI-#b62W_A9?pVfX6%#61Htw-l7vC5&X@i+)5(271j};D6`lb-Eh;u&oYELkK z&yVIq`RDhtGHBmWd^Ag*A?D)7r{t(De}c(JwpAMa0eg(u6*NbT0H*b7IDy5VJ=RMI zdCS*cCU}+Z%Z0 zFC(1?SoN7pC&OgxYHpaNZhKN=PuCIA^i@Q7%3;#|$Q4JnTP=1kDMnMC?2plvgi5-t z(h8^J@9R`F$=&&~r6oCch8X^baqktinT%RREL%ZGz4?-`vj0%*76jJK2AD`t_L8A( zxd@J06MssM9K{!=3TlH#HwlFW3pw8Q~K?4plXqz9X z^+ekc4{;o6ys>U$=9|!t3tP9S$EKI{vI|q1DUBVyK>iYY);c+@by=u2l=J9_Ymbvg zNNQcPMoc~P+cb32QEVIn5x&Qq%(iT_98d=;$q!qQDd(V692~T^5(QrDRmdR1==I~j z$!Ii!s>lq?4B8kE=hP$6lY=1Gp^Kbc*vEU0dh3oMhv^EzvH-7TWQa`@sjj}Ioc~k) zZ}%1sF3<&hFpS!yvKiUJT*6&y4|OO@ZkVxRKB8AMG46m?&o#D*frv&`-DsGW2Zo!sl0>-!xVrDrj!W z6R4$R$TzzU$i?4BeO@ixUv!jZ1vFJ|F|c|hiVUURaBSvbx)eJ5hR{`7epvyX7F1K! zW_3FhF*+l}e57XXcmE3Z`E_+BPy37X8u`OZ%?HSBMmXJ5oDy?&W)iLrc zCZ5uixhslW%Oyjy$I+1&xUjvMU6R-RFK^xY-m2CMf9qf<+2L!g(q-W8{I0(r{gD|;ce5%?wfMXO!_ zg_DA&KEE3x&a-p%pr$SLKx}r{eRBnWwV)t1lL*|NNjJKfL~vzTrLkK5Tj@qL@$ccM)&6Vuydi;BL?-C zNpY9mbTsw-P8XlspnY<6K$Ay;F++fc)uBO)NWMyRHfeS_)(1udRh8GrMM5^lMetYH{dKw zWQZY@hP8suKDl4`|Gfa#4#*=1he@&X%|=KfdAkQM1jL*JVyqE(>rqB#tya>{*4VdD zn+qt5=wxi2;R?7ZfDiKZi2V8DKyG5YrvX_&o3bG!;BSjYTfU2J(nr>eoUx-F%3jx# z8CLDV-+Lus*~KJkkp}A1*U5qRnU>asawrA(Oxl%0Snt~F741|~_1JW;-&I*n+4(sU znZQ+jM^~08d0g|+b?S~5155RDZUaW)Cz%hQZU1@%&?tS?$nW)h7+b8DCFiPavCM8tzfW004aqdA~`nJSt5@M80wzM@ODyn7LFibhT+Xd){k=tiX9 zW?;t&+3bhj7IyIa`zt^(iS0h5g)mY1?$o~y>5J+lgDV?blU!5sG^bGl2r3EQWmQ~z z3pFGkjZ|0HNqt`~b^{JDFaX}poc{e_PA(_#(tA%tcV+8yPYi;WrXP&SYFEHUrKH0r zdzH`-^9u^$fb#aY&`iyY*w_59=>;wC(E+#)I`VCqC}BOD%E0=|@`pertQ zxKo0@rjuo=G5g-dGq&S2o}c;mWZ-@}|PRjB^mqGHmAW$Gv= zWmJJS5Vv@`e`3=k#CTv6fd2`mSOW1W|8e$%`QYQ-QO~d!TIT9ulo=QWr+0wEEX5-` zHnE3;V5%3{?US`SmLr@*0-A|QW2fIj7mu4-cbYKHZIy)B$~LB#c*5>D{YN-yOPo~E zoa@)WX@l`kz`>{*I7j}*JS_`>|3Nnpqb=hVp#&2Czqx<#pf&0ct3dWE^*rfOZ^gR6 zj$N?NxIm@*e>QT`5G!jQNaWPvA8Ekzafp<;MZ3^BPJSzOm7_EAdQVSBPXTm79k~=q zafjomQHx2ln1aL9O20J}1w^A$I@g=dKHZt)hc5HCuE|5rQgu%j!#SU$AvHl8zR-;Y zWWZ&|4n@eJ`PHE+I&$`bF{U`>NXAz$H|CDm!xAwngoJE0DjJN@Iw8r~weX%<@aPL+ojkVkkLF}fsOW4~vxvxdD*iP23k)JAW*Z%3fr+ZPQYSQ_xSmC^9 z1}*yTHjAM}JgDqYpr+7FqmM8Qbi~_CIKK&1pdGs_h}+rau{q`q#FvknkLFKkeG=;I zb~VaFRtlK(+>O1!O;CA1ltur&U$+GMC0Y38f^e5!$!)U0r9Z)Y)D!&`X(Db(%0W}1 zk|yM23&U$cb=(3`-_7r`flqQt`qEye_7Qv^@^oLiy4cs=jrb`+%%jurw&#xSdD?T$iX&{m9w6cKzTv9jQOWMiwi^_QrT>AsQ&3VlW7Wz9oA(np7X zr93={wPGv>Zm0r97^TN5P->>@#Wk-OQtm&LE&uY&hA+ExM3X%IVn^A^-Ujj|v^(c8 zO}7?Y7Wy~=pPoP@@!VF4lgE_C0H`(mAoJ^{GyF1(i@MJ4( zt_0XTuy;JHi=(3GRK3z-0?(aw>x7tOgD!C@Tu-@W)TgJme0=JCnwQn`I2+^dH4g*{ zXMk=cmSa07a+8^U{{<+EznCHS?!G*H&?0)&aVF!#lWG+gmomtEBvEra&G+snT7loc ze-Bm5fjQqFWfPgIlm;a;RRvPeopeigV@%c5!GF*XEd_Q6Y}q!pi|B*s1f2s7Ly2k7 zH3Q%gG3eU|e(Ge;Tb)64XXVa}*qtCh*b0L79TYGDgr#`%$eKU)*u8<$_6s4Xf^n)S zLIR551U_~;S?~qqWGwS`{Wny6>2*53x3Nf@r*~pB2GD@w>zC9v7zIq}P@pogj#jDj7 zqb|0>BFQLvEgJAHYQ}YLI0`?2;un>mH zroEI5b7#C$(+-H|MqWm29R#O<0Ao10cpJ?1`xM+Zk3qczcs=nH#cYD^f;GN5laEd7h8U3 zyYKACUGrqVBRd_aSxoKOU%as1Buq!xh^Bthtqvo;45h4q1unuqDjsN3+YaeIHu<2fdF!$Mky6<3-W#Cf1P3K2iF6-9??+e-m^as(SM? 
zuo$a(vnPq$RhttyS#-`2Bs}*HmqN%Aw@T(^a{dHVjXG!jDC6-5=QqJnrgs@{mt6R> z_f}z+=^4US8!p>Nzlyo0aZT|E0==6nc}Zoge0@wz4Uz+%-WA3NPC^EmR+Hi`-jccu zc~Dm$R|Yqv9PgZVq0JE{m9l@PcYsF!Y*2R#FSnNCO=X)$kTEn3vDQPIs!6;PFZ_Bh zUVW3N*Z4jNSK~_?C31G5sejDph%q~-U*N4#&UuWq;W~AITkr?#I`@I8XEQSr-5U)g z%CbMLKmGGGRUCTSl^?Cf|0H@s=x2*Ola>yZ=TVRI&#KnIl?k4gBbgZWk_ zR8ucP_&ZY=K}GI{9-dyv$|1o&;cxBz$EGaQV_tA#QbBMG3p@nzlsrn8&E@zaHa^fc z0a)9pw1cIZ+GhzPH{lC`CksH5Bap;R5~!5$9|=C85Rzzw{|XUUTRR-$N~OD-_Rw{A zkZrNcPcT)se1H`dA;#i8h)So>#Q(@bkj|9?uX%{(r2)8*9GJiTxBv&|- zbapCSC}-DT#qpmHc}#>k_8(ckaccbaD*uLhZNZFPI=F_-w1qy zd#k;b45puZuRSia<146eZ|2ugzQ#OaI6yWBN~@<* zO(_J(ts2{VU@}bH!CRsyDqeHfXt1`j_ z^s1p|L>zzX?-xj)<~6tJ?x$4#35$WC$RqmphCj)dl3```?^+N-n#N#zlOux_S=1P8 z$NPBaoT)doQ0B}_!3XIU&ty#noS2_9n-Q1dKo!782|BrBo)kVNhu>g?in-EKRmjM7W-UIhLsdSpPyh22iz`>K`M z7Pz^!AF~#k+x1?b5^RPnW)CgDr_;Xij&P*4tEl}^lF+IuzjJDGkD8BefSIxSamQL| zrqiSnPtnxBR`YRFl{rnKC=t4x9YoRFo3INz*$Li4Q8}^GYQa;YdY1zC+M`5Pn=m6I zzYwQ6fjP810BKzvAqY6+;M3V$(Z=~T>K_-y3n`WNDj zZ1JUa0V%~W9E5Bt+IIOX^v%g3-3sMDS{RtXPZQ?}fsF`&OM36vc4!}Bpe;=~ANUs` zgcj7PuHUnmdPebEL0FWnRi%E4itV3x5JAfmBf|yww|2^ht!o{0@z~?kL;n$5pmXyc zG-qR;&<#@Pn!N+qVH}t}h-&+3t{!WsPr$x{W&?o4xLLd7>NH9w4TlQC@&|Kc^3I*n z;qFp?cqD$~sn+){v$HrMM(iuh$lGAF7*KPE!;^va6XDW;qXZ-O$FrXEc}8NDN!tcY zG~>(`QDh@SYO6Vq(Rb++65I9mSrtjcspHM7f?apqCKsf>WUeSaTj5-IuO#*%s7@$B zBk$wZyk!Q~fCHGW!_xd=z6+p~*HjnnKAhB&y7(@*7DV9hQ`b0=rW z_}n9NxTyrXYzm^T2Uy61vuXBJ*B4iT&62>K2gn2jR_pnMM-m|Eo~H`=6EHSbS1_%! zOZH<^CdaHT?k9QoQVp(|yi2}80H3})TaNS2)9XgMrobcYK-0lw#n%P(MS^BQ%4pqT zXXyCp5}@2${d+m4^Ys;(1vhJ4cw+bJnc_Gfd2$y>MzmC7OT~2s&N@fU=6b^ZvHpooU$kjO(bRSxc3K0@Tsi(OhRsX z+Q5EJ-acQ~Yl8@yeoy3~R%7UbFwzX&^n`YbAX(A5T|57izR`#F(mAl;LuvYRM%UcX zkzG~5p%@@cv&}$BCjp1!t+@E9;>GoSn{l{mD<#&d`Ouc$03nEU#EVKd8Yr)C*;}Ry zxw{phO4R9I7wEbRy>~h#uCgECI}LQnF+2!P>_|3(oJQrL@+j&2$BHJJe=kRKi+!OIY&BTj87tuj8wE9|E>(iqiCncBr+$F>z4;^UA}~9>hGs` zf+G9U_UIfOckDCnNC2)1kolL_Fi1{ZkD0s-IJPJJyW3ok-GEVH{u98{blQ$D^n|W- zU(V0Y}A5hM}3Q`p0f@7?x0|+XRil0)hn_~~* z9N_@}+lo>DM?haE7aW$)L3^)B2LDW-6iyULbZ>0+{+pPPa?#MjNHBlYVmMv+M}FYg zHsOu?z3V}Yz&hqn1cL`0S+8qN8(m4ZZ$hPBck%6Ax$cde9gE{752l2c+E|w)+SP}! z?;9vTmkkoKZYPXK$%1FUY4p||w;mOk_r3b{;or2mlIknt%If-5jNq0y)V17}HxxKu znu&Gi+HZ?d516YfR(%k81$OD&l?#%ge3?T`bUJE7S?3iLds-OfI%Bu!!D6BB{o*r? zV_$%Dtq?+sWm{7VZJm#-7WDY|x_9@P_ewEGxZ}s38&xsKv5OOv?^!LYm0I`Hszz{4 z49vik)I-!4CAIVX0Utz#yn|JtL%zMW*^+m@1&T50Ieg?*%6wmeJ#Q~v^Sb*IM^Dh? 
z!OMsmr9;QQzv5P!h@7j)aF!uf3uu6w2lv6s@TN6khkY{W9<_2Kjo8G8_+Ho)3)>3x zTs*{;fS(?u*dhjW3uc|>OIk1y3V!?EfVLCF(qm%VV3f2+m0s2e$R*9Y^KVmMSa+#I z$eYPtx48%6TfV0C=SNb9)*Qjohu$rE^oFXNF<~Ot?m2g9`-t9PMg-Tcf7Q#agaUHVUH78i%+B zD(UHM!CCKv5)_&$sF@rQ;iXl9kd&}WY6!aq^t{pYnzF@8U=t9%1aG|BA-zh91C1Tp ze#kIs?!Sl}gl?rliT{mVxFq%ZPuJ)QI*4DV&|G0)0A=NoZiRsM-Byp9Bn8{Xa(oNC z?1(gGthSm-7S$*3{7jbg;n@l9ePh?r*TTMI6hMiSE`gm6w(d&9R-~avz0h4Ci4H^r zNO`}d~xYis)W!TR8p2>r-=7i;#;SW9bvpV^Oc zuKRYSmBpmLhg+|tJ|XG3)6!PodPqS`x#afza~BsxG`u^gqi%)iUzv*p*84hDGgh*0 zMIor|n%@0s4fxqyRp08zFPnNn%L?{B$TUJ`Y=*`0B`Deh2)!l&%Fja8T7 zuY;v#Cqi+AB$Dl;-%G{AP|LC%D=@4Y2BT>D1{*UA^UtK!wW?mL@R#pMkfd|3+L6a( zy8@{8f#qol*tXF-cn%4xu3M!Y(I+A^cL}8_*QiDNbP#y(J*}=0asBs`{BqS)+T#}( zfX@!;MH!^_O|pGmRjWKEzj20ezl8j^%?u!Y08H9}jD3l(JD-{+yFgPQfKhOTN5`Q0 z4c|Ok!;FHS+g#4%=$tt46Oexp`GlUnagE~+64@nyr-0v>7`=M-;BLD|K`jrhohq@r2vkJZ~ej$6cSUf5!+O&uL?wI%) zx9@a_-|Sq>#Cwk)VaRVP@Xrs9?iM>WkpXzQ>d){7{&TPb!Am0epFd2XIS%-r?l&Bp z%0GNI*2;X*IvFLHH4QTTA*E&|RH1w$tr2>429*x*u(YW%_4!bi{lFIdY$ZTx4OC(B zOSb|cg=wI(##cztd7xqNnku>3A+rLkMelpRN|o(Tkm$|^535>bIlarZ_u+FdIGmFs zn*rm$+ujzWd(;0U7Y#N=Y#7tOmdy2h-&B5X`onnu5`zd|-} zXDbCV?t6_`jxE67`S%E(QwYlX(+8{dGEmSii1m@=T`oh)zD>_}e*fayj{6dN=c>Ox z;&)Xf=V(Rjtb6Iay!izr8wQWr= z+_fLjkCD%B^=SR>QQ4aF7jL{yN6c^=;-^nu6pooA`ny*ANYrvU06z208<)Q#Kza`c z*#Ok1M=VTKK(W#GO0%DUn=!4NLp!KQ&}*ZL#CZkvmYY+&?;=0Kw}6J8>9C4(v(znu!MO-5zSaa?^7qTd@ z!utnEwO@m}ILoxL2G7ft4bQT3=e>NU+xK}O-}5GpL%%P!`#^m_Vt4tb8E;!d&c1+8 z3-Y2C*Vz%ZGJ=2fg&1>2Y?+cdk7My$Qr1=pSD)BpvLmbPS>R_5-OWcLxM;O1f4$x$ zAj=oE6hmCn@);{ascPa?U_lP}>|Y7mYO?rP8*|i>puDmnaC2HQfc!Df@#IXV{fv|} z@1MhK9HDgk`H7oRy{@=mP^1@`vA0NIRvf5N^t|MUL2OUEqfdjwkRXC@`Nazz<<|4A z)^`GlEJ=K#ZY*-M-}}Xi7&&Nh9_J#_%Fj7aPZ#G(KRmQZhyiRQkrGKCsL@bMfxr>j zOsO3FwUuylpVG|U-%1>*e2!2{ugS_7?QM1C;tk{$X=#PgrnB&C>ihpVf^>Ix zgMhT;2m$F->F(}kNOw!e2nB)inkp{^T3MdQ&HXP%(&-d~8{R8*z?3{b1#HPh|T@O+|kZfV)$P{=dAbsV{~xmVSbuNdYg@zQ_VXSOkJy;!IzyM_>^BG zKp6@4DcMo9UTz#&9=zlx#V^N#mqiNsydY=0IYeyZ?W0^q^OmE#YZ%7!tX%VyxA(NH zbTQD`@PjxPW;~2&Pugfn`2N|Ns1tlsS*T;mu}<8@C?R$sw|f69VrqMK;j|MdLAAW* zG=xe>g5I=$;&f@jR(wsNANH6Y5Dg?Vu2eKuq$&KG!N*s`XgedQ?c`VnIYUrCvT27q zhenMFVg$p~;;%o4bPY*N0e7C#oVsc@2!BZwa!R=8y$=FfN1A)UNxCPt=Fdv5j!-wkq@UtE$=mdZs6*KM>RG zONh~w2~vtTCw>i|APw+y^sm0dc7Xaa|Xn%zrjDhIF0SL zS)?p#_jORV`2B~@uL}|uuHF^i3m0`@@|aQLk2KMTkLf|7mo{e>Z~vSUB5!U)eQG}^#n>2J-*4no?}XifwBv|E$N@4?oFL!nhyQRn zDc}?>p4nk?=#LGr1(Ek_e5#`xQ*QEY;OTRGpiu&ZYrbpbk>hd6u|ecA>z;X;%k0D5 z;7$E6qwRK!A4aPS5AsX*-M@64e`=hd0q!efs$5FRSGkCNXw}rChJO^(8KBA^h!x@T zuseF9TdBAZj>uE|l{bt&2eL+Y- zKCN!D{{i_j+l`FZD7s`-#C7^@uIH7?69Sn>(d&=dXJzG#7IPORAmZPUULeAh%p&5W z?Gq=zrLk-oc~W)>88-0uS5fXBQ}g*u;Tk;r_nM3+jKmXfHyx#AjB(#Gh@Qli`AmGR zq~i@7d#6mR=x4>1{@#4kkRY@vye3n2e_NEEt}E4Mlbq%+IgbKn!ec0=6@;H}lyY_q z4bF47Tq`N2F+5 znUw1*QFUD?zKyN@gu~y)g1x&brXO4ydBHZ-s&$CM;X$its6c;D~R z5opkK;tucmhb)Vv+@5>bFWyGNT>IuqPYQq60P__BfUy!z&jK=1`0k5(XRfEBk3Yew zdvhiacTcH$`h+BLD+sv7&e@Gv&07dK{XvXD?UFJpR1LE+i>NNg#Jm|FAq*r(* zs{;!q7W`CJb43E)u4%a6WsJMq`=6$6jRdTnoX2?0Wx2%g!k_r+8G&p*_iO09H_3QI zUx}zqjMP z$qJknHR4Kj;1wah%>qFS>=zebOPKVY@|gQBktV;<>vj0}HTu-U+5XP;tni6Gyv$eY zX9jxm_p;5`H9dh1S`&2a;i~1at9`X(wLV2X#Am*eIUJFHY^&^+63JcT@HRz5vZpwi zYLsj17`5r_T=CVi^wHC=x6Tb1{gC>BLFQkJUE%UzM41LckY{YJ>_*mq1cin%-7v-;jy#ZdDdVCe8|Ne zwwUmB<+ZB?zda3r$L%>zyr@ApITyCoa~#02zS1#k(^pneZ(A{~$xw-{+XIACsb)-c z&3@8g!VVPFA{=O6DGF9V;oC~1bb&L0 zxM06EXuYXf)1e(HLmJ{0_P~pTi{UT*_v@;jNDA&psxoPC77t1n>4YSo!y6hkHpL6m zgna0T>dFHcWnz?6F;f8qQH`?K;`A5e(pU9v-C+#TWM17Bpg1l#6Fd#$4P4_vPrPGV z-|c1i@;xgu%MCw)`tcuv5}W0En!^*Z0jo!}CRZE6S31pXPPa!wKv*A@kfm^KAP4$6 zPk`V|r2Q?s579G6RCMVHQGfq%I?|>{zV?)MG3$y-lKwaP 
z4vwHKeenInM-GL|Gm(bmHI1_0GmqQ=y@VQ|a!%v^)lT2*DD0T#f1ob2R#J2&-fwJ@ z2T+TCHU2&mv3tLyKYv4;6?2JrgS;o}*AG3}yE$7!XlPJ<#yL1EY$|*iWgS;R}{^t1f)FHpjyj`0fy%&k)>Kt+I7U zeFC!$%kZ%p(UvB9G4peDQH3uCtIwFfri(|8%kZnK(HyLq`3=wXi}a-Ti*J9V>tYmeFuiKVfb==(>voQGUpBo0!_Jc9_W%&k z*o`OU*S;fK4tL1pX7xCHE{*#;;nBGMlKgi|LqfjVAQR+^r7}t2xv6_{(w^6BsH;XN z%$S{h!wy?}RLdRXDIbnACka5FM1#scj>Nn>?RmuE&6)HVr6i&{>#y}#x+xg|>oryK zyO*|P^7TddX?>q54a;*L)w>muMrs#e`#42O*%8dlfWwOtljnUD5+Q*Q&YX<6sNL27 zhTMFAdYevs;?E~i!tDNxqm%qECa6<0!)~d}f9#8gS}9_8aJ|%|k*CkI$PZ&~hu9U> zSO^cE57$(U_)wKj$iJ@ok|<{=h6u~FnNguL=%vaf>%*q0Rz9Fl2+TC?1@uVmDU$mb zU5#5NEz*~pD@h1YX~4oLW?Wwv@ROnf=)DhE7Zh&T2dA%N`;|`#PijlB<2*9)%#r!7 zCu+<6dZwHrE5m^ve%)J{YOG&W0Ge6BsMjJ~%z)(v-tFhf$a)Et8hR|@IWMMQLFX5Z zeF``|nqnjn?U@$^sU4ej`1L<@h@p!TV|pg*THk*CZ@Rm(m|MRWs##S}jdWD%1WCUc z5Un)*Z@T-i4EFw=dA`$Kr)RYmZ2J0mjJ=PU4){hn;8$RDJN~M5;w4qPEcF0`q|&qL z2F#fGYV?eg-@o^WNhh;w^}fGWAAHI6^wCes>sk36LXFg1lCpdmL*ql%%|9Wj(VNPACIXsAJu#$lg1p;$TD)`4*ZHjfI z-Bc>6(cQ`>U;RZd5tkI)n`*-?^BwNgTg_&hgbcvHw2OOAKq!CA-PzEc!o{ZxnFV(G zb;aj55x?8et+J8UM;ZisDR-VBRs+Y4Vvia6^0Sm)WgZU+`u$L-hp ze(&!b70dZhn&0)_8C=Uvh?IW;%;~E(|7{Az0-t>pLfu^&M@fP=MrtAhBOzdujRW43 zkiD7}-njKqQiz&S$VwipK3=r(>*_F*a)l=J!;kCXm zkitMJMQHJLk}7>ECQE${xA){P7PNlmoHvJFBq_4TNcB-O|LXKBjBX|%mcfRz34I``q%~1hJthg2`Mw@+uK*}I~b!6#DE$5 zDsZ=6Sr#;9(SBuI+}S2lx4wEWb#!6~)_CYcZhU-nJJS>XLK8)19UUb%X*3eN0ootB z+qgpvdW-Z(X^)*n2lmJPiLM?tM@bM%ygduJC?GxOppia(S%~{JVTS^*^O zbiX9kAA%1y>*L(7+mvpe-BjJvX1GQDjFNd^9=OlQQu=}ShRhwx`((z|yXEtKvg@49 zUdRBwjR+AUG{a{`g1@=mYn<{p;c}q8aj0bXH8L+8p!`+xS_l}`&&K)|s{W$FcvEYW z&fM1VGW^MW&x%KSr90DK_dAluTP-hxiF74w_}7Y=FxRJ_TNA~N=&i=UiJOwGstM_= z(XiUfMM9UzBXv-6lCqI>4w?_fU}C-t$NI(uZW$Dh8`rkDw#5&6p~JEyqIcC#e;U}2 z)XOP~R@7?9s3Zpvh%j~0Vqtt42d_IJk;>S(k9Q7&JGpJpGs4vYt(-CdptL>OKiD@L^UJX zBfj?50Ii%q#1ZDc?J%!l3!B^q?2cm53Q2ud9=m)X!4ex!71uTeWv+pj?meBukBd6% z_s(o~qOLzzH;$9v?f1j179w+(G)Df^?>uy~5ORcPjWCJp>Eftb`Z9AW3|OEWnq-$Gi$mHALZC9(-1*n;&>3q;-lwqon5<%h1UOaP2k z8%1jQ?;Od6`c`aC|1`d1dm3cdIk_+lp?I7?T%d_v?LQ|7U0~m}_=)@D$N7!%=HB~gwl@L}w~%3qHB!ykaK z8}aMB>r1L;83Yl4N|^W#4J><>^ZUIYf*LcxVMHoXlOj?r zyO?(ry#M|fy_4hYvnaAd!KA-l^)_Wz%a&X<&MY%pRfAhNN z<(tHD`MDBUVKVg2fhfjrkJU>z(Oz&~cc>y9C-;~w5ZWp?m&7@}3+^SU8*asDlu}S( zu=aDdIi4sWwu#dod__AULKmI$gn(xkmp03F17FBzayNOG$T2q|c+$9D$sJ~5M6nCa z=Y0{!`e!y93!;gt+<=CdX!6Bg0(9{a?CA(;VAso!$u88S zF1}l#2CRhd@Nxwc`PJC$L>$(?tRYwb(f|5_?4};ZVpSMzAPH||?Fnr{}Yy#qMyChjy$&f zP7j|^`e7$v{Zbj*bLH~a(nnxiSY`kjB<~HlYK%VpUTE47tmkvZ>o2efGXqi;p7_#1M5$c=mYo%VDt`(a8&AMNy zrlOXx22&}VBa|$oBX$z+2Fd>Y{fLT9GQEq$CkJGgMLZ-B08|P=rvqng0~pw&?6`^6 zHV%4F@ysGOB!qkB?nF4bC>CkSLwyN}FBUkr!}|suBQ1OSoZA5h@hS}`R$Q_7nzvNK z*5o-c@=GEn=7A!n%h}9IXytg`@HAgrZh@c*_DfPiy_>h5cm%g;XCmW{ zqT!U_C@M;t%<9_&P_ARD!^NNzIJ3k45L3K-IC^#H%t?NI;v^ORB{$$5oeWfIsX(J# ze&sa8o>fzAc%t4V%VOI`JoaP{ck_WT}Gw4EZu3oK}`Mg0y;n^XPnHJ z*uQ0gr!yMk-~^w+7@z2ZKfmu1-@~`PHLVW<(t+cv&=95+@S#gIjo!pAtlL^Wek9A% z8F>JRqib>%y+|7)vB84_z9^U>J9P%{F`c!U?`Fx<2_4zdrw3 zxfeYH!n>?Seha1tu*b57ZH)Gpih^(N_TnzaY^Ddoj|BdBKQ#Wv(?~i5&#vnsg=wuj z7e000G$J*T+yDk$-1b{B)TvS&npArDKIZo@hhReH6^4mFTCJW)qk+0sWasvm1>~<# zPr%NN_E)qe=@>oqfUCTX(Tt5>=Zd|xu3XZr{dpv84R7wX#Q=;z^Yen0S>RA<(@AK1adf7-9)NS^bM+)7Jb!c3Fw}m zyiTGX-W!!@&%)Ym@QA#(yR9kPo4LoUI-Si?@|VN(K$007ol@eE(HNw-?ofbS2Eo?$s0qtZM5oEkAer~}&{tJY+ILFhl49ihGs8St(zoI?AGmW>sOKOwlA68m_)3=~x+8ywl@Z{C;H$II>f4RpHM` znXwpygezFO`G@cLh!)}UOXuGNUd!`cwPz%y@7g8RLF{YA^ddLLH8Cw#Y@cI1XO|sb z7?oOvjOZ}|YI_`!`%KIv%=Sx7-o%m)b2ZQ0+B1wyF|-*G?4e9p!^N9|5_~n(bcGt7 zx=CzJnvq((!5!VKu%y{K+9~B)q&t#8qHal6LIphe-EXwVjw8_HHM&)925DHai>gCa zOq}RZ{QH2=%xA2k-{_;uKn1g$C)j8{cGz*DH+( ze5u@6FYo900|(Q0OL;w)6DAWb@8bu48*GqjlTJYQKLqGdK{dh|^5vOs2F9{B#C+`C 
zH^bD$oLaQMJJc09QsoA6Q!Ys0b1HApGWyvQrcvQxmirOGotqll5VdJFKwX)lu!A;m z;3U2LX`#f+l3bEjiLd(Jr%UXG`^8)mV+@T-;jMhh$pm{{2Fc%RhaJ)rFAU#36=iv=*_4IE(PK7$ zpuk^XC?;;3p$0ypc~&@5JNo1a4NE&|CfL%X<`VilgWBJX4mXAm$Rq+nkeD!hIZ7&= z*8fL$^jsLJhTQk&>w5#mwG2xk4F65J!_mYIcsCKCq!s2g9KY)cvKpGnI{h&L{?{_X z&R+3-%?AA-SKAZ)xXHlUBZX*uqV~7_`E%y1&!B1yMv5`t!X@=8(kW|Dg3XvuIcSk? zX`vF2LA0ZEpFE5-T2e8q?ceMR-bnjkq_ctU++!-bDZ#;)CsjiLr zP}X#J?KEH^9GSq%ED4q(BncIRT>M#rMWo~R1hn``B8{RVFBNXNQG1|2vVbFWFi}0( ztFx(vf=6wng!RIO+EsKpD5Vf0gL;2ebiL(7^#*A~0TV)LMg-*|UGH;Q$H;k844$R0vX5sk!T4!)_U0K<*ju z!u%10XNPvJ9_=FShbUp_DE zdCOZ2pv^!tF~+8Mq^~u~B!AR?v>ks;^~CqD#?(lw-I4*9rH#7K*852Xbk4RMxlD+y zE}#Pv0Ch6A8V@DOlMCvo7l@WHZy?8`_%6;0vCXKC<24BW6_E)z*YGg2u^wPh#OTTF z{g;$p52frpZJtbjRpERS@o*m`FMl9DBIsf(SAoAX7W!UJ3CkGZKesG0wc=rxkffoVb6JF5Ea?9rml>9iC1aK8W?* z5th35AGRwY#ieI40r*3?-)=a7P@56P3pAzarZ29}2&?fc-reMlDYOmxelE6sKY(=o z0N*E+I8fdeLs>|?0^%@P?C0RQ_SI+H9=6l(pRW&ItBR0yr@mm2?;MYg++IKf=KTFTr78ORr4A5Jw2UB|BGssc61&AuK*o3TlG+x%S7SaEA_{d7hv5vXo_xvk z_oMzZp!^PW01;q{<~HQO%V*+?_g5!uH26^?2$Y~{TB$R`yS1~o7X+^8rwce+uFDvV z26EVcJSXXdF&C8A4Ii9%zjOLQJ#b7b3-@wl&3-pC(m|1XG=hayJ!G|3;eBMvTw&9+ zxav~kFCmmd#t2k?9aqwsfi(0C9v$-gMgcgDH+cwh98cF5#i?rC7oMDIJvMjwksu4n z77OK|S$vj8H(FHkOW2EowuGO+W&8_3#n(kT!Kct8a+VRm!Z==H!!G+deV4`oM6Ov) znaN+B0%t_+?c!w8(MTzrt_3&(8GVWYB{l)IW^EhwYNu(@e{kr!xuY9S5ef5#;Oe;a zY~q}kJP%G1@*f2wjN^<1SZ}nQGw)x|ca#kp2sC!FgxPnlp+l&YlizUu8T?x+O-=@Hr|Oxjk!>G<3V~lZJnBVPaQErxK0K`e-$yH4XxDT3|`EP$cC4%qncK0 zfOBu(toc0>Df*j{OE#-98?^yG8T99y-B0m-C`7d22-isa>^Opj8x{#&%WmaOES`6k zUN2#c_pj9P#LJZacIRG6&wzeOBR?1)hn)ZW%fsp8ocH-wV;2VrRR_|gCr}Yj zs7anmf0U6r)0Vob7LdYC{M)&t;*g|pKir1&X|A()Wc{xCR&~H+yFtn74pJ)LRS)SI zgzGi{)4#UBuuxukXO5I3>VaqI16_2=41IyfOgCv1L72O*IxcDzS>TY=I|;fh;7tGo z(VaHf80{kTb7MjB+kjE|poM9DUFa%O_{-WgAwKHDoBZ)j^lUVm;Fi+WBQHV`XrBW(JEC z2van8uycE75szbCuqQ{mucpubt5AQk_KSIC87(A6gnCP`A<&3i7 zPvU4XOfL2yt_z2kb9j5{7WkEw+-h~es2)zdh4>|kQ7o8+(lbE$Yb{DeVzVyF*j`h^ zn(Hsa>IiEz=0f>KGWW;wAu9(i^_uD~Cm@IFC+vi*F6Ggu4$jDs%ybdXdjcTlE-?CR$L8o%~e_6(|w9H9p(KwV`f z948U#INZeHlHC!~m|tX!t#R?bIP2&@y7a4)hTWNl0+3F54e<2~0k+j|#$7DA zQlZNXA6?zKSnPd+{i+d~WmiTSU2THoe{OCb5P1Sk`uzq;%r{*ZoXqNkoLgkDDc5D< zH@Y6!OS4;ad_g-rsgGPX?k3*=G3^0>fll7)#T313aW^$o zq4ii2n%DI%z9#&93*SAcSN`H3V`Hc!Xxs$dDpKF-XK?4$ZY;8n&!+^ zH_=-%-Cf=oFLDy5jX;PS+?l5#b2dN0u~(1#rv;+fEcO;)Ta&QDC?C%wUbfroZbnto z`V75_Ut-VQMj#*7F01;J!4`_#r-e`EV8&!%+L4bmun;N#sGknzJzL1&*#w%wW&5Si zxvXhu@jQ$yO-I^mUhW*dm*}Wp=Xt{5JM3)+n>pS`J8j+d3`oe&?1(=j)-WAa)XjUpH>CXw7Q?m{+w z$4F=(@@hE7$H|R5>Upe*QX+c+cY1}F&v*RDhy@lFi}sqzRJgu`yK+NJ_s|c9Se<$d z-lM&wFH7o1amJiFu#ngHC9u-F^J%v)&sre^O3*J7q)ZZ;jMs8-ojvcfzQGz9xM938 zQfQPIDd}8Dw>!4(l9h9pGn&ez?RfD=*Ipj%D1Vso$esBPLrOiTjmPnX9ioBpVzjF1 z14JaTk>_0uaEdD2jy=h)X8aTFR3`gD@eSUJ9P(0MI#8&05u^N&7s`QoKXdCPTx zZX4pl|CK2n#mx?H9njAX5_m9!4u1nfu|+~hut?=E+d>|qlWW)348axl3l|e>kBYOz z%Gx|GmT4Sm&kK$vhgr6a;gD|x11tva;O z5T2asYc+1RGwj;6cB3}!_gHr4qtL9j-s4cgdbeO|ocYhCEyOP=`;1hrt^MY{am$G2 zXA1JyO~gMbG%LJ@eg~sAl-=+XqsOR0y<;|=UqC;H{MDnpQk9jxtQahO_S&H#MGr$dq%k42h#K<&#WvA_aX8B)YI{~p#Z(j_P>~Kufk_YnWgo>-$&xwQ%CZc( z8VgKqyt1+|dF<135gX7uWHoC>cADLc_`Gj)Ox!~o4Aim)z7Kq^LBWXD3B;HTYY+1o zvn&;rJLdWKJ&3)kxSr}+yEaNcA+P}tq&?H&W9K!;6A4|P?DtCpJk8W$cSK6263OFG zqu#=E-f#AT(fdw*Q}!}e?yEU@VLt6XNxUQWNF_G z@6&mG80c&v*$}UEO~jmcIC~Jqg*rw`A(Q=7e3h)}7_~V#|6Q5F`F}!cfck4|MZ5Fn zy^_*p#XqD{yWvS)e?|v@o4=XfB0g312g;OZ!SexVXROwTmA3|)aC$DE>u9uv3QtIs z7Q&C<5fXaSeD=>Y`omx`h_Far;S;dndyi~gPo~Dhq$HQK-}Ixvqa=8t<=@FxzxVzI z?(qBb<2lAqIU_Gp0P?xD%^npZt6dQ`@LuYQmE-%AfSC{H{cm+>!!BFx^=_yZkj)R0^nQ6nSlm-l~o*JWm;T6ysQR$lO+5=5#itDf5_9S%`kB|5l74 
zq0;}!ATPCSzRyfeGw26k0a;o?xC^*GJ8if86!dS^DrDOU16s>zuQXty)*A9SCRg*HC>y_qOY5V14j|FpSb#tH@l>I7bCTn+aE14a3Raij{p7j z3Xh^}d@^}iYv5zFG<+;-loY0~6>8#MlmDA=rTt8guI`Ol@!W)=9&MNWYkAH%{$u-c ziVv5EPq%-*be6MhJ;PtgGNQ-)>G#eOBk&orO>A@V;2!*!#cJH;{o+uZetG(?HXGV6 zU`PRY&E`ZaDhRB2?&*~Y(DVH{;^B#rY}_9TQCkaeH&baP*0i1+L*Vk2+4ICR;Ef>DD79VF%wHUCWhwBKsi z0+Sn{OEvdU0lja&@pxt!7KJ9or^++V9InQ`%0AxHA?n8_@B3nQh33(S=i6N`Ips=X;yiE?;J65w(|Xe-8S$+3O+gZ3*_5QdG9gBS#A)x+IU;-owL{hJ~WoP62^qkmfB-}v!5r(3MWmHQIB{`PY9P296c z5)om~r@Cr4N$tg1XRl&Q4m&rvi|BEHS!s@6GtMKxCIuf`b_J8iDuc?QZ5G@0@` z&iP))MaT2-r8+x^$1e57s}q1*GuyLV3dg5nc|!gw^&;2$5sB$S?4pz7K7pAplMG;#W zAHdzlXryA%$~;d;43V>w4hyOL5FJ31&(AUcdiImTX4n==!dN~4_TIh5RUP?NnsA7?6ini^wz$}NvJm7=? z+7aBRBK*U+i{@lYnI!80Ubc9g{N@egE|pY5R)9qKzVB~6_@AQ0*T~BDmZqvHEp8Ri zrS!pq9lnnn6P>KmlL`iaHibD;8X5w4^7P#Scq{}ZB#okqX0Rj!kyGnu0?1EI;kBk# z?mG6D0eZ;3MId=Jt~<=Q7tW7T02?Dm&yj_;WPrK$YlykS1zZmGJc;Hr^Ib>sn=EjU z(O~8-K?H0dWdDQ%{`?%t2slVmFi!<=328})8)y}_W!-xvKS1t9r17B zedV_X?|ApO95Ok3)Rn$f>)c->RR}N=J9ij{8bhM~-M7lvg7%iVhUvY4B1T{@?BK$FJ2;)To+d@D=X^6>947 zR3P&2>+F?mr^fij_yr}xT>_bB`$A}a50~#N@|mWqUxUZE4HBc_dUs#$v4Al;6+kSU zypWvA3jyH;JwqC`2ut#a!ew}YTdv?_Fjge$ zDW9B2EAXZQilSsU2>E6`ZFlInzEU`r^k^Oo;{>kPB>W+*IBVz3g2%mgLP~-U(d7WG zLQfs!hYsfY9@*1=QK)lWKDxgu@4askymU3mMepIDSfThi|8G0Ksj6?gPfl?W2&5!S zZZ)ICel#6n>@L;2$BrOrO?5$&bT^x!IqeW5>w9d$XN*>2io=!fE=l;Fxt+k7-^1{8 zl@x%A!NUQJveePH?(6$=xav487;zU`85Qy+tTT-hhCdZz3t@V0W%GmNwro7H)x;*O zFLur+Kn~{nWd~p^{F=q=qFQLm0sm(SH&2*CW>0mxgc{GYE%MrDl#BC3u2D~ZZ0zAO z@7aLau3QNLXnW<$_8PZD>*IQuL&JNUKVgCsV$N~H&XEs`da!pSLHc1|id-Q>`z7M4 z&H8mur9Z!R+qfiF70Je3Ox#n9%@{Ny3toYhKT36_ZXJ9(74;}ja-^#m6XIxiLXQq~ zMYjdO?<|nj%({Q5Y_UOd+gtAYHjV?95!Ax?KP-Q(eBocCX;c5fRj>E6({K)YPYBd& z1yw%}wx5$oTNttDgR6{C2BNd{uQ7zt2lQMWeq}AwjvUz`=ZJp|HZ+FbelGP(Xj$9L zO`}fxgMRdbeNNkdj5}H255O7sC=_j^(;lFesx1!44PgOl9t146@A}7|)mU>0S!3!%RYJ!2~j-ZYEkC}}rgzY4N!YAzf3?Lb1XPa@FG0^zFpVh8eb=dNi z*{bBitgu|r-&wrGCkbBpnykFZ&itDIpQaca3dtUeL@Qr<0QHP;c;SiLQyDJ9?rpzh_85yXW2U%24 zPTt!Pbo;SCL+evQ73dT3&fi<)^q@{^0U{+}7*Ovc+xl-|HQ*H0*J#y%zH(Y!4G@;_0ugXu{Nm?#%Feg2RGj;ELae&1!aX@mXdBlG27 z;;nFP#n5U?V;TyQkdbkLynXrcSVm>vV}7g&0;-gd>JYzRF7+*T z1=j;|S+PIi$=PQ8si4>#((&D~Wx5P9q|YAdjJP9n9%!*O$3&gIZ5y$jRxkGxF8Y06 zfQmUd|6siV!%w{(Id(cxndm}B0_)=_ZpFVZtdG;IHj9jhR(;O`gs2oKoX@o~ z>3T8O!d3FqJc}PKm%l0cJ(9UP;K#kw`zIhP`zaQe$M^0)1CnwhgBxuby`__Ska5P@ z&GAP+^L}L|a7LD!vwhZ#SGcf=){JRwcXGiH+e4YcoQ}_0OlY zDO1|nqxDFuU)LxAoiVBbD;DyIj<|SksW=e3@ZC+Z$JG7x;8@llt9@q7?R%9t-q%2{ ziCA0Xpv4pKlB1O~+Pg_B^y4Ji4!{CC+y=vsm^FVy{N1^%y~)@YmNtv{z!9PaX(O7T zj}`$g7#@%TwwN=$PQW6sN*4K1Uu=?|(QW8Wjl`YA{;c4{gnoqwcw%mD1R;lBeGn)1 z&b0gSI(0BLPIyU&<^aTgJee&1C|CC{-(cVl#8QL9WX%Mqncz`8--h7=7(dzhY(;G7 z!6RTv50z}OuW;4@vf_|ccqWKdn(Ma0C8< z#CBd~yIR}N)-xjh=fTA2JUckQ!?;^)FW>UjHw3= z?I9KGLQtU@+gY@GC>#a350iaVhz)+2VYh;RjAOmE1~8f)X07&rRP5VfiRS3J`Sc%x zogrmYmwDz>&$U&5y=1N*b05T}@Q6Nw+$i4xiqzjc`aPz<+-KPA(*;Uhkd`hkd!9wN zEDTw|t)BNrMCmfiqU+UifUkQ+gD&@HyOqk*>9Qw{fAClNCAj1W5uHu9=hUlnIr-m(F(u0iD9iul4fb~0fw^dyTPxN zd1V8TMoPHzgZK)Zq6~6f04_-~odgFgu-g%v$~?xp8mnaLvS*Dpij#=N+=pHq|0&cv zDPMF-^F_c^l~#|0!+@9D7PQ`rq^M~m>j0mwr)r~o14`m70cGGF*_y>GX8oWs?|aAx+ho+=d;ARca`-A8G3$x}JLh@5vC{9l)h zDfxOyN?!jJq@%a-S%XdX`=Hr?dbO2~C7BH0|XT@DWm{v8nWJJJN0Kwba*=zk* z@Pco~&CsHWehmGlnEp#VDjPPRcXL_-g6cDh!`u^j9OYZ9Bx#$8 ztIe3Szyo0yzPKDCu1X@I3za)9lz%Z7OF^Lle2=wPWPg&S&ozbAER_%5YMCK%9LneV z;VAOJh#w(^Xzwes(d)4{OxnUE2@8qsd(EksHE&JPUxWQ}G>>&ud4l6l2KwRCr}E21 zdJrJCV>KPbhUEJ`^>$T+3-$WW+fOROMum^L3I~bh(mXXBOopU-1lhIhBW1cgJNxfg z0(_FX6w2bj7<$I6(n>4Jchgz0F>gf!AvPLh2W}KGSh649ACvCbw9NqcrxJzv+GdV% z-XCXO^m6-8_~``!mG9Z*kWdfdr32OPErHQ%As?$!fqn9(ybHn!Ryd(*r7&c>OuhRE 
zU?@hwd7O@e+T5fFUVw#Q^-*ik{!y-tWJrZAsW!pa59)2@P#Yi0i&>!-=~BthSgWR&MdbR%*K>ab{{%KQY z1qdAr`hP@ybyQSe*siE_cb9ZYOUlqliL`)-ba$#SBi$e==}31-hcGnK4FV$FDh!Cs z;9<_4-}l|S?tin^tT|`zJ$t|Vjpu!y-sJ9&ce*3qH~zPH*I0f4{ZpBC5vr3?eEn9pui32o~T0t}D&3_vI?%FcL zbl#{4uhgZS22H@^ngaZxDlHxem{lLN)itzdozHV58VafU<@#hV&+%y_R5zl8U3xv(W))Xw^3M|x{Y}x{ zh0RF)NtGttSBHsBj%awM@Q?50`xq^nC;ajlv51HFuE{B+9?7%j!{M>LU3)VKenK+N zhaW~ha4X+Mbkg9E)PD~MB}fE~luK|jH@t1U05V?{ zs2QJsZjL;`CewD?&?;|b8!cwo!D*Fx7MOXgh_TN{-n;dxm!;|+FWyvi%uW;;YUBT0 z1LPIgB@Px#>e~_^C?p!3`H_F;G~3iM;(AIHzb{+GL^xxP!qeoL3XNJ`%=|#w-CsV% z`OA%K{BxQj-6m@kCDns;$u5R-^}NT=8d)h@1ZXL62Itj>!;2ce&TF*1&ogpeLRmXeZ9 zUeAkX#H@}<{5a62uow{lyB}MxUVI4kL|a9s7^)aaNW>~K(pUkq?u&G`6(PYtz0g(+ zM=fu{QHpGf}oRG;0^7K5h4_fwh_m!;O|WtrBF)&h$iVXl3@5tYw@W4!@>+2)$h*voY5 z;Cd!x@8;DMxZT(xuR2->7XVL6O`y1)C?p@F?ARt0NYN5Y_P9+wpn=Xf3(ON${A> zz7*=tY-}YSFxV|A+yAg=?;L7g+~5!~@VeQg9av%-CKSAme&Ep!I=VzK3=3kpi)IK^ zM;arvq{Q_&jb0i5bP{ryol?s!e;xE{6l;m*B2hZgxf9HbE=8Mec!ya_0N5nXYuWn2WETgkH)HvVW#`vp+j#01icGuU=I! z?(dm^OD_Af`KTFmgf}&)z-JQ1meJTFA`TUwmpF}(CqtT z7VsIgyyyC+ZbW7%3LGJK?l3Ku`FBQ^y60T5`TL9$X`I>e^i`Vj2UE+56u6*oE-yyJ z_X2*Yuu_*U0EQpLKSbJij$AlSc_+CigE3zu5Ora7Vi#tB3}i_dU4^L$!=kTevJqKo z1v=@w6y!isb!(dh;u6T)VRjIKwgHa|;I>3|WF33^#lhU573M-dAyM070rlbqyrP0; z^vAJYt%ZG-Y%RJNfIOX$}9Gg*Z3NBTp09VLVb#vfWiU?0W=@v^qUqGe`9C@xU~ z$tIf@!4ozk{dF(dU23Ug$QYjzuI$)Y{y^W1GhL#D69nt)Akz)iJ()HG?8}F4{TJB? zW6hYG5a^h@LIGkKeGSlMhJ#}_l0Vpappx}S$!Tv*1`}qiOcjcBHu@K!z~iuwslR~p zT44^Bd?h5<+<}>ucJ0uEh|U3++QaRaNUMLA+a;NZ3gl0UvA1@IRDUs&(a)df8w>sw z+#3&A{khe^A|EL(_T%-Z)-|RfDxj<=#z2nUNbtC|Cht;0-d@UFbXFuRC0Sz^zGwXO zhEvqH7NZQ~mbOejOC*y92LkHPO%A4TXE*eGRirv_{cXE-9xqee^ z4{MNTXz85m?Fzw`G~K(SUn~pB>?4VADZ#~7O2y%=c3|Ve+_t$l@jXjKK@~@$2uy27 zsrQmzDEd*Y8lR}G?Jzse-c4P zrxftK!U%)IzK;&KM7fH)y)dimzH`R`U2KdxW1SFJ3&T0odvfc>OjothCoC}vR8l6koI^kS`C-=dGr7SII{cP<;%V|d{b`03bP#e z3I=0u;N+UGM__IY5C?e%9qSuj9os#8PbFd);vP)kSd@Hj>Y~HiIl+S6tTUW* zg0fu`q=K+ojCDJ;Ln-d|&A)K>hl6e>6TfOEn5V9TSJ)mUpNk>wX=I1u^s>Ar``r?f zIZ0?Os_Htmyz)ZwxXUMQ=Pu@59|fK=0^)ffmAx{ViNw_7NhyZ&a>fvnFqCh^dC%yp z5A%(1GKWX>c=2qKk2U=7%C3^*pIyj4I2wss1Ld)FH>oUhUk%#ykxOp_9NA{|clqRs zgkuCbcGp35r5hK3k?ixj6O;w=cPh%{QJw_R|E#sw0@uB3=)bW|V9}FRKOZm*F-a~G zUmeoT5;!vM3ucrm6UbqklZ`B$RyCkn@Ri%%@Fh zOjOnwZKTogi=`5+fSTQARtG#fz(7g0I z-J4>v0DKHH^xfo+|By}2<^ebvO+z8SC6dm;Elc9*8X%M^2~s;VL6YHfFccL~x*hUj zxZU#58JFE{0$A~SLckjnCZfUU=t8pg6E??uF7&fhaVH=C8&vN!aYhBLFx3DIHaZuG zI}hm}7hAEyli5q?p~c;u4y&mu@+yw&*hRt`>G0->Y8D0PnqCs%jXss$eSh@hA~^1= zIQGg91Zp=B!6!F2-yLN=4PO`#tjb!0*jr*6!N@!cX{$Wu4PFBt|KckQAxpsT=w9zy`t^@z2}f9)}lb?dPiklbqrp7^_<0@ ztPf8EYF>MJ`y0vC&mM3fskuC_!3+IwY%fEE!RYxr#lug#M+3N%e|Fu(8Gag-_+Hxx ziF&|#LU#8)D^*i}i&=udu-#lqb#G^_X4B>uTs$;qEi?Ujl_>FVqFx3y^NaS^j*4&C zzo-=LxwOO3U>C z5|~ac$@-YDQ9GUDUIhHY;HfV`Kd)gj2YTU{XCJ^!kYAuhw|z=p)G!r&7D7C_=#QD{^6g#0ZbtUo=2qj+{q8 zsLog1Sr+~X`%7lZ^k_d{R^pFniL>CFOYbO_O2&rGbnap9wWm5N^lvG{*RLdH?TDP5 zywKRo-f3;?Z>+AIy_(jCPI;l3aUySG*M=Y9g{Mj(4vAZa*j||>-BQ-??(vjz7oRRO zV=1Z4go3V(P?wx7=XDx(vND6BfztqALxljo4KQlg(9?9QX=(-eL6`~BJmSOgDV*sC zD>ZCt^wD1&*s{WXNsJrqS8DA#h~+M!qUe2`x-C$G}&}M@P=c?(_Aml%QNO<{7RQC}Z2@|NJ@``|Y-=P0k!Hyo&3U$5Ms& zkH-c+@0+)=Dj*!hwwnKQJmgsZT@@WR)R89w-+y}i61COb<3wSG z%gJpPE(gona%%s?s~613Qf0oiE{aMR%O=nCX>5s4YeQNxqL!m*=Ttu=ixZ)hi!bzr z|FIvuOYw57{muVrrmpC-bICWE&?=ezAENs=MLn^C#+4&|!+L%D;mVxObCRkIC~v`8 zvbW9ScFM8JvWuk&UsQ8{e(o@GRc!GzcTjLz(QLWhIug#Ot{N!EUkQ#MQ~rc{=`Z6U$$yt>Ab2e zOYWhE?~mVw#;TtlW+S5gGxC7@<{Sw?_^-v%yAxgiK7C^+g3R{a%#JH($jW~X`w-C3_RmJ_+Y!g$0bJEUih0at(Jzlf zdR^EDPeCW6SsQY|ZtDFk87*SA${(!ybn?K!3gq-ajx`dAbyIk$M={|4E{kNVvuwZg zZuF?f!&sHkK4~$#L*u|#Y#$-(%*gPuA!k$U8Zf3~tfr0BjK@nBfoW6RmG1O-9q<+X 
zVN>~sOja_Ls^l4w-0v=a@kF|IZFRBab)Z^V)KMg~_~|El+k78V1j+_ESda34*9pey z;W-b}^ExUUuDZ~|cLssrFD31EqM5E6v!g#MKvoC<=loPM_Cf>&*>%QK^INoN(YrO@ zK>EvK;=bRq?-w?lgAz!5sNf$DXso7*WoUKyh8@%#U6pCVjTVIuf<=Z|St$11=F3Q< zUrAU7o>y5@uqRQQ6Q-{xY|fhoU}AK!(;z>42V9D^LVl>C_t;GWL3;nPZQtA8zE|m* zm%4Sofnh9wJ*DQyy#CuS$vhX< zho8>b<*hIY)ITkOoZM)XN6S;<$4d6d#a(6>>EBkIdaJAB39L+tES*X+8|R3n)z;e6 z8=DJvMMQukie2r!$4T4C=uE@VFQAk=g%koH^nh?Uk#+3K&6DVU7-3aYA&eefh0Gwr z#t-1UQYiGE4;@?%(K8q~lhA;!LoL6I1Ie2GnWhx@f*)EBM1R9UWWC_m4@X5U!ZaH0 zQy3YHDKJntSMYkjsDjwN*$jjH-k1y6|*DbN#PsJs6E$w`TQM@^ixtvgj z-}`4#!P%T?bShGs?Sa4HI4Y*zs<9k)9jR6itH?} zDhuAMYO>2Vl0Ckxz8sv!3nigHLH)~6q;QIdU6pmlJ;l>!;HgxsN@>Ie5838NtQd7! zGhFrX_Dssc%n|&C{W}S*FzHdL>%g#OFyY}{C9h@6Qpd!MYFzUPLcI1uSf|p;vujoN z><&6CDTQR!3QXkC5)(Nsi|{_Qk)(T@MxeBg_Ei9@Pu8 zl!L8>UA;=!cy@~P1IwH$UHj?b`qb@NnXV!aiX^eBUn8Wa*UcXvw^07U)@#hSX~g=i z=q{IPL|Ml{anj+`FZ9w+9qA^`0$ks*O{^vd;H6^y<0cK)J&-HzV`S!W94)XfF!7^O2-a>y@|inucU$urEAw2!nB#rKy*;yu1UC8H#4mFmg4?dBGBx~1P>4{ zw7P^HDQ8`wSorG-gX;jCg@_dGX9r`bT<`XIe=YI=hOwS z9J~D;C>Mcc{M+Iqx55beo!ltyeXy&*zh;1I@63zu`u8f4qp&CD%hqP8IBz?U{E2Co zZ>5FK_rLT(;;TD^JK?_KiD`7g4@pi9+)L1ZZu}L%bT)~FuVt@n)&3=nW0PlHEbxg# ztv3DL<_m)w+G4&9;ay|bzRhXGjz|NHDLyCeh1MkGden-2e)_rA5bV^45Fzb1UG-@S zb$Xfq=5?v9OFpWrS)|7tDMVxWfxO~+=HvVk=H;+h5Q$}Uh6vf>`aPyO)_b< z&nFA}BCvl^ak~X1erBlL^OzCAG(q7u;Mbw3mVFFIGuMudcG+^c>gmc?8L+=PzjbqS zwBbO`bHxUBdyAFB?B4k0b=I?ux0sla!Le-cR5w?rvU4{9GShJIx|U-KoKO4WS#pFl z*0OClKxND9{{E6|VL<;(53GSE8&r88Zk8qmcwox~8lhqdIlR(Jx*eoQHN3Y3KeEv^ zfv>QY0Hi=YK1LEfwme>2MwBGiJX9$;@kZzQK;BrHkL#|{gXGVKE)rd#KtJ`i`;XJ- zvyRLx^5q&N+Fq4zIaMsaE8jK?NdaDn7J8Gj+VhN3{RHE6 z{)IW(+v~sOZJL+QB+NiDQI7Cf#AsaGl&)yHzk$3-c}Rz%BAr)X+gjocvRY zcilZskp}tUut2*JCf$LYI&bSs3TW-3sWp)US=MAQU_!!K9kS+gL8DZ_|9jhg34vKa{B@M3xJR;Qrs`wWvrbdI=t~Ikv;E=2m=956^+l`Up-wbSI;`Z$c zHz$DJ-n07jeps-cv$lPIMWu|6t+>)^dfwc`XP|v!vF$nsDH-x38PCg6+OTx;f&H?C zEfoa+;c7O*?)i7p)5xL6s2BS`_L^vb6=1l6{%!gzc_5zYYe8Dpn+J<^Yh^*6&W%(( zr^$8o;^k%G_+iT7&6V1p3NvH)R+Xo^G71EL!j*zSM~3b`^SWL0WJ@;W3NS-?PazRo zP$<9E*o)zE$Hd~clvbY5AWDK*C*+|kjJo|Z{ObBod=h}`kzK4lc{Zv73>Q7(!DvCd z@WY@YHJFMLW58?U)}Z&Tm8f++xZ!7c0ffi0h@W z=UC;7P0zRSDAl_JH{UOU!=T=#(7my!Fxca(T)Swmg=p)?=q6HxwLFSvItwMJ4qPRudgAhrBmV3em5JO| zZ!FYy3-=?UCe27#bi=@QoDoB(VQ-3WSZ-*J7lSPN25*>voi=}16!>@_qG6{&9*_E) z+HP}_dXE0bV@QJ;bx{tvafHrxyK>SLQY$6DNRt!9dZ^x^e6V|jEd4IZ9!Q2mlxrF7 z7>3#Aw;i{mV-X)EBq&|&(C3pw=-)A3=^DvPr$B;d9E#5teb1@YN&~-DSmCC4E`d?S z->rt*ixeOBs+^$O)h^hlsS@g4jGugR04Rzd?{lW9q^RD0bzWGSo3)*6=;j{&J&sz| z%dU(x8zF~E;m#Xu=7GoeaqlTHC(#LV7ufe}>az?Obc!?5WTB`rPx9%S^o&_lpmT;R$g|ui{_COFJ44!M5ps{s=m>IiT)+pJlCp`KEaHQr1p&OnNvrFiM-|8&5Ce`$80<6l)SYz@T-S>RFfjvBJNz zm$cU+%jqO#i9{c(M1}8iNBP%-7_Ox?!4LzcS8E;AMt}>iwH>~gA#4e@`&Qqb<3h8r z$zFDvF4Z>$ZZ-_#&2gF=DfIc=wJ&Nxs>(`xu42NYmmNXo4ja7(XH)~~`GH|PB8;+JYb6r$mf^9*7c6UlRY2XRSa4(BJN344! 
zIS}KlzKBZu$YhdkC^{C$H#%gK%)Un86d~F-R=@3yUe6V{6oQVR40=p4#yF#|0e3s5 z(>y-Bf$^JXpWdV`M-a|sBWg}l8JJK-rIQUbzbnxb!?@v8LbHv%uHQfFLQQ@Zb()lU zc;2Q#e?L%)MVaR_@fHSf)q4j3D<6K3cY0qzA$tO7l$Ok-_8_v- zolIG4q}*7O5qk(R2_@!}LLQhF9zN8~_`<%)jkr9pwkaDa`4~o;mp7sfJW(MfuJmAO z@D{XTLg9n4s2JJuN_WD|lAwEVj1OgBb#4jy=0SRWiVjNNc;h$GS;UNm>iX^PF| zomMn1ellujMY@MtI!|m)FJbGriK*>ayW|v;o-)z6!z8Vtn0_a)%|LWse80=%TE}|D zomuME)pE63okVIW`X6W7)H3Z~(Pw6(A_YRK{i%`^T)wK!g=ODEXbVgHMHh2qMI0^5 z%GUSg>V2I*ln1Gr4gKVD?|AT>D=Q9gZn%h}k#f9C$`x4F-=->K(MMl%FU?RK{A>2M zqnOgC<&O|wDZ}-{1y3Or#}mW}4y6~NP2}@FZ!QgXyPTJLD9RW9a9}~db7z1PYpOhi zx89!oZ01-E-d)1bv*Ce*I3cqlIvyc?c)ujuFY@=qlY|>Sg^bl16P4Q`=1Msb=9M_vDdzsM&Ge zxkIAmfbzG*zczyHm%u*BfC2eEwZRnvccpf~G-|k$oZEF-t|WneL_7C)4v;B3WRX!y z+oio{o;+i5u|)2Iv2A0p#z;bN-)r!TbD~np5WXaA8iflZb5l1>EXL()0a~9h)2Ccs~ z)DOBu(Cg=Z8hm2y+E_QMPj|k*r0igV4f?#Xfk=Q=X)6O;@%z<&mUoTAeB4BRY|&HM z@0ZPon&iFIOlq`G9;XO&7w(IOYVK^^&EQu5{RhQ~MXdllBHv=ty5Vin&hT;}D?5)f3m^1B&sWJ+NsKcyfsq8TO?^Gfq|H`k=6eTduC-U^sEt3t6{7MfngH0^i7eaZ&&`3|*W0qs^t3i-L zZY<5E>`v_cfnR{`!*Z)_*jm8zuxVIH53Pd%aEGbBVeQJG%`(2Zu2ZnE{S~Xz64A$Ec9l2etP|skI3ZYj51yI3NDVY<1>PeA95iB7kL-Ay57^>de$hxM|M_ z%mlGC+a&8{lfnyW^}iZw0mcVIL_m3T4POGoQ$WRzKxMZqSYJGbKj2MF3>#d3fyIuz zk6IexK#!8T68$c0%2&rlZm@I!bN?xJUbOR;Mw6(bI}N>halQK(mg`9JO_yLPgU>Jy zrJ~5mCi-jFU`RlLVf@>L=pq-JC3l5BR;J+dWXz=S1Pg?w9Y}xlR#GX_Yr;m9N@U%u z1Q;%{)+edL0|DB{2Xhm&hwA<`xhWlu)N&9dL~~Y!R*k=Cm`H#HTLr895Y;0c5w@GH z#gULZ3FoUSNBF$%>A3Sj7~C+&uzDcz>A|z7G;$su-L%CbFk0gjz4!4iDy`%gJ|-jXeuplMQ>Sl zz^|#Eyhsb60Tm6OHx-Hj<2U#C84#!$SGh^hP4Ay}kNGJORHf&Hk_4>)lZm96aWIO# zIu@Mr3-FBxpFQSUxl?VZiC0a6ysaon*eINA~F0g0d4 zfkm>ST{p5-QE*fl7A2&rIRbJ5=D&;{4ws=tri9G^ZR$gr9cNdl{ewiqg$UvQ|5__d zkeT3cY&|5`CJ@>Vh^T9Zunv%CaVh0*%mJ}(BOoQJYD1oZ$O`imjRRtLcp8=Xmp5S& z_l->kcvoVpcdo)!Dm)!;8|duOL(|7!vYD8h%C95u_9{MHVU1&>mh8ER?*-mB@%AK= zqZp-~%dHDuQ1|1Fk2ufFHZ;Ig-lAG@?j=yk2@F1YV{INzjUkAMF>0-k`a_j~vR37I zg1}Hc(9C>vPK=R>?yG3eG)!btJb``Ew4unQOuQAMymsOE8r=}A;39?H#lO6pqJAtD zHh#a#gcT1v8SMK#v$&O^jn^B9!_oq2th+o^dCxS+5b&(ZuG`^Lf@n397;$TZGDYUT zgI0!K=oJoi2jdx%wlu+bVzt{jawLd0b_9VEc>agTK-1xYctyc3Gdf&veh(j(ppQy7 zm;Ed=*?@l!g1?ajL+Np=M#WGnl16I(D$lF|o%$YAVoi`UF2fIOaa0pILJ(ZwfzBT~ zKl-_FGE|+7LxtBA-g6PtIa(8q3kE0Ln)$|6NC~tTtOi+!>LK_u~hLhv_3SjANG%egQ(zjz47UqpN5c z;BeKx8@!^Wsuw!@p9duzG91n&eZSwYTU5{l({)l6(~0zcZZUkaYV_;bu%tom-PF;~S?+`U0tBp( zn>K0bz3)emvVqZ;*_Fx?!xd6(DR3(E{JDrenE#Y!Z-B)HbNN@|%c~-9suIhuSQi4k zhj{?9ngNR+iJ)#OQd<=-MyHKrU-qj?hI#9AfuyiIA5BwY5nCWh6%j8We6a~U9i+u- zHI$y*hYUB{`*DL>p+|Y3Sh`L;`_wkcxmbVPCOG7((% zqWsvwm2$5$8|t5ms=7 zX2B5bratL3%nQBV=~82C!IHu5LW-1}JflRGbT)jU2Y#{^<3RT1vA}@+vqD9f{Vfgg z{!OWqDMC{K3!e3^<(7sLK?dcU&2nqK6MFibA^jRMB?v3Kk;0VgLmJ^E; z(Ah5l2MkODUi;lD+m1Yoho{>}C?sz>_BH*uK^hfM{Y?eSa?j{-g_HAj8lH_quS&GS z%v1|~*6!#rRNYtKVnV!~s1F}q5Xc8~A6a&#MtD0M`wMTt;!NUuaYlm`Wd&a|?GX0% zxbd1&5A+=hkUhUkWlown%T)K*#2v*#V$~U2B_xaw;e3xXVWJaeW|$4m>xLuMSO-Fu zDs~bz7^pcWt7p^+8oue=Re;DDN|nl@8-&ZirjJwJ--_`2@0O4WAze>BmD~8<+%vd1 zhnNVx*N7sY*1bb(eO@m&S~=G%7dW}*o^6>2aOD6Waf&eas1~~skq-@AP3jEqpkiKReZ-8cR z3;qf0W0eX%CJoj4{&8HxMpcYNG{0Fu#_v3{v_|V_bE&=h@=C{~8WEViuexGd(Q{YH znFz($oSk}+a(;EUd5Zn#*I3l((X3H+K^(h(_r7;jj;i}M&`}5*o(#GLKKDTX8Xb{u zz7F%pyxcKwvfr&Qg!!`N+OT^a+FUF?`e<$Um}E5wlWbgX=c)io2xnR`N~YOBF^yCZ zVsB!JSwH(p$6S(s>9|xMqZ3}P7;3LbHZBT!#3xxSmeA6zEm7rF%5NEmGuM|!J*YUR ziTD5GGOHl@v^sK?tE}n=<8G<86jUx3r8G_3@xxvQ=m4!&KIv zgucy#bnr*d`x{(h&y5$U5OL30&JE+ytU!Z$QPI$01V{~3V%ry9uU-WADy^V|Pu0rm zGTQ-C2_;Q*%@rqIyxKCnNojAd^(uK8dpXvDW8|qk81n;jr+!##(nS!)n!{1WWJhuZ zP6sz*N7*R9J9>{2#f!I57bZhhdK}ukH=0hp2Py34{BwD8-)a5IAFG<`O&&iJjj9N< zfKhq*eV<0Ce5HU}Ve+z4?J((n9$yf%c5A<%z5zZk_e@x@6LfSbVN0M3k{=POF@udt 
z!N``DMhy1J^D+pC&IuEw6t#PJle;PM9M*aN(0a*!i2rSpDXGQeVMJ1f;8O`2=TPX7 zqq4Y`XFZN>&IUo>nVxHz%N;=mCI(v_f~q;aBiEsq-;T^^elfS62Nkv|KEP3nU@v(0 z3pPjs*#2CNCh3fPhvMgC`ChUn*xSNQdh2AeB`cU*8X2QzI~EC580b*@ysD$$Ypmy7 zocz+)t@JfNMr=eWe+o_eE+i^rbA z6&A9QTyFa8uQxAb2KghEPGzU<6q>Ys>9F|!<_Mwz9xstM(@EM)=KZa zt183A=IfpQvZliGB3oJbvE@^Omw3W4R!kM|$g^bFyAk?Pi#mBM8rFs{&( zBFev?zi#&Q-1gjVTpmuZmuzfoNIFly_!6@Hv$I*w{_~S9OPYMa*JH=65gGM*aZf&x z`JA^q7Qx8Ab|Jp4ZzzNvBw;>@VAVr{g8_wKy^_?m7RmKx8SpKp*nF_y_wPbIHLyxAAVsz2)`p2@7w?-#D+A71h_OH zp9f^2rT3%x-M*{GP9- zB9rC*z-XX%XH8Il;if_PQ`oHdy48to2Kev|c{nY>dklT_kVx@h!hp;xPEHXjSdoC~ z!Dv5BdWOd~3st1i!?G+(M#7Zr#Xd^}q2{CJ@3K_nK;2PhI%NJC`>~vt9F5P-2Vz#e>}=xlV+rx8As2atij zLXlKPMD^0*;c2;8~KQso_uJUU+UG4hA)J&0!8w+5Q#<0J5oN+k)QFn;idHN%c zF7{&}=7I70t3Fovp-St710R??dqF=#H}moFTMW?IhS|#H2sq!GS-zQ|Ln&CH)U5b| zP$EqDp}exIv+JSU8Wm_W2Sblj0|l6AAx_{3jym3vzrNc18lFmKsCtPCy~A4+qjqES;;U^LVx*l*N>7eh z2vzjc%Y?G`*12a&blezR*)@K$DB~mV=DRKf=IzZ6P(e0cg~NYzmtS(bmWG7w^ezmaMTA~@)k@XODXSEYDYCp-8)GFA0bXkE{w(B ztYY%nb@4;vt4p_MP{)AGp94^^7x2E}C&7k09`ocu6K8ZLpXD{J4IV;y)UZ?w%2i2X zhTmsD!JvP0>xL*FoN1Jjj}dO2_t|_uV#B>{Hyb~}Q$UQ4dcC=qXebT!UR&*Nh<>Bs zET~8R6XElv`DIdKL&$s)9xW>g>$O=;nj-bLNs}j`H{L9g6qXm{h+WmJ^GZ#1%1w+6 zRBHZ!2~2T#H=BLu4|vDkOylkK@Z+xbUUhx;YA&VGy5*y(ygc@>_c#b2VwNB;XJ(}h zZ%Ntd29pwNLGe`Tusb0yvfsY5j=huZYje=vv#X81&})pg()gR12m7-Q7)(9lWR&v^ zc(#P?xdjXQ%J1P;+rE653&x0cyc28rQha|oi#?D^VbpoDJ2Q&CPW=_WQ=C-v0`5S1 z+@%OiNOH?-VsnyglyB!g&i<9{gbm2~vVrYjbbGhpren zl=&6h$LcDw6S^UVff_U$4Z)r7Qb-v<^Y(U!d!(cWo33s~Bg zCdr%odNo?N*XilDQ4bF!o4*2sZogPVoP#p2RB6A3qsZ*titp z5t!+4&*xt-CUAde%-hn|VkK{s%=g;;!U6J}Fg(eieP|`K6saEXG;nsI5&9^$##&NG z7e)O0$y34rG#viq&fk zP})nYhO8o!qO{b($Ip>fUHjffSj=c{C86xd3@7l!u-on9&~yry*P{xWD-oE6j)YMv z4=n89@i%0{bitM9t+v@^&Y`HF$rq2qG{5cO?VX#xgVy+$12Y);T3z>xDz?*O$b{<$ zt#Jt#BFLiEAF%T3_0SY1p``(=LHW7bhSh)JDNeO;SDX8^ynL8Y(rahFAD@F#e0afy z`>XnOH0$_??#m{DvPy5XkWqTtn_3{Fsuit|eJgT?usURQnrk1MzJR7PUuMcwxerdr>vh;2n=5iljUQ!C#>AtTsh2P;#VP^H*x2UyRge2*@ zP~lB8kSHYE^pRzOzv)cI7!&Qq!0jtQ!|d+XpFd{z zqu5jOxS~V{odCf{5ZVRI+qip58Y~Jb6RUY1SbCqItHRLb-}Ybf62>8{Qx=k-E@oj3 z$|U)(OhItLw^MU$%?UVxxs5BlR+S<{iWRi%P~QB3oAsyO((4V%wSjjYDD+&*VbZ}3 z>VF1z-q0Y{KEWMdzR;9>d--J5vYTg%llko!qoEw(y_RmLLl#_1zDswo2C`j<5>-1S zP#OSQ;UT+(yUl!xb{QF=`B~u_Oy`~w6ubWX$uP=nVAkoICb(*X^#Az>G9FR0AFv~f z+(aG%CVHgN^)8nj(FWK;XSw6Ix6A5S1PaMYHe!-QeKx-Ebo0$^*je!Ae~2NO?>_ga z_DXUQX%&t zd}U#~E=%@s)ZjroohFka5@1U)_AD|0CvHYu$%FGCqkj=Y1|g3IZ7$$le}^!!_g?K* zUO_a+$9%qZt#keT>9_}plNm1M81ZU2B3Cr=TZc)c%extZjD(8^ao>tR?*XdsKXbn0 zL>!*gu?(6?M@2S!rG044M;FVj;X`?o5Q)f_-C8-jr7@x?#VFfTznB zuyiK18lOIZEIn+LQcDD)RFO8;xR3#SwjLoMmXaytSSZ+1Ya%*AG&;dMF$Ho18D&SM zqS6*7tYcetQ{8HilGP$Bbm$`84fyWA8Q)ZUzO!_ZOPjD_O$L0(iL7ZCB&co#(UlWW z5TrZGgQ?OY07R`lcW05v%8hyqXWmAadrm3?1AlL9A`*$9rt=7-BeGm41x zxi>GiVZKl~gRuNv(czh)X0IPQfzW)4=p|H@j=Lk89LC6+49Dj1k1Pu|72GkJFN91^ z(JL3&UM$4RS5YF|nt~LWQz%3fWXO5#`Rk81bv-mhy`@XAnmROObo1i>&~z1kO@CpR zle(xEgWqKpt}1_A;SqdP=VR9Z?(K#+#fDU9y!W+OKk+q>WUdEfuwe&gPI zp7WgNoJQ)bJ}F37TKve{XKs8SRKWIQhPlOkw>}AAw>bpKQRLFzO=>O#z7m;_1Kqi| zQJ_@g{30ODGWRBh?KjKdSFWm`x!;*nu*dDZQ?N=nA$zbRUu(35xskd2{_0M?mKMwh=R`wcP zdMWn!$zM#rUJ(P10qOctsiJ+);U{TG)4G+)ZO+ANwlZdXL{a_7UOw zb@KhufCC=5C9v-hx16!kR$ZP7llqxC`tn3rS9tvy&MiwekNr3b?SuqpOb(}Zj&I7m z<9@)d;=$4Q#GA60SHBXIhizBggM8f>B** zWnir90J4x@u9z7jwM+BzXy>?5!RX<+3&PVBeu=IM;OXZQuI!en!=Lh)`E#T@iWRvH1BJIyFD+8zEh94)To0eeILq=kq6i~Ws#??nJN{AWGtRtWSNSx&&ZRx( zzuzoym3mly(|qon$*>k*d->vt5p|wff~ZfB)E8K^Fm_Z#=poaTH8;k9!;-eoB>R4OOrm3zfZ zTHb;mVj8ByHZkmweSr>b6YaH%2OD#LFi}&HHc6P(&<%)R%RV*^Ll#Yz;aZwcEQnsp zi9uLjK(NbVMV7@oK8;;{iLwg$T3o8(oVt1J_3^hwYI5G{0X@&l;TLHzFlqh=I5;S5 
z57A#x(4M#T6=Cm4>yY+2Pax=tk72`kl}o$WM7=ZSvoP*;G{s zzd+*3n|qQFuESM$-A=WF_4?YY?|H*E;+cp7i4w z_ET_A!J-tP0?{BF*26-|A0`Csr%IB-{cmwtWs~|%r5WNz)WDGzBMFh-5(7WexAz(| zH83h12h(uHK5SXh{pPPfURl~}pX}TRXJdT7L(~PYv5M656xFaO^sz%y3t7y(b-7_P z&g5|xoMh^Fe@teRU#1{h=f>=PX_4jNkNtV0%Z{kfn_>m&khUs4tY}PCgX3M|qIjkW z`va}Gd0cv)xQ1AqiN2)PQfRhAVY35CRz9pWc33C9!LWM}LO1N(BopJiP~0*R$;ai@ z>`jLDnEF2D46_n4&T#}iydj(;-aW}|z+E3qFb|HU-2tF1hCb8xeNf;>}PR5ma zG5qC=2+iu>3(s@w$0a80otr>A1IsFK8%eS5MN?&+nRp}71}_xK6#o#e`2Ah@1EK); zHohEu

[GIT binary patch data (base85-encoded, compressed image bytes) omitted]
z6W`l~L-~ubg=aSV(4z1bb2~+rjGvKI8ABif&Y}{Z{E3bZxi=S^4hG+?9doM1$9Z7n z)YShx0&r+qs}mEqAfvpLynvuf<(J`K$-P!$KcL#a?03w{vH*HM=L05|F`#^`Wy5i7e6dL1L%T=$+x>=7a%yMi<%0U|P!E&ZVn ziFyBfJ!R%*fG%&Nw5S!QvXAbDhS8YlGmV5eEH;1=zD?$eJPUQ6QowC!G191T9ep1R zTF{+}+NDXj7(Q~v(0GLQyHUy`vDA_x?{`!`8tGQD+1XnJadYsaZIETKa({+e2+vwA z#tq3p+pCX1SZu0Fdb})_Oa(mXD%`&Ac_H{NFO89wfvpGE78P<3FNX!tt$s>wM*1H& zK#1VMAZj*$f5Ye5_;^KK#D%VMR*;%-4$`lOlhJ#FOc|)KsGvxW+9o%G8(26CYD*Sd zy*)}z)+Ol1|7*`gvm`QN(aTbY)pake{jyFSW{e)LNOmi*Bzxo*tZ$2}6t48(tz32F z9IY0*>0&TQZ|4muV}IB|dV)rn=<~wfe%17i;>c{9d&d)#a)cFAAC+9qj(Y)ct3bsr z^e*(YaPRg(1}*GZvP_SDBTzA>OuzcaCNhf!bds{r)>}?ZQ;0s-HmG?-$Q;fq@ZeQ~{65~bF z4Zj>2ahb3y-K9&x%Qt<2KkXsOdm15fb**bEx%Za=igNB+)`jSOk%5S&DO23~VB*x4 z*i`qyQUNDZi|nD43p-7jd3OL1EWgHq7=wk`ou5SA%LqqVAIpRmZ5_#o5+b z9kC~TEW}GDDf*fbA@VMRuf;m zdJ5P2LdNcR9isTPNo)=$u=0bq$4SHMX-TGOr;^(PaL0Y>ZaLcY%A@{A2J8`K+rBB` z7`qvd`nBIz9(i5G#bOMzW!yN-tilf1qT>3a9BhD_wFBO`~#rtPO?r|Uf7xDZB9Cg0} zrIUp162T-d0MAtg{oypwe!x2P9DXVHRRSI{w7&s|O+Wty!3v&l!DkKS{j{^w-47IB zdv413;KOo{s>pHfGVrJZh%-$Df`Fg#dmsj|d<@wjuY35@#(nn&asxQaU`5)`_xSt> zx<2+N;ozMxDC-Zq8e|4e;D$Lorp@KEth+-XU!dp?U7kb2L@4VWkdPO0knoLxUw2wL*5>NuDs z3r=F6kVI_d48C&#f32v8(rtmS7~tfvM<$g^3c8KAcNxeuhdb!D<~-<-PAxe&$S_6w zOM&zfU(xjgIz9fTm4LMh4ZIF?y8dF>F!lZ1f6MA#Nx^ z#DrS%bZ`nG)a5d!{>?G;g`=8f4r>RP1p}uld1-C+5Iup#{%4Cz5gNWQ(H^EJ##tl% zM5*4vEc5~kD|W9HCYqk*EX)9~?dNT@nX)xrh9%iPLP#{Ie}wN7Bu+JDWaBw+h(3{DCeJpBivkQIDDY!dK_1Ei}se^SL;ei4gC9wx_;#1LPoavAr7(-P~FwpJVh zsc*p>UGvF0sImt2LS?zJ)mLBFH*rV;^Hef>XXDefJb1K9v zJGMUG_uB9)-Y6S`VZz&KBXUp8a&gn^UaE`fb!SFISK#M!1g0paGz5*8h+9|_A75u0 zED+`{@zMr{!m@B7L=x2T%7v&i_AL<=t0lIaP%pb3*j#^hs)2 zTvlWHPISM;i&ExSaZD6>G2C*!F_3c}quQIA@B~GB=Dl=-eWK;21Di^N96q`UY|NkBZa|IJ&jb>zF2S~5KR)ZA^^iD5>cK@OX1VFq1$!|zAnw-7@t0qo zvk{repGeVvjWx8s-#&+CTx0ufyx^(#Gjx`t@{L#@as_+AczR`?wSs@LPQa3&Xu?r0 zq5{}?vH{0p=hJzBY0m-e$vb-IeJjtohny&ezsJazma8eiKWhah-L@AUa?;#_>Na|? zyb!NouiCG3y`KI4%)ma>gO?w**7LAk347qVzJynvMW#JuMU!TM|C}!kmH~JH3$&U! z%|*{61?Ze2V@{-wf{dtaus}I%bJQ)DeuujNtf+gVP!I_?6cwqzSv#P5=vDc8@JRA zZ5S!;LNYIR+|`h-n;H zZ>a@)|{4WqHOh9zPq#egm z4!d}UY$y?JMDn2dW)i3Zefs5d38j!5VG<$>@c?=RU08i5%!kN4Bq5+l;Rez#b*$Sx zuL3Hg2Wp&1JXr}h!PipYCqDQt77}3r8k~rHsTy2qmgD@Px?G3-$cxS zm#F#OSER4~sXXpcAD(5U`Ib6;r8{mRD40=TNZ1IDKh`A(dECa1W%FLwY5KsuXR3hI5SLGZ`sp^qf(aW7GR@qI9M_hFa6+`wy%03!qIneq8J73fEO-T;HOS-ob#aG zCSTANE*4dk<-@(0Wlf-=e#;<7`j7R8D&r{ySYv2<4KR@YFelD>p;4FxD1;4Q8pO6} zXT+xXD+&X{vOwEl1P9`!F%~(~Vd$FB2_gK!`Ton!Mh~hUT%Jq!MN~*XO&{DHsKSpI zb!!u3V2YZZC(7rDigB#b2GG=7>(Rli33ZKCGQ1 zWFx4aud)yy+l$}SUJxEHKhY||QoiO-NWvkHJ}|;9hTOsB_`v5ISYXFTQc-d7EIUL? 
zb<}NO3pbE|{qQ8$(dgM2vtwpjm}m{gXkUK5`y2TLxn2-({}G0=N4o*T(9pOQI-ZZt z6iQ@Mky=VpJv<3|mW_xCOt zBTwGf=TDf&=ErS9PCzL8hm(t7Wszi2*znmtQUa_*RcWSZ%^6Z#-fOd$_zaG9lOM7l`%|{6dX1uwirhsLjY(hNAW!=L#wER)9 zctbqYz%oZNIc0!ZMI<56P{A+I99O{l+6Gtn!ZxDdDct1} zqM8P`6cQ$;2cFZuyc@ycw>}dWr_cAAzaCdqj2ftfny@^C%;BJp{s~@RPvcBR3F`7X z3J4*UCRL)ocpJLYLno4QRvE`}AC)6FB@uQm z=w1EfTO{)&*gKH}66^*NJ|Pl}1dorA5(L-tDS8Bg{u6!noHUw_JXZ$kwnlkV*RMVt zQ1&eI`C@C{Cc!U~E=>3#%R5k)E{Ts+%_OFIU@Pj&bY^t{gPWz)5B*MWx${n zqw8wb{Se-uc+_zIVsrhG$H?6MxLFUeUfmC`)elFL+<3nBBM5q@kJpJ<5`I#pH6w&iWFAiLn z`F19Zwo7Dx9Aj`~AJRhe4P|rBVQAFsOI*m+^bW(SRrdFK>jNu=;ej6ywlCFU8HUrE zEyF+LxYBaS28uJo;Win=%|4Pq^_l2R@$aI#{{}(c&%@d#7Co@XF96W}Ke=$iuOl{Z zN2c3%w?63S?+T5iQr{Ql+>+v?ey*W#-7_*^9s2&U$e|>uf8^@GVF1AdH`=XOPJ;O1 zMOR+MDJ=!LwCJY)xPDb4sf-?IOp-*Q!2>Un1-L(F4)mY1E^=-Y24((mii7efOpord zzRUPE5hmGr%(tV1=)b=mS|`OstNy4S>b{d9Chi6$(9f08M01Bmu|te@zCDI2_o<0Q z6RnR}Jxg9?=fAg)@gA=os;t1F7t&)|uBu(pbJz{|Nn&g`R0NM}K4T88d7r&OkNAqP z$ek=h)#MPn7t1UQGd>~}RGx+*pQ@_HV6lHjQ?fgk_^JfRSz~WAVU6tG@u$XKb7P_3 zZdX&n-L~eHLrh+eM2Ahye@WatsZ=PO;s|`+R>2W|{RGCY%XxF0b&nt=>;v&4`gE^s zBp!KZ{A%YdLhndHkm{!wkQO@`5A2zjld#-=5r=f{R;@bKQBljW{X6ZUE+zQPjcG6x8nJd)8Z8KNmS6)1L`gqp7i%>VW(EZ z+PwbFx`zMvro@?2i$&43E{vX^pIa%7(y1Q`xmx}hM!iTzFwT5Hu!P5eG+l;tBxV6c)6DW?+f>C zEadE!uw4Oko5_J`aR+$8fblvU8yqS0P#jc09o^^Zc$2;1Q6I3N>-J`Ijh`i2v_rIW z1OSZ$K=%=_wo5A~7+n&>;}*l?7tlEvc&!D)Q`EkQ`qx|W`hgnaC^6XgK&DQMx5(^* zItxJoJ~o`Xr-dDn5~C2=!ay5vQ82YU<~p#ta*L8HuCpp$YT7W8?GtZ~A~AQ7Sz`4y zq%R=FGic7Wxg3rU6n5Uo69`!CQews50Jc8a+98{<;gqjzKIw5Q{24uaX&-;U%KerL z82CafXmJ=>gAG65g)WJKs~FzRzIB)=Uw%aA*il9V0*6F@qsG*fInUk1;wdSJwmSxX zGHtzfhtf(k%XWbGWl%~*)e5Y`t-!wEt@qz?RRgO z6#sA|1yFhe@^~BJoxWcv&n&lxt8T-kw-wEPoKI%?P(#@HwPYxdFJjp5Tdi2d$$zN zZ%tsAR3I81FC)7+PIMX%hJ14$A#ZGZAHp*A0UH)mAOojH>~Ro9%FXb=vB62xr#S_( z!`T9$Qv1f$wC%2y+uHIGY9dK&db#v+dW#0xxD$&$RaPc||_g z4I_=uatr(x?P4K5-kCoFqdkUV%+d5u159M&`=(#2o{8gcCqQSH^Duo-z9!Ln^Fs>U z^~k;?yog(Dk--4!5o$eT@dn|9q#8-Ns($wkQt>2Ha-CE)g)Q+o%MA0=04L-eO5d@> zcZjr|2Yh3xw0#rsti1eiTAd@g_@3w2;CaWVB$(*!2O+o#Fv(bIY}9=i?CdFfQHpC{ z-yj0Ff~3qc)Um(^3Gv3RXOc3N#*&V_b=!xgYgfL+>!x^FQ>nq~A$P=}!A+o@e(Q(QPZHLRm3f!|&}CNiZj%;CnF0|Va5T--x<|p7j&zjfFfOb2~AM}0qHdY(xi8(0a2<*lMadWB7zhPMM9Gf zk*d_t15!dqngK!&Eg%vg<>p=YertVy@9(VjJm;LuGiPS+*)#Cl99=9FYWO()2X+D@ z%+MBHl_uG7ib75L$L~U%Yp2%y9{DFq=?IDyYwW!+{rHv~y~ynr@aqT_rQeg;cvY`- zgvP}Hj5X!WM$xwgCLRf&OkpRiswHn|bIgnw8|Z}9zl;kPl+tl8hn@t9MaY5~25}Dl zHK-N+^#^cM4V%)EKPlOe#*goveja-f3wwD!X&1ODnkm1fn>raj1aGi^UrWn?$64bC zRH3)j!jgbLSHD$;39GHHteHTg?w{pZSOErB%lpO-(;M4vn=-kXudGeX6P71 z=3JITJ_&_A3u}CZ#az<7kKe|(O->dFI=S2{dVe8L%l8pvbn9p$HOZs7`RvNI@{spr zLE48?FSft;4aV2*7j%nq-<=HEj!oAZ!|75A+zrjRHpoBlc)<8dSeEi#>0`6-ohZnq z<)mdzBfZdorMFQGuQOHWso;Cb67bHGHl!%!adpWJZoXIYb}|WWVPbscx*YKpa+iU+ zMvZX*;|h#C{yx|+nlEvF7xl2`kl7peF3V)p=jeh!+i7-@K8p8}Qzqq@o2*drN{7pZyzZ6AE* zB;J(s$A62y3xPcOH3XleWTknJj#coHF>a}sU556#5yB3*I{i3o?78Cl_KfCGJ!jXO zw1#)z!mZ5y%JgJ{jKA_HTimpol3eznP~l9gQHggw&JRk&FE$daQ z{0_q$x*c3pZImuNku9LHwX973z4hSaW{`7@lI81H>p{w~-ch=PZ8=I5pvYD?NP>h- zgXop`&se*kz1Db^P96s1;%`@eF2!`5A>oo&hz&I2+QO8mHeetP&`wQ07tzY`D3&^_0__^YX22=-J47`_R zw`Po|#iWqSm4aqroQoX?H~~`3(#YX=H5ig1lpy!n-PML?w`_*={pW-~{#${PpHC$q zDG2YoGKdrjB!q32$lqvQw}p4dJmmWd{RS>CeN+eTXl`x2Gkn@~;PWz|p{@$)MG(~V z+rzDnH-U?*Gw(tB7uy+1dTgO>1m^`{T;KrE6u67=iO{)qD8EB{KOvTMau&dQMenhl zsM13rGb2U1gi-*{v#Vw?gRQHWc~r7Q9_LdqD|gCNg~mqe<7G~$pe_!MMj*9lKke!+ z1`;gmjZh%pMtc9!;$>h~%4H0uf!w6=##Z6q^(#naB5jqZYxo}U{<)YV#1s}rLFH<5 zj_#5OUG;hMmig(H01dwLwN$!&_IU1~+5^Hv8l4Z6s6__=eT3Ign{04;m(b(V6<&eZ zR8k_~0N}f3!3-}l`|mcE1M-VVHIH}KETFcVcGUtT*B|3Yy6 zyUY041Q!%Y>ufT;gUitx^vJYgf{V;=`6t4W(|qFD2J=xERnlbMb6;VwWh4ECMHvRL 
z$kMq6ta|94Q7}Sp>uYe;Saql+M@EHikg#5-ck5+DQW<+_8lEpO^^2SuTIT{WUirX% zIe`*YF!Z?2xel~}wT1T!2R~x|h!8zOA7?lt_)tz+aIKz0kr_!-jKEfXxS;N z%=@V(@Mu4$z8hYb^n$SIlAy+e3!2@{Y_g?wEI!5+{sBVK))rXeaCge?&zWfPx0g@% zm<1BhaS8pN$K699OmHDNN6TQeoI532`fu%eSm@fn{S6oG4+X2q3h^IRRw!oaJST=;~S zSo3k#9Z`8f@WWVMTXr{mVHs{18m9x+a85Jst67Y$!G9`|b>gd&5y%n2zrZ&<3Kwx^ z0pjXL>xh(ImCz467FZw3{T%s47VzT^Z2}Ma5HA8mn5g&6*P+XB62vN$-^V9NjyK~Frx=)bGV6FVn7sYa2-Y0@g8Bf!!K6{BC9ws3V;oT73O zSf#xd_UPgj3{@n+HFq)U-%<p$n0U$FtNN178ho%%=~9gu?n5Y9)F-&yCS9o$N77V|fCN}twXUqU5XWz)S z-TLw?BkZAWECq*&yR(Y6ta92$wSpS1(93U~ov`;Z8FbSDZn%;KxOn7~$I;uwa-1X= zL#vB`nnQ)Yo(JP+Z$$+3#9fW(yp}4GruH>F`bkg#zsYiFu`AULL%GOXcjFZMB|d9w zval2jyMhFjDlU!%XKGikypdTU*bnf}|) z_~Ug&r?_M>lH+<-Om0K^4Qccd+lIVB;bM8iO<+;)B1=V6@$~y7#8qjrf3>G}FSWoC zzVFGm7j62l(cJFWkriyJ5jCI;rmAq;dh>+XyQo@Qe4h!Nvx`uyz3@=VtUK@8yytP; zJQrtUtXj|R-wU(OtXEpKN=HOR2e_?85@?G_lK<^c=Zhvtfo*X@nd!s&LqGftByNI$K!L4HB^G|>WBj1)C^I7yVdh$!v|DtmIzm=rPm-|iH$x;gS8UVi)ad><{C zJNuLUX9+wE|F~&mE?yd_CBiM@H~;!GC~f=H66fQpuW?DUK(nm^)@tiZdyR#3Jzrqh zF~Ymxc_E)cXmWPY*8wY$+VDA~PoUZ2 z*5!Ktx-DQl3*1AeVHJR(RuYFd|C~L1^87fU2G#zlIkJ79p^0R$_iZ#Z(rEX`nfxIz zATmC5Ega%fShiJ38Zyl>)IfhOhxKrx9()M=Snq|G;mn~00}SvF1KQTMi0#Lkyn+UJhzbovUG_E)E9~=gEB$b!nqBZ8b4hwCc{0fSyCSn zGrZBCA;{<55^dB+y^1SvnxHk2kX9xVJ)DX4%HMIUsEWLj+H_82iTf<{+-BFj^1st` za#?i~ln-g`hPZUv$zh{W+K)CIZtqu;90-rM+KJa0{TizM2~gubhzXuIbf2$>-?~01kqzr%qxz#D! zWfj8%|7aovZLde&?qodb!G-&(+BPa09BKF(YxCs13cuaA+Nt~;GdR14JI-{xwRH5d zO41dqTSIZ1(_&lUpMQjR-&Dj%y~3zRAU+>7kXe1ddFYUdG)S_Uew{IgRVoQ79`dpR=3 zmr{W81Pc?MsOnY-;!kWuNU8gSnuyQPzsyk#5%UjHMtrns3e5v-f&c!1xUn*T29+aN z&aMBqa=8;31fY)*UeqBEPd!P5(u`3RJtXRYd#xJN*{C+URHRaL^0>b4A{uBU?&d#b zYSs1{_*+bK6`@x|r|HLv8z{_|otjplH>i|bI?~*-0S>@NAx~=gQ*lp*1%v**N1gBB zZmKxm4!GM!x1PB80!PFtPIv7!L#*PRl0ysG$?r8# zJ_tjtj{YB&6FaQKZc|J%o&D_g`ShaU}>1h#YuS(cIp1Ziitx{i_ z$-IA~4)~XlZKFG@t!*~VOly$hgF{Wn2p$bH0j0|ozRFP1ZKtEYfBX_-b~0TO@0Ywb z^C?SR!P0{*73(EaCL*6~qTKk3JwpAdk2+f+y+NS?7Jz90p?pq!(tgEl>8N9S?}}8m z=o;LREn(&H{To6fow=M~m(wdzkyk6>FPeMAqB0!5okODMvH2zQ3R8wauDC`R-9D1= z56f<)1LBmqDl3@Y7L?zxklHL1V)#&E2h~)iWS`KWfm4Gm=#A*UL_u%3#mfBP!g9Eq zF7>g8T#?Gp%YB}U=ffJt+{->urnc%30t+U3g%w{(^{vD&Ed2=w3w-86W?jyF;)}`p zyWIPK4O4pWUv%1QUY7a{VUmT-+9BTe!!{+znR>VCShF$j4=0cp9<5W1Ky9St<8@-W znCbGKm3hiBBXgF|FPgMg$no}hRB^ve8k;vgi@0>#DijM~kmA9IHwuxziu9>Zs&#u* zq1{uxs!HBp>;#10BJ%=)*PI5w=mToxX4-``p4hpKp_E z43>q}SmS$pucGa=Scpy@?<7#FV8u81A>dIdjzP-i!@X@fFkdt%#3U$AJp(U>O{m&@ zcd!5y-2(;V#w}8!>PICeCKCo#S4ED>BE+z;b2^mJAJ>RX()yMF)@@J4=V&P5i zmPSWT5V^&P5C0rz{#H3;h%bEblF)dcukpwEK2DtI`U=me`>h|qL-X=I-1%qAjn}M( zIED%`9@d^Wy(REP@54PtN6e2ep){m#Dcbs*Zo!1jhkRSrV)F2q`iIM_hGbw4>E7mXLMGO0gLZCNXE;m>QcsLV|w1WpE zBs$a*>W{+lTsd>>N4zA-#U|$KE^`;+(R@9S`^6I(=kpy9zSuoXK#I=rY0{i6oSKz( z_Eglv>LGFsb&BFIz4-jp5#;_{tFZ6FyHvknLte-1uR$8;)f5DHu%k%*-1pS<4xh&J z*s1k~Nn+M0lD+WheWi5`(y^ZX6Gh*Z{F~7+9~-?a>ikmWXwiL);8fF@Iz=kYe*0J$ zE)F>)OtTz(%djrGCy`(G-urmV*r#C{e{}9|s51+O(7x^I6ygVJFZ~Wn?Cq^k&ilbG zGba4doc-bLDvJ^FaCgDrMJ%l{R(5DS%b-rgI?uzC!cVX_KybK%$*sjOyEh>00hp;r z@iD80VFDdZHSF!}h4n{kXV=^l*+K@hbJ*WA4?Iia=vt4Ho_Mu%k1{7#sfp8CvY`+5 zHj3%_{CH!E@md;3`KQWFI66G>cm-i~i-y8IUQtBCRO4HIS3&ER$nL%DyRWuTxZ1+$ z`_$dqRF9Pr$N>Lul2qKzc^o}H25}!E8pdmp+E;(0V5_$!XZTbLTUHj8x zB`vq-X@sIp+*yDFZofC>K(xB=y$BikNi1bsws2Fxd%zrIi`f2U;@wr$dV7ieF0&(@LU=URDzz%R?8io~m* za(B&@wr!?mVkIOCO*h)=!DV;W6;+%F4{Rk=SROx6@f!7ZfD0OF=doL^Nrt*^be<01 zF_OLTLpNZ+|2MY~MDi{UnuV{btHAWnRYnQSijW3Wz5N z!+x%024t$hdJ)F*3VuwNH8)oug+d-iH~c~n10FBayq*Qh*TmKg#2j(LQ*V%L;e?AK zg7ed)bMoDD{{?lm;~YxZMKSH7{l7Z7Q=;@fw8L?M`04qqadTv?_=n5-*L-*2vp)8y zAqELn1ts8J7@Y5nq{g*x+<$S}_7eNZu~x`;g6rY*zf!MuMZhU z8b7f+^J!GuAg^^rTONiH+%EaieC`wdXT9{I$Ma 
zEk@H#MXg!zF=1PlVlv*J1*j>Z69ouadMy+1mlf)`Lba8^CklK%Ft8lnXosio_dh?0 z3Czv#njGM>kaNnp#NQ?Ba=Fv||9p%1G2bfY_HNU*P$-b}+4eI_DM20I)b5*yT)7#65O8|0 z#3xS)+=XAiEo~9`>&;ri#D-}A`<)MySt2a2U3SXu%2J@#!FjV1oxg6Ady2GQeZWvvJDeo{tOkD39C*&H!;Vq)wYd|A1yMm8CIBCJA9Pmmp; zlXuM=$~m+f8r9-gh*2J594J*eLRQo!K;& zqT~oW&22OHi(BA1<+MwqX>dKTzz`)^nURk=LkY5%j<-xns&I}^=U(;@%6ab6cQLCL ztbXh-6T6U+pC=X!V2R1>9+kpUm4M;uUr`CwD3L2@q7OcnMg3nn&3-fH?X6T?jOwj8 z(h)OMRi;<|0LtVrV)Q$rP>~^YeNUi1xOsYGNU-Y9N$W8S{d&pj6Jb?>Gu5?cvv` zSl^qIWSS?V%o@>sx+5=G65rlJnxEydIvHih#_CwzZwt|j@ZdCW&c(S{^lFcBC`h09 zG;4MqY0`0B-s$DqDs?oCleXOf4%DO~U)_A@7FGi~X1Qd@P8Q3(E4(srXe%cmIIO2B;xVfA+xK(_zk~0F~8NYv(M%Yt6%f-Rb3Dul3~*roEQ@E4g{M zINRbX)rz7TD^1@?wDCm58$3sp=Fi)m=wy@DRj*a<_bjdK@XP3+XY@M+{H^7ZM|T7j zlvTJvD$pZTQ`$Zqu;PsuP3Ezz?-1;qDS9iCQn;~Pz8^|d{2?g8MThdFB}(4Q(k#Xu z3T-6zSOFcV_itI)M$Dfuyj@GM9iG^Qus~+ZQmBa0QMKpJ>rI?rpz%JUG%DR=Y4%%h z*9a^&aEXHhMmY3F2IyKP4*Qrffdg8WPmV&7PxqEL5wT);z+)<)ti@?Vx5T$sVz{r6 z3J&OVBmG`lu;4zD7Hr)r`vSt$T?l^4m+PzJu#k(>@5TT0Y8HZj^XsC%iYv6>HDs!6 z9qy&=WXqM;#izOP4j{}Gzr^UM07}&IDNCbg@{20-*Y2L*u#eaxoLLHS*qrNJeLZ)W zOYU?=q&@_OU&NQuG2+WdAbZTPd)5jTg8T0WZ9{%}4b?N@`8B3V4Q`A9k92{24xY9- zSMBnci>m{TXvnKvZnzMTe!2BHVo}iln#LQJFOO9uaJNSj(DLAvD}cxv5GA?EbK9&> zu}iOU>K`tOmIBj+bNNNFb@%>rD&1#`oioN4)IH^kLY*^gcxRJ)3EKNUk;28Fs>u#n zJr|dIp;xy=395Q|qekEcilB8r>gjyM%^xr3C4rkhYh|kmpAGb>GcY{vcy=H+J~|g2 z->be74^0VC3&wM(M_e((Q{@qKF-=El_`Ee^^?yk(Q~qRkeHp&Gwt%{87xW3IOq!Hc z>(BcX&rW3ckm=NY=<*|ePeRZgn@h?7kPE~MwfM?#`iTEdyMm);+&235)$8AVZUZAj zG`vrz0>{T5-~U9W9aV6lS7}utDE#GOYtlyY?QrSmz zVX4snPDpMarb_(q1IK&R!!VNpweFF(olaW=_8$uPzc-4!Kx)mSf$niL%#bY**FYIA zg)~E00v%nh)CgR@qghc#KO+5i9f~wA)DYP-q|?7C9orRozS>3LjUq0Ytz+zR(AC66&b(l%BS2p;^mHkS%5esKMzTlc)J>1rPzh}??QoORvRi&VgzECut zatqi5WaunQT%Jsz&l0f#^ywRq9sv)O^0`_kj#9$#1MFX&w-mN;&g|z+-e=nvh3k2M zbGl+)HHTm@PZ{NIr`S%z_Y!ze-owLYhd)^5N50PtuxWT@!YmZS62x2D>*9#HIQCuf zRgA0Y2#@@V13V%go}){dGu(Fh+pvAWcsI6ucHjsp`!OOMxz2r zDu04=x9 z^|G0fTmOr@ zz4urp+9V)B;#xb$c_PCNHe2lqEN62+^qGabF0&id%^_ZclVFaYpGX86lx1QKpW1K! 
z+E{V>avLauWf*N~8c3U%!o6(ihB6t4d4U7u3?1+?_^~9Fc!2{vo*PREWHLB_31Ts=Hc2gyjo7?M1H$l1HY1D^G9uc-h-&P*u~?Z zj};D&0)7wbMCdxUsBu2l%y_qV5>bPCK^O=f^%<`blBhXR^#{th^~c2y+AzZfLBbw5Tj?D3+QDyp z1s~@ft3Zbo!2sm6-ld9psmpL$|a2)97)^IHE2In>Z}2KScP=i@YK)Lyg)_^U#T%@j;hcWUi1wSIGgib}S6ndwu*hGM^k>R4*y`U;<`A zbJi$m=HdKWC&AFFsoDzP_ux9sq;bBv2-3|zH8^h?89+WF9DvS?(+RUGP5w-5p z38$@~7FjYv_kbq`u_0 zPIa{)bV70{?nwH3UI^(<^p4D=#Qw3RY?S${qQ0< zVyrPw?@XSN>l6kKO7=^Cbaam4q!L=J-#5&;KFjuZ;B1^vSZD%MgXVAksl~uv(-+aJ z)BiG)r!xi4U8pXBPGuJtNlF-Yl=xszAMK&7Mb-hLerqO{g0B_kXYMn`pI)bxV7 znG3I~233r;9e}?;yL_<0yiLw>{l*GA&=u&$=L6|jJ1l!=$J7gUxb{gLHg8o^t)KMt z+#7!lT()}^?-rYIQmDqjCGZRl;Rl+7Z#1py@LA#4s3~RVt$=Rl?|lQ4@j<3doC&Ec zcph<88e6bv$+&Xf1EQM0F`mw1ln=P}Y`2UOHI;hdVM``fPRR_95Gi-@R|6iLJS#!E zLIc0bB+~vJ!Eozoyma5Gcv88wJ-Rys9Pq*L5gR0f8gk{jF-vIKi#cPCySW!+ERafb(p6T-?X#BYmDs&CZc2M(H=jC>(>hc+mptk-*YWE!kP9HFeHjK)w+a zaUwgWq&1*F+0SH7?qBv+n%#M)oj(6BVZHe;zrrdc&WiN$n45-GCw3aG!;^CGv$OdR z`zjeB(4K7Rgg_HL&p);w`sk3QHj_+T#5a*-?8HV1T^8_rp@W$FY}EF%`*f=Zt`PDQ zSm+>cS8qgd3#;KR8t0L?=?fkzK|sk|26L9dC=TFJoA2PWI`wEF?~xy}lm5 zbDN#C61By-{H%T72qLwAKHutcc^TxFXH-4!9$( zO6dL!Ju!HkeW9%2+sADmbS(??Vgf01KM>cSJ+Fzto$cVBEoB+0s#{% z8jpZ%T%1t+eW9|4fr8C%v?7zo-As9~mQ!yzup-Pz@%0MI!NW{UJ06Uy`|B#jyWERCcij$5jR4)J;L&0nVFCfPZ3fDVt&ZGf=M zJIYV<{yf`qGJXZ@Cx%)qTKAZh0?Hi~k%p`iM{W%9X*nbdY3GM^V z=e-s&QUbo73ayk$(n{4!H^P^xr+kp}mj^^?x3tj8gx-p1o|A!YXuo@R+^$mJ&jqx`(KWjLYO=sPYBh4#rY_v(MUc7jo7DZV zHt;BLPj6edU)6gjq1BkKaO2%+p>05qQfJ1<=W`mFUjad8+qhQ^^a{Q;sC?M~MX~|3 z3s9CAl4?)&TP<|8G9Fk+Jb2WI1Ks9H!dxHg zqCxE|+t6D7HGMvVFdwNZ@cSoMyTc^KCx>u>1VdN*=^|>2b0;k}lI=C}^v~3o_WDnL z_H!VZqiJXjXzt03vDkCHQ-6Cn?sa!{ePO&68%La}7oXdm%H0sh0pSVa5^c~spYNkp z*vl(-7KP@zj0zDzPSn8GJ(xr?WWe}Ic&M703 zp5?HUQmO(;Z)&F^Swr#@D8`M=G9#>BGW+(j&oPvzJu67dd%9{c6E_e zr?2T~YKEP`YYwNmpz!g(*SVD_q!08l&}}UTTx;BNOF|jh-5Y{ar=ZU3B2vh82oT(1 z;_?d*Za8gk=es5PzDT^=!E1EDUq(2kF0uH>4TKQa7wHeJ;Y1f1?DE&}49n#^JfZ

`pR$?V}zDQz@+Yz@L@i2p@9aRPKY>BzyOsL!gGTT9iV{L;r?HXn>rhs z$GLpJqnSEDW6KGTGYs5Jhz~eTdHau?&O_J2yKoG)aJk=e#wonUKh{8vXLvgg*DB6G z16$?CRq>7CjT%iP~-_g`jaVQ&{Zo9M}7`sFMi#?P9Xlj~>;Cs!Ysfk{zWmWQj9j0(DC8IX~ zvn<(w-{`|nLItWoeS8^?kKcAP_&v)0e(LHQ5Ky(N!W;k4)5M{A$>e()oMn2Hd}57j z(34!eavoj8%ypNA6zP>4@Pxp@L#6`s;%)a#+g_jF3>yesisZ@S;{%_E_{Odx@``SJ zi(w-%mTqwZzwJk)pzv@0{}u!g;;`iD-2q2(zB=*5tBn`5e%YC=`(G=-s7&B~H)m=s%xkK1~sN z?ipKQe7wQ#ve0#Dn48r|3F70VxcK+SL@fR8NUsK(MNOD!s`G^lUgcImseDDZT@9%b z68BgV&k;MD^T>>f_k$vusZMX&4n;ttO&P&?H50JB#0hfS4%8R z9jASv&sV+fIogl^E7wC}MblU7dLBe*tzD7ab`Kx%Jn5&Ypy*z|b98^5CV}@Kzx=NB zTG~Zj{$G#YnA-{uob@0DChE5myZOd$n*e+l)kf0rA=>ovlpV*m1aVw)|()hQ8aFYA}K5Cz>B~JEH{xv|Ej)tp`KyU8M}G! z&q_90Fy$DoxD-zPo_)H}}+$vz`R6q@e@tG`?R^W;Z zwC@mP6yn+cc0=hLP+d@XAr?#ZMEA4&o8PNUCNer!UkVtX`Z_@R^%k#3e=A=p;(VTA z_v(jhqAIz5DTbb%=MZ@5%6O_*SZSJD&Hb&Y_PmSEfy0ZJ%4VID>3F&$yoG56mG2=C4zF(LvEvDT%oH^RnCDg>_hOT{(=%gOxL&{)Pb)MSMl?}Qr>Dy)E5?%OI zcQ~OzPo)fkaUW!V8sgDrt+-GQ;`29qRN$kj&E^$=xO_$cmBnD`2Nr*_0wBYU~Tpfsqj^HAUWB~8+lV~Q3cMN0h2tKe&Z~4y8d@O(-pidgC?+Nd=5ag37d*0>7EPPgwnY zVdGMZ93VwQT!BN#^OjkOq=h5tIY>RZ9l#60F<3oV-6O)yd2ev;!r`|H=JjsCCh#nQ zEcU(ah>_3!84CV*8h)j9_An}=V};BBvisNZaf545+cFyW$>pK?v?%Ru&R%~A?FNQu zlQN3W`{0hn6EU{%7aMp2Iu#la20IP!F`O=g+cD-HzZ*wa2+V%(yu}N<=nN2C8Ti2h z56!6YBYf-s&4It$hva{c^s!oty5wrrd53C9?Id=-Kp^A=)$=dhd*ab}|DZ;if-m>f zX*J0$O{(*vP`=NDT93X-Wa$_g_!p=b9M)w_O}n@SD;NpVYIEcAPd%?9oWyE1}`CUGJjS{8U8k|1iwK*|81ej>lb-xZ$zo18(j!yvh>Y@RR@8KO$Pt%<^ zS}`trZ>5y+Z*!NR05c?D_mTTRD*lUQr}4T`s?to2^`29~KlCa7Cc`I2^XiIw6b>uP zsn%n~*8MU+gPtSeIYg)5d|ji=`U#LF-d_8XZ^p7*7xJYpfe*$4{La9)H}zbVE9FWM zu9ric$f%95!H@ZL@ft+|hsb=}!B-a6chW_!z8RoxYrJOoG*Vb|D-r$<^6YD~Ab57G zVoa3&7BOL!4}RIsOUehV{3wT=dUaBh<}P?yLnb{>r)0$rPC3aQ&|4-Dtj2_4XZJ~f zjQI3HGEj0mvpkn4F|fnYRv*X2vVSBm!d<=e>b-+8kp~ETJXQ~^OzIsSJpAvCkTzlh zVKNzaHiytUi&{1X_By4JlI-C8ouDZ>EOb3sa>7oC%tm%VNG%7@@ck#3ws3AUv%g#z z^4CyVK7|?XGrkSj*2es~OzBaQl6*4Y`enZgwU5p2q9m!b{zd+ zMk+v|kufnpIJh?%(02U?Q$0H_zq@1)v9a{*Zk;UQX&UF3j@)lt%L@-&75H)51uDrYIkbH0{m*(k>Ol*0M%Glx_SM^&K}mTLVxZ(LbW8^;|jh>Ya^!)wOrbu-?g{8k`)Q$9Fg|my2kU!A0s=Jl!A3-?_DXBUBJueH# zi>*bHx5S%~7}kIC0zGnNW_;RcG75{#SM5cLqsYTIB|1*Vw#Z%ga#8r%%rcsmFRPxh z>Lx0ZB48DS6WPhF+Xp`WL$#=yrkPXYgbX6zIUe)*L##;~z{YvcC&5Vzztn4f^R~=hf#yITyMG6M1O+vnv}k16>@+D=v>#%q8Rss(vVFQSZ0!Jl6PVb)#VX?+ zTG5tg`QwAbf1j`Y3$e6NNaw5sO!I_@)>^;RJQ2EHwFH#Tk+aU_2^x+D?ai|S-?e|+FT`mN;@I0KzMizb1JsAo>2me4`D|sqWRJ(Mcjqwm_ciCdkIiXY85}^Vkoq8+zvlZo-e8t6#-$1VV( zxtByb(iN?@1@-dZlrwRQz`}5%EOAg8Cb*jg4#6LMZkQ63kG1|njuN0`YAStQEVQi_ z&fo|FRGsFYYw>pR9^MX%;pMh4xAR3R8A;6nEc_;^bBUDnD z=7y2C!D%tBDq|BP#JgLnz(Fo(2zcJL5e2O!!znLZs%d6HRqDaVQ8%e%$XS!{Yh;=Z z+2qc$fe#W9rP%NudTJvnj=Q4!i zgwes^7(keJz^6x`SZ44>eApXtx*i>bbWucOc^EUi(bK{;U1})6K~ImJbEh(^J|sswu!fs|K3u&+wxDt` zHtT8T;gOkx2f?EQ7ybm@m*AG!!x5d+zr4yzFYY^!c62rI$wW*3jSfgXMzsUSpKSb! 
z>V({+z2M=wbS#=kdY6;9JJf`os1`EQ=h6Jf>=b+R_aa*_W!}@cFKVw=2sPXRuiL|u zwl%oTjO`RSfzcyi}yA)Sp6#Poz~iqf;T_)3AmTMS&GXd4v;D*wR6mw z-V19zo#x2lvg@%-H2@fQdzeFLijeK=ER<60+Ns=htW>N0=RMi&uDK2m^Vly!j?13H zYRUn4u7CO~40;BPPL0kXE(YSL{eo=V)6=5H{n`N92xWyd(p~VDj!_v-e4a`g0m4_e z@}kCHaPGnekSVBLw^M6EI_SilGGU<4*Z{uU-~~V*7AFL4ex_uFOVU@$V@`S#;c17*dv7#&+y(MbBaF0D(c-^s8DL)kg zOO^u{l+7Xgkm;S4$Ma+iNxx6xZ}(&3;51N8i&9=t0-6Z_HopwR^NVcBI$4Ge^;zUK8KLC5 zplyVo-+{YUd4AI-H7E8CHbo9A`i3F{K-Sp24eH82Vs#Jc^G!qX#R(1PrK3f9qUdf= z(v`(O2Ck|msXL|gJkIl^|NXMZmx!Mm*uRUD(inMHT9SScTg@q@rLK5!B|~~O15l(7 zjcI(;8GG${j9X?VbB=GU&d4<3xa#3iB+nYd`9TTpI$+RK^xxk*(rF6}*Yci@iWDc6 zFmqLz-{?}(EB@NWOASt` zDKnnDm?`OFDdxjBRzWPjx_PuL&Ru8?MJ12j7>+%&Q6clIpXp8Ch!s8%rN1n2klW|4 zG-=AVB^aFcA(Qvf$#G3^VEr#@HQQpBy<16)HKXSmMYaVTg@Cm(J>HAE@d@aJuSYt{ zl)e-Pd5qWZ~fctDlh$kYY&-Xb1RiZCIoc5i_f^Gq0KYgJVoVGz#7JhndfF-o?q(EL7WWGL?Mghgb zdUaEOYYA2Prql=4pt4;|CkJ=&GDJ6Kh7Qa{c)_}b?+PjhfIQ`H7{u4s!3d#~HRQV> z*T9q7zYKel0u&5Q7Jp~Q{j@MBv{na?U%w7Mqam$IWV+iG3rhVA*d2VbQpgPm-0`y} zWW9|EdqB_AoWggXiDGel4XwS(!a^x=9>5*auO=?RIEI_W;KmWi(6#$z@y;i|u|P9? zs!f-#7yOOeTr1m8eJ|dhHD)7eOhBx8LQKx}YbjTqLMob{-yzC?S_Bv>^t8uc4@qt% zY1T@Q(J%{#PM=Kv7^L*&NeNhyZ|Pc+@8NK9>SIt8u?W=>u@A7&t;oeBj8q<2|oMI!vBlJqb=4r_a*0D-N^$jFFGqn z;$aRtsJZuk+GFSseBrN@zl{`V*E?rld=g$Vf;01@p}bk(NJ9L@*v;bI!IsX=X#2MS z#$Z>r8tS?ULg`;_@}51Yo(ccUo?esq)tZ2>D~Ne*PXG2mmg z&(<+^XKvm6U9O&qo(TZ2Kk&FkTu8?|)bI0bWxzfK#A$^CMhvokZ;P8=jDTH8+#$T&;-C)$+y_GAuF>YPZP)&hU}s4rd*%LnIG4R>atdD{ zw%k`26XBt!n`PMamH!~%0Kic!QH_)>p$1L-+#xz@5}mF~@S-HDVYEHnIau0CDqlOj zWK4$o!yCBPhQJ90wc7o4H`;c{9l>kVC7cWlS7uoqIyf1C8!|;Jkk~s>;{Dl-vV7q* z0ZSnHL5D7ZM7sfJ?gbkjANZB>p3`V`_e!C{Du?(gAcZ**3)6ha-;=60+p%yTI8g&C z7xPKu@jYQ=xhOJ-=z>I0Tm>!ITlKW9%I4N-Z5twpRo+9MB>aH5l%Z7_si z((`>e4qa)O*Au?R$=U%r1lZ?L?KKalpd%Wt5@6cXHIF1UDO4h}$ z^ZpOX!_~P8Z@cMmf%-c^rCqTI??HDS52sN&fb1Xej2nkd-t`V#Sj+Fw3!mx_W46~! zznGl2XhWuGoQZB~oJYt_;+u!fhCuBsv5djZZ2&989ymCGS=jT?y zy;d-0!G%BC@PJ;tHB;IXHG7Lw(wJ@9pQVK%=9bcO!&eX9G4Gv3U+?OuvRpsHZ6P(+ znt*LXT~P!R8gH$}S|-=(Ml9Kgq4L-c`p~>(4!dGN_0)~;M1y?$YV*TCjX!1M63b5a zaVn%L`A1LRpKYUQ-E`JP``?BOI{ZOJ-LB>*;)Yl!mLlg)QX?AOuj*c|^CC@cQt>YU zLkCpk4R>JndRU3egt_74duYjP92z! zQ3Wh8DWwja&itZJg66=B&WV?ek1P7t-LzoR3^118rB)$Oy5Rt0RG-V{kBRK>A=po8 zjI?=}$kT0& z&V<7vK~9sf3c!x(I6BUXH5Q1l@=&a-0S{saI1wPd@Eu`*hro?Hm@teVG5C(-nrAk? 
zVE!h{Xju)(5kOFGFkrc@oqBy;hsfVbfKS7-u1`wQX+5}P!VHURVE&%?7xsWLa;{k7 zKQADLg}DWxsAB{xQ%Sq%{hc6;MQ6LYIRTm;AZh_YHFl>#TVCGw++@MZHE$Q;vw%|~ z=K%qdWdfuyGFQ7f)?rf{AzoL8Oj#XNN6-UO(|Rw-Ugi1UtC|;F{K+tJ5k;^esl6Aw z(xAc!*C(M_Ta@KJ;DAH;_px|aA3)0Eg@_BMh}#QBZh6kO2kq{N`3i5JlbS#LWhAI* z(5WzxF_%aZtqwFNDet~AzfSSY*QMY4H)PB~h6~4xDtV2d<)pNK)AGs02Nad%3 zt?84+jj8K#59U6%c+lVx+}+&r@3=?ce~%j{<73*l=ta*{z{HrEx8nqv)m?j&bt5d1 zV`5@y{<8t3TK0{|BErTLp2ha{Co<#4J6zkn&Ef)Er=n1AA-GQ0(Nkyo32G=wI8Fo^lG=?k@xh71iJMQ!{4*SKnI+> ziHjGoqe)1?`pqN0rR*R-1B*RupBV(PPG(MU&G0U)DH{|%OV#6wD}B3^4}Q)B+?Vw2 z<0b^V6;=>)z9?P2QOJVN>vGCmkF-mrJ~V25{hF)*-x~i*!Kgd*Qn4r0GSZBR8YH2+ z9_sT_4K0L`Zjl4hg(E%Qz9gsSQ4oF6cirSz<97Mqf{HR(U^;lao$*oeD?(fO`#ZVt z{10j&wI)8@E75@j3lcP=(_h}TvH%DQaJ`AM1(=T`C6)0llaMgG_|cWh5m{-A~*d6ooBbTp+tV`4CM3aNkS zz&!RWR_1{>THJaouQ|~Q)R#f)fCR03UuFaS7Wu&ddT9N)=!Y)mhTA4}C!$$OQfb`Q z?}?=Cw)8sWri4CYGBRo{| zt^rn7>8v~pj+WDgCpLq46{@kGcQachlVkN4z4L1?H*bNLox+FhI7RYJ2*u@iSYP-> z9oZvv4$#aXUVO^0^MV*gHf3kgl0FeH70*Au`|+b3TN4b|t`xaNq9S!=6Cc*A!uevX zBz^)Nv|YZc{MSby&!1%JBEV2E?7F}1QI_ZT8*h-=0Uz$JAiYCm)`DGE_$Kdp)cq9X zAEn2V0&JZAw@5jB*RcS3&@gTVKM_eyS zb*p1*T*N{Oy-|IcX!O66ZA2Z*o2(g7CGWVo`U8Ewd3OC!t_&)bQKL^Bsu-=;H+zk} z|Jt!1EPj0aqP$atu}NpnZhCJJ4lgFx5pEs|*QKJKo8f%+Tt&nJ<2_Gy9sK>7)nfQd zAlZ3#`(@d5)c&sIC#oE7)x=H7UwxC=rAbEj8-5-6Y2-Zvzs`7poWy|Zbz>(iG3^j8filA{ALQGx&0 zy(G^9Tj*f~oN#bg9UoW($T!?@W$kVb-&6@)dZrXaqD?YrE$3BqZSGBXEk9(c9%xto zUoX58;sNS1E^waqs^_P=u7w>fm3F@2X zD(&UL@gg7uUgfjK_%alB(IYpuiG;5{ye2@GOqzg8<{{|@{U;ahJh_jqPmJjho3>j1 zp;93*hkF$<_s8o`U?8S;{w-Z#%Qd_D-U5LSf#fqO(WPe>^YRLY`Glj*mkcg6otic8)2U0 zdIv;>F#7-Fnd8>_^4jDoL?XS=E465gk>X4F9|_hd-BCC--X{;Ic%bcU9bL{QdX($i zcLq8IO4zwyBm!c3X8-(xkrb{~{P=sGCFeIpz>8x>@?l`B&2+U^uk=l#mngQZXvhY; z+?#tM?(GOAgMw`w5mzAwlF-k7qi}1?*>J}G*{EgoFV-54R)~8mkj=6AUBCpMT*|Z6 zuF4{>=&BO~mM2mc1Sy5Sk7lBH^X!uRSsgMh;kPzV&N$2S+T*k)D{OMP`WxwEWzl!p z%asEN)1kIz9W(|o)M%1ZjjuT$Fah&7DtQDRh=&aZ$^0s0Mn4QyqMrKjq;9Fp`(mB5 zuzZ_HcF~hYIO@_Qmc#y=S*oskuBL+u1IUn6x$7#WzZ)`GqE4Kt68fCxJk(llmF4z5 z;kSo@FST)lWV99~!)&E-1nkW?+=Ei+Ok()t;c>Rs(sP_H?!9lJ@XzzoFA^vn!GHQL zLL%}FN)*{&$t;tJO_)?Ctdas4xI>p;z&OH!!gL2i; z2XgT`xr+V_U40gwB>HpxJ~!TYbpAic-TrV%$t0laX-*=jTrELk6z}XS8ZDLHOuNRW1TY^k4VRzwMOBTan3FRXi02Cw^32YB)snN zH+;*&%K#B%3Bw}QFZ^c$r=CbJpa%F04*V(lmT^qLW-qDhv8M^R^&Jz4jxQGulqXeM)MGg+jUAc zsQUC19=kZy-|Ty|C7;Br)&^e`SpL9|97lN9gQwjlYvYvRu^|bTuoA0=9JdzH;I7 zTUo75K;MP7;t#UkZ|Siy=X=a0u+riABh6uASwGk|CUKBEhzb%AqTX7jdFOyb=KoJ2 zsRCOmA!tX!=A{gmog}&2FzZqfW@c7`v1+d_9?*X1h!r)eeWcKh`QFLKF)*6fRr+POJO}dBVZi4p@pvRW?)g&N zQJ5OVe|eMUYS_ugdc*Ly*z|<#4EPcPjPk`H`Jx>MSKQ+JmaqXq4r>9&3ag{&p;7H? 
zP62too1G&9k#J=hX)vTA>h}qH5vSI7aZ-+|3}y!!y_b^j8v9bGoss=fIZ*x(kh6*w z{iNZ2z(JAkw7=yS&Wi@W6K!4K3rqfD-Gs(S=YC~ETXNl+8|bl~zZgUgh)ltSg5aG< z7hBoWg5eWe=qOr}X}DN?*o&U)H-#*EJo)LR${Bjpy79qXpFF{&F;bRy(`$$N?|4)F zK-Qj@E}h#Jvrk9JMv-K_kMwS6qkKAAx!Weo`yXq#*%`jwxqkwOmB>&ZfDglWTVel{}F2(s~9kpsX|>|KpKPb&3AY z@9qG< zHbq;lOd8$*&#{^8hC#zij7^T`09;fEeo7iMyF&+H0!x2`Gd+xDvTF9y-wq2=T~IH2 zu-L?V(>#7b*K%zN5B!n=Wkc3)tVF8L)RrRmS1+J*s z!ut~*;geyO?{yESCSJf7iuNDBbv@L+6O~IpX@}{}-ez$CjE6!Pr$?2H3x*%u3wPDe zaT((xwrMMNEKk~hnbT}QgPYXWKCHk1O3nHvq{I#umQ}9~+Ik;U_S?tTD1nMTgFZmh z!1T`vr$12CrrG+rO|E%EFHuRu_r zL1?bY1ROW(CRZ!hGduY>po^jrae*2W!drVgA-T}EI@!WnpR_Kw0;@}Vwh)v+#+9>a zb%c3OD-mol6U~abU;g)+@Z#EVppxt z!+>rpwe956{URNUKOVF0+jO5DUDyGXw95uGd6yNlOdA#F^odE(!T;JIJgxACoh5hB zg42+;I{Q?lZjHsn6v9^JT#1!_?(DqXu`!P3Wea|8J~UpnCCh5Z|7Ra42K>%bx0`Li z^@_+;Gxk61zLLOwO$L)ta)5+8p&~W`t*2JD`oJih{$8+u5PA)w1Pyx_(OaHb74B3} z3=E2=ivrNyyd{(@a*KrP(INzl~i+PvvVytEj3D zsxAyPlL$f&!ggU%GN=s;)&Ly=G7b=zO%*N?#;4*a%?&|3vuwn#+-dnzTs0dDxV*bb z@gOBd;i4j)Rqv#)r(u^hlC(T^5=P_G{WAq+Lp}m#6aqF86H(a zyo9t%4(%6vtuKT&KtdK_X?qDk-F<5;nGlF^xWK7l=V7bfc~@ee}tf36#0u&6pRrCGlgD59-;S}lhc7(h)8VB zMZ_cW9kt!5kch{JXgC+Sgo2Nc4_s7PCc*D&_h_y{yZ1Wp;s=T&0p{jX+dw4u9Tg|O z@#7w^lG$gJ=s4mi;Qs0!9oCcee}Rq8Mo0Tb0^pl3Hf69LefR-)L$qnopMJQP6)8H+M!Uy8|jEzz}Y|hu3Wv_xTm=ia&QJ{j>+x+Eg9?7lViv!eg zk8s4q>9T*FW5W}AVCaNCggmHkeNN*@N?AwXC*6Ts*CrRoV2 z@ho>WIsLh`KxD$`wB93qoJ%S{rn{nq9wC5fdmpflXWXO`4YvcTsX0E2c>jBvl{cqZ z3Z6CblMDw)-xukw`0X(tN2e^CD=NU%3XHdlu)`$*_ddl>TVzK?R?!YyfM&z-WmG@% z=%v~(3{3m?@qR^;$*`528J|sQpIYjZl(L#Pm))YL=%x|GS%zfUe_Srui%0qjk@U@j>m2kCxWK--MI{Wo$32Y;_G*Nlizpq(L+vlGwIdlj%Q`^| zd!D9H8^*;f#^AvZ5g6cjr@{8uchj7TX+Pyj?z{(WqNhSaT=lGxKNc=v%;Lf~a}!^QZ3Hhc4_La?Q> zz?Hk=9{h~}dB^N*+tmXe0X1z9=``1HLuC|$?L7V;ZN8R?RBK32uS(sA9n>w%6(uak zG1tM|CLbF0NQmjmowl(d2L!jYwaIfaGs_Ei$O|s4&V|^Ry3*=YAKx2`Gf>}E54UCC zGgHnpbjSs3KZ|4-K)~g~W8^pt@zqz^1 zjO!a?K5K`WwiNKbbGiUq%};$tl|L5ry$fT)dbzqvp;zO=;!~)e&NnHP5qH5d%uPG! 
zS?n$HLr=b4c3oD%Tkp|X(t-HzYtPJ^|KNidQDc4P{{9@|zsDL?TpU73XMxG{@=GK( zt1G+syKuND9Ve^J$wf@_w}Z2oJC5MF@K@IOozXbe1%ouqiafR3phP3qnV1Y-0}W#O z8Zi}58!&nEBjb!20GIq!8zyV+gAAfjGu@Wn`+*JLH9j>>Cu|q7 z;>j&1i8BTI0Y{jR^zz-A=G=+$BW*2OFI;PCDU2mbWpC zV8tIYOj*!WuCHQeXE&PF%93Nhfq=BaRE;DdSNwzUqhtrLt*SA*;k?^GhUaIn`ocC# zT=g;KE48KBOTlOvsLk|iQW;Ip>NFM`p_LlZRHl6HlFTjtX*{*R)eH9?_k|k5q0}qZ z zN>Kq93e{*|tq}~ce|ZwiM?69KV&sg2Us3o?6t0k9XE(AsV!o@91`ewy;C;G|wG!^cv)}pn8r^|34G<|m6igeCskIdn1+{-`8+?u z46sk#X@48>J(^l8fT@NMwU2WSsLF=XITHk`fwnwRB@N&`I6+`u0)KZg9e8Hz4sebV zOS(sp4P_+I7pz=GX`5iOYijt0>ve+&g{#ZOR&n9L#-e8{v+EX`xN*T zni%SAaF00ErbPRxREhdgwr8EUcK|7SyXN)K;(XEoM&7QvgC345+S}Vh zSlL;R7L{peTg);fc-hJJ5Tn{4slbPSA%s$q3z4)M%fD}8Cvb_{ig#pP`&uOm?(XI) zWM$PQ-Q7=~Wn$=9nG$5r6`Ex|Q<^h%?-8}W!FfaRrX=NQtRA1(;OwMpJDP_`V)KW- zfw7|RX!;Bn=0_7#t6tCBWk=b6GXs_qd%N^)(5AAEzZ_9I+c<5XqUgk*xvf9-c|{DQe{{Xft33o%Pkhq6cOOik zb?776w*RKI7y}YF^IQJ7P2uYT z0a$4%^8@1dv7bZQ@`zd8K~kMLqw1#<>ak`Kk-@sSlpRc0<)uAbt`+t2(^C{RX5mjk z1;t`kMOZtBSbvci3_PsX3q}TO9SpfTJ*OF8C4nr`5aC=ZfZ#P~3En!JeV<@9xWc22 ztwlk*ap***zYjQ{Da1hMFGg?7DelwRz8kNq6YjuTfb839ZRpA4gw9J%`&ga+(%5uS*0e949(?Fl5K zEHDMO_t|idKPmK|(J+8Eu3;W94s&G{%Z0{5m`D%#hTZplY&deDE(ZskhVjr5g&kW- zAj;hwB)u)GJ(753pv zB7N;aUgeeiNVYCI_z6<$GZaUwJxHi1uPD(29pI2l_);1DIz7K=1&(9|_o_QK{+*la zx!=zL-#)b5wm}qQD2-c}FY??HseA2NHeWV&_Sfx_KVP))ud65kt}JwAd_!$*Mcl3U zU&!STrF}xuqRpgMIl&2#joPCB0HF_bY`3_MtcymraR>SS>0v?hkVULoW)Xc#zTE%S zhF$MpJ2ic7vm{$36m7!~e8tX7x9KL@Vy@BmzSgg4m2F(+Pr* zh)-?H`&$u=d`9LnqeJ{+P~U?*P{3}2&xXfibxqyv7*E3?wKd+=OTM#*mZ-UdvkF}S<;yq-x~zSlqBxkEMHU4oB=4YQgp#Vviu zC%}bx>_2GE@!vxl3d!CKDXq^O2cN+dzwkAavyE+LX3GRNN zQ;x%X*wuYOFT!Nx+afkBzWD*~^+#IbM)5Xo= zy79qG=3hewr3Q``6YC?Ml7gtdooHqS=07JVr!-Ht3nffy0Jf}aX_|7tvSS)kqz3w~ zcYnQS>43NbCEQ!)ukn8)poPpXTl2kr(JlTyCOBdrxOp!&Kz^X@)hd9$<~mW+k1)8@ z9{7TaVn|D1pMGb-)VCT&F4MG$>Ki$=(&GLz0idrl;M(je*T4z?E{nM;|A*ZL^S^@F z--K2jPkh4`HtfWj?a9Ghd+>#Sb^#9YOePB;ZsMC>Lj#Da!FeES_RfvDR8)ZPQ&`+) z(H>kBiv2>D!GX^p9(si|77mw;$bE-eig;vTr_1@e=2JT;GnB`%%YNtnGSyQ0dA&Jy zsNGWdc6Ie12MMh}ARdyMA}xVbG&9$AdBbz;#N7((YU9sM2BFQWOU)b00GJ|VP4?li zB*(3Dlq>-t=((#x8Nd8EpN&p?0Sz2xCDk9&PeH%``-)xDH*>Y;pfJ|ooD88J^vO0g zF~$&gTWZ`IP(Ez>&=XqU)l-tf=0>F)ifMi!0Zy?FqQxUM#=ZNi&hNO$uXyg3!fIY$ zk5wh){8wSlUVJ22NIpe&UEROam%U0+s7*Iy!O<^1&1Zu^%)6zvX``;LCoV!)Ur@j6 z)zKf{XSit^ki4qOs=unmQSOTW=A_*8)9L;iQonZ7)+}1q_?W8O!b5*Xzi#wF zUZ>efZ|S5=xj>qh@7|5L|4|JKiRVXP7PW$%*M2Sd`TKFXMDbAe`JPOQ&+Y|&lfgu< z4_M7V1jWphm4$PBZ`%r#&3w1vlj6|V@+<3e;}31qd0+a2uz+@>f7!a(bCwt(z1O$W zY3U9dw2=^+nud3rH5Kg{k)JHx8GX9dGkA}VkB_bEHj~R_gr>y&Z{3Dpp=bhM4=fSe z8^N<|<=!tSe5}wgzBsBVFm}A!HSizUROQH1&>)Tj$X$)}W8xjMTOK$)^B&?KxqSa@ zwnqADq-w){+Il%v7(Nvsa1FXUas#1 z`LlNJ+Ocz_Ol~e^6%x$$M;MHWwGt!hsw{^0S5Lot)B>-62AgnxDTw!!qP$!%7m`4a z;#tUkmQ``J{Wigoi6iq@mGP2uMZ6_%g6E$S!+iv{fFiFNA1&Cb?%Lr)E8J$-Q{TL@ zA1?S9ms48m^xiKWa(JuUNM|X!OJCqB4H%yT*QZ>Oof4JP%5|HT2qDeK17>gf<@v#BGcor=-YQWx&p~ISC1eiA(~qwI4J1)-=zxyG0W!$q0jZ z?>=IH|Am*~{-^xfBf#bT_zRqfCah0AH;eE8_ntFafLnCk1ONY)XW4|E~| zBVqQE)}33%CU3f2-Lc5L<;LFaCI$LT>z)p}QBsk1+cxhXE_@gHAq%Ue+_s0jmTfn4 zcKP(FK-`hf`YlmRES+RDRj{$^$F+RbS`DIYgy1 zUS18pdmZxhqMM17iUCP)C&_?MOmuQ_#QbG>eSL0uHr~85K8Cg=DiF1~?z4V#SON+c zA~&@3I^)tSw=#vkC4-80Aj40@p#gX+a@{p2mn`wW&O1+nm#H6(g-?-mw|w1)74zpk zpiO0>!Kw2yb?EnobHG4DXBpzq>>R0MjXRx3`7wN=4f{Gq;&-=8}e= zlkaT{1{}Q%T!<+Z&4x0~@Yj8paR7@OGH{G^orOMnL)>plKsi zNcBG&c?B)E>sk!U$+vS}bVAL5sv@bZEhE>`PuN@Lwna93NgCC!1dBAqI?ra$jk<6g z>JoFcUi0Lq`>k~wxl?bVoCkeA>KIiLkH2o-P+BIte`oZkNSv*rZCqt_^{+wuhIh|C zll7vdQd45$`?xD+1dw11`&&WCv&~yqigs0+V2TcxRNbPa^TcXpYGg($ zXSbr(`sQSEr-FT`#<Hca?h~Mg+qIvIYRK_Fnk@4~s!()$OCYET{pR6eO0Ph?*&?Ig?*%gx4|Xy^H{k_G=!^OHO~J6T3&j?a4gFZi;* 
zXRY&t?$r~c9D@Sx?K7IOVU;BbSNw{n%@=YJ^xrby47k{^EgC!KDR!2%o*e+X4kv`C z1CIV=yv#8U&8*v*vx}9*C0R;%TMP(T$^1>{KKz2%74&a_(Q|COH`UjL5hCigRUB54 z>p3Rt0}bgj{nWZ}*Lx=`pQS)${ufksMN9S~Qltmok0(;_mP-F~*QD4T@_d7YMd<^* zx9e}$g?za$O*arzGBOO*%zHxF^Ix{oU7a7GXHQ+$6t{@upW_^w`6|CsJ{!Iz%gK2& z2w5EVlO*pO{y6Vj_`R<<3?amZE$w?{_dMyNg7;S}lFnW^N-IL(MfbG;=`DM0Z+LrB zVd=S`N6ovy^N>Av%BuNShDI1@CqgDy-A+$v8=;Q+@%~i4*UBsH7X=5wvV}ObZ<%ao zjj$_ge~!7hqMZu*1KK`!x_|b`-#9OS?mCl_ww~C*_oM*JJ(u>{*T@$gde$}-Kg9zK zq}Z}hGj19kwr6i>o~o{i%@g&a)KO&M1RnN9_$+xmmZ{$lHp_vuE(hF4BV9889RR z43BYd+4B3&jvMoepPfo`*cXKhd02$ik?v8AUUe`cFC%132~2p1`u*vqxBVSDQn!Yo zZ=42AY@e$E8^c56_4uR=qON51CHA9L$9T(hswLibWJPK8*bOGw_Z#mU)HP~s1+POU zm|+Cd+#bHP+jb@;-9JaPqxD62xM0$4Yh%S$`Ia2^FGCS7qIq+g#){6b4+?IFgA=)a z%>%bM8@`KJy<(|qBC{5H$JJETHITbUJ! zi+*V?Zyf6J{AaUF()K}z8r!$MA80w`k?h9^`0~UZ2N)P6U%$nQD1|%V{2gs7q}Bdd z0@I7PbmZYEU!qPLG3t%AT>&q+NNGO_*9>BOay&-E2W+h&!*s`})Poj<`ysn*=cCw9 zID;t|fGftB=x0Z8?)1Ut2fwD42d7(9`%r`B{<7Eb=;0^#&lhJWd!Q@6Rb&e=MMEgO zrQ3vgwk{)fd$cLvNIzdy8FhCO1KY7l0+s%Jv+b{>n|$YPFh61`)R?HU7Y#o*M1S@0 z;s?ZAXQAF`jS86xwy}nxo2$;JoXs4{<0!M|bWasJJ#qg?Bjx{oL7MJ}MW?k|as9~~$2 zTQ~epTs4c|JU)|g&7}UZ5IxzX{NtdoeZ`L_%o}cLEk=}WK+Mz@_D}-?r8PLhCHBj{ z2MpVdZnL#S8d%|<%ty`?Dauu_D97bWIRC}viTwyGluHdD7;W58-0Gbv=jdbAC?pIoaaOi3%ieY!G>r7pueixGB8#;3s?;g5O_4r=m%%E z`PtC(uu_tM#0fF}EE)J9;(`@AQu`X)0X5BkfB&Edp%rxz8UN?Re`_0g>wrIaRrYGy zFgq+rM#v3MOi>_)`0>U{R8=a~F~HX$H-Pf+=Ie=$tBcXJ7~4&W1-^pc4em(b?Ro|_ zz3;zuLh19y8_4RHk~P^v?N2||D$45aB7W@rKO#ICk-(QFDycA0d`Wny`FVXR5su%lDcGoDwuAh}eV$eS)wppL+tn+*pp zm(Fev`H_Uxj2+qV%Ak!3b&&)atsJL0H<5k#-laza>6?_~Zhd!(XHKT6KUqA#`4$ac z)1&($^%5IoN$lfkiHjqJ!lnv7xp3qxyT`R7ib-kyRxL>A3s(^-B_uL1$iX;BOYa1-*((71b5EjyqT6 z=Nl?OJ_}3Bc`9UESho$tT&3H_jVwWZDAa82$fXN@bHm850+Z!f@C>LCY8g2jr1}x& z^s?j-&OEM3@Wnxi&q-{S&r7FSFx%zoC&z&?zH!sOy;wWc?UK@EQXpsCDlED-{CV%G zmn*KnUaMTx?Hp&8ooVG_D6Y$BRA|iA9$fPL&Y@!?bag|C=n`l6*d?GoZ)4-vU{^-y!t zAqJlETe&B(^w{8Q00&V6Qg;^j3!Za`a#W#0`>&84-5@c;-{NDB2X2E@)Yc4BLTQcu ztZdsdZtX80nr!F3)yw0m4Ny$Jw0L-|{PMnNEYTr;dO=@tvQ1_u;~8ZW2R@tDzlrAQ zT0v`qV)Y1mKR7yBw1;kE*g`W*kMruQ`*6n{Y z(+td#!W_(^GA(srQ1DqtNB>*a zUldQ>A+^h3_#b|Yz&-uq6tsY8(P;JG{am@GZO58z0zk}vpVwgG6Zna6UcKZdQAlpM zkzdW0^T*3i@@9cyr{$qHs26j>A!*y;}Gc6xew}g5lnOTX=gYAjGhvSb(HQA|TbmV-^L+#1TaovT z=FK|@R=5qC6{oOiTXBNUE5(R}b)B{4Aee_7=bvsBoQ1dIg5JrP5zd)y>F*SdnAS?~ zx;w4PR&3P}ZoG+1>uvTUX{;WYRcAX|Hb_6~PFg#O_$|6Mh7ayKmf!s#|NY_Gi*$f`ECR)@ve z^Xs*}>L5@twZR4+{}_sn9wrA|Uh`eX7%dyeJ#D0a$lrG4Hb${a<}1it7En<4=3ye) ztQxaaRG5qx0c5CA*!%Lr)T&RM0U197`vC8ImnLCkTf9dIX@EC;diUqxPdXX0F|zmz zM7KX`+JmO-f28}%bQc8lo(PdylD@O}i^e4&ZAX0KU3TgiEN4nyOc4tOyBB^#|JvLF zq-Yt`_T{OXxqj-#^}ZniE&C7iGuUUyoTW@oq2Dc#%;tK|i2Gt;1pkgGOAQUyk0wCO z-1_6%h1K04t$=W5W7M5a;_Q0QP*ukoifG3Wg_UZF&`jgV57^Fe%)42?2#C0Gji3io z6?oX2QwUZ;5eM$Kt02HO=V0@c#E$F#uAIoUSQ=5ZNOTms^7AA)%*rS>e4QbGe04%i zBv{D3=bm!fPnX)YHS+yYpHWYW8nU_XrQXO^V3^D{H_%8`Ud}zK>#Y6ZwX!Q7V>)I$ zrp=o-b+zFv#|qxqbpEGJx9kdButkl;x{j<&mY9ojZBR(zs);L_atR?~hdab_SH9;Y zAoC5sD~Ji>l`D(mx2 zlfzU7!%Saq*=4-WC>2ZMP2hGEhf%SF|M}@`iIU!*(x{ZhtZqV4Qe4@c*anw- zDwf#KoWJ*5q?~b9F?N|JIl2bN7KrZN2(f`&oD-`Xgl)wuEG!P=ZFBM#?yk7Oc#|DUjHUdJ8M} zciqn8hN3G>n6?(!J|8Rap;_`p;&Of*9jH|Ew~1O-M+h#wrVEpGz(FBBgU`~e!Hdqa^Hi8VX?4mS(;O0 z<31&M)SQ0+qo1b%QzM+i4MV!y04V~XO_bktMJ^xVttW!U+S;*J!G*V>8yVgqJ*6g> z*E}6=p;q{JORdaR4gR9I8@ck)(gNf`oKxRlMnnX2E%Z+^ha&a|5W`-d7}wNr!$0Jr zLOKc2E5RiixBq+K^x@gC>=;h0s3{KMfEA!w`WM{x3KeK2{%-e|2I7p&vBCw2?FlpD z@nGC$ItaY$lAvO=d|bmRV98a`2=zG*JRD%ODf&oMQ;7}v%E_QyiD?aa*)I)}14g~} z<;7w2MmC@Dd&uUKJCuCx|Mp9eS-@-e%hTEM>1&GKm3KzU1xZ$a%*JEe9Xa{L1R5XD zyU6yKBSQ%M3^F1<`kJ*@c^(!;FnEqIyCi>#-uZV!f9;NHTQ1MhED)pD6JN4=(XTWp 
z6}Zdv=T$_MiiOXUH{fyk52b7H>&NNvbQ?7ayvru+`YL&0W2Hs0C520DbGMNdj#>sByG>gXqDu`gu>)>=6WiD3szAe&{FG%G+tur8mep?1?U#r0~CM zro8(7f~eG-mx6Ugg`zUx^2MWOu0#?>Mek<(pf>11ubVdDjnAB)_>(n094+q;2|Q2r zS@Ru6OXNb%SFGvaBKYz&FnwABb>ND zQ{mRK6~3hE-cNeMa!(_4w1~d(3bg}*Gqg6)PEy`G>4RJ^Y`gzQ(^*DE`F>qk5$O(T zkZvjI80ik_kd#j89z;O8yQQV1JBCKOyIZU^Ut0dMZu8}-zx)Zm(f+<@20k751A zPbVGs20WIlZr5X)%KM@JZHcaG!}_?L7n#DwU=kXz?P>r0<{mq0mpxG**u~@|+hDRzH8EH&@JbXC;x=wbh$IVumTMYFAcRWnpuBjA$)sBfPPjZl6WL?(G;b|^vk zFmiiw3?fLT&wu1vY=v@v3SBjKSjJC(p4ZVjN*C7Jvm$P|vx446qqa0WD~j9dWPH#x ziq2g+Cr5<+QlEp!w*`H%4LAZBMXDX`9NZ;F7}&aYW97|3Rc9k!cD~a8!wk>0VU6D= zLbFv(38}&gBa`Q4+ha7Hu#L(uTiY$fXFPw=K!W+cc6POVu;83T!hhJ4{D?}V^yXErKZEfX%NKe8NH^nsS zAQj};u?=^G)<>dU-q?$n_dY$Bk0B+h~ z`7si*mq*&SG?0=)Q7esBhUz2i0P;9j<>8KL&}x2}VOPm(HBRnEMUU2aIq(sl?YTZ> zht{!VWV~zD)uS&}unrr{1Yqr4ouZ;|etdBXIhE@({SKV z+l4Il%OcC``yV%-P}(gCzK3cV-MypoKieT`cOTvG%h3aTrlj|S_3vewqe=N4SEY#z zKbpdzMQdzZ_WG^CR_V8&M1%3s8LUpj@p0GPW|XsOcG8y7cK9paxuKqXGR|BwFQH^j z%HaEa#*;54qAJ9K#?6AGvj0bXEMzIOy?;%a;rCm1Vf3&oZPU^MgwCSAm_W<8Zv2Y% zRj@`vA?@GdMWHC15lhzyj`)E)VX-~pxZ{i=>me3JgJkWXB`0a7OQPAVp5H_n%r&by zKwNtTw?c!3&<9)Zw@;Ft=_VbU9_hdC(ecU#mhsAzaQoDEH!#tPlvHe6JYFw;k_<5l zJjaJse6ZDQTyhc)*Ec*0VPh=k>b;hz;#MT1S5I&d+JBXvufQzX14B+8eK83QFJnhy z^tMYIqGdZ?ZTzlg2ia;H3qv2maL%%>9R2nr0x65m8;u&W=d?>_^l#U@RvKqP<@`(w z(hpQBZdLkwLj#JjVwddWwcuk;oZ&i>DQD+6bV=kGtcth9Xw4)Ep~2hVGm&YTo@%4B zOHT=NLw@)h@LqINj$0zWYz9(ORRmPAvy%nG)Eg(Cxt8{zgt)LecDF27qZs2{G*gDh z*lT>)V8;mgOtx^r=cs}78NUx+6mo(3nSp!h!zhku8#blBRZ^p0mwZbt2rl;ef9@n8 zF*55}@TEt8q#4cCE|e+NLyD-W30KE(#IdWtLt-kb;u<{;ZHGq zN(+wD% zPb%soQtAU`dJ1kkm*Pqi12G*y38?jS52l)b{CD-hxM-@*S43#b%;9JIV4AGi9p%(r@;YwkoNV|(6*>4&)v};KC+)z6{I6V)YAE# zJ{*rgE#F@CZH5iKs@>>(TL0D??IWZ$B{3zLp+@nUOyy@Z@#%5DS1$z@X_&(lxW@fTova>A=WDQzicRFo%)o}U;}407o9Av(Y5QSu|y`TV(q zO6^Q440FVyo%Dc;f|^*ND~ZGYBd%K_p=lw~eZl(Xr$r|!-T}Uxa;;APAYvcgnL$}a zHiVUlgO~@$^Hd0o&@Nt+DTL~KO)w`@JSX?()Z04(9_%eVo+2WdSkkwkKCN)WD`sq{ z-wqZ0U&ElgUPSZWya6k0*t5vNcU0TXrJjA327r; zFi{8JEWGNnP$fSXXy_PG2xh9+!D53eN^bUEI-x#+jde#eMkjx3Aw!7_5V2F<*4%_5 z#ncN6GMY-eiW(Uwr;U-S_MUCE3?#z)RbSw7Z|Lt?w|zu@cW$Q<<|TbJRHDO$ADWI? z8a(}Yd;Q;BQZ{ERDDdM&# z4({EuNHtc8%0yu!*w&#i%>LJdovq^ji#is5u`F&PpBekzVRwlf*a~4rMw#+jbxYF) zMMABtR3tpjkYT%RRl}^fx1T4b2d$(`GGMrTeRL)YA-PZL=Q+RKEdd2&;Dyw$YOZWz zgd4_IK2T3iWjgC`i&PmwC5r+Lf3A6WEzrGC+1aiVwb)1w73Zepwln(~Gyj=f14jLT z`kc8<9P9xbQA&PxLBxt~DvZAkAw6E$gf~IMp%)s}5RkZka(%H1Nz3x zonTqRwA+N}{aM?&*!1lR$M%=xeZY#exu9eCmL2pkq^b&fR-E;Hc>wKS)B1seCtv5r z(-%w5{`~&_{5F%*Q3RKaEwrn=>an3=poTzrr+g3%e96$-4Nkeegq;LDFYuy$)dSYA zZd(Jw4T&Uz3KCbY+CZg=%$x9Ry;2lLC!<{M3QW| z#4oc`vbgR$@ea!>&D#9N-=W21Tv1$CY}(CNi#3bipO>37QPM@PZ+Ni9E;g&T3~$)9 zCgY^hiDHZ$8rD0-dTf#2y7rx$<5l?wlfCEY+{H8dc9SX;gNB5xD)#W7=Lt|D$DdB> z?k~1zwM`9@--btqx_cas^tXSmnXdI>PRU<~^i-=L*r2DfIqnd~Chs@Ks8oD$jpohu zJzFwx?AXikJ!AO$S7WsO{u}*@W&7^apuR7vKKD3=&4pJQ$Ca&0bjMWUjRfsM$0YwL^qJKcX z`v8G)Ym;74N^18<_)KL&geC>{kEi`liIIA*Fmk+!HsTl_w%=)^>nF{lLkCGEjcpi z7L{mb)k8QwL?I#bAfxf_-&zu;KWm2^MUA=ajw;9#L7mk|aP%1spQhrqqD3qO+>DY- zhVDzHG369->*MiHs~Q^l7=JL*|HKx}j~?@#O23ZMwfkp0GU}23tU}_0SC>;Q!$@c$ zL`MT=`m(B z*c#`pBHCQq^SHaMgLMdVsNYh`B9Ze}VBvydLER-il3NWb%ViiniOLV{7~{+ImMES? 
zXG@DinD$QYG%y>)I?9^6NPKKu46bs4_5A37qyvy{-LbFtbHj0@F+MS)NA}cc;pg5I z!{T;2OJ7r$V>3YJ4$mtHRzpkFSIcMHOx;;vmj9Cw&;40fy73=d737Qum?Z?20-^v7 zAPI1SLQ!?@Lo>t$mS-s~moJ{^p)rVP*GjkN=r)%`ob-k-FYaeq{DwQ5gG}Rk5hulh zeX+XVW%`1@QQjLP>X{^HeMVenGTTZsle`J}6NTVDe2RcG9#*Cspl9SqLM6DSc*>juc)m?J$ce$Vk$dPOg+g>N?*pnfjBv;m$HpT+d%_o^dD4$hJB zT}(OU7L_!IS~`ZeO9wdxNpI~84g2cMXqkC9`duhFo(K3LHm5{lAph{5=XJh)!2{X& zw&LR4=M$R4OJ?)!()aQr8zSpJ(Ck6bNy%#>wn_KIejgF-O5&^@j|!6&x4KH;h{~$+ z+r#L*K;P$uyJK>U7sMjcNyTF56R}Ft1{xJW)D|qUooQ9E*i8bd^f;H+*N19_&uTKm zm$g|IKgk}WI`Ea%c$nS+$c487%@@;Xh0(}yk!z(CQDu|5oxV?E9Cw$=xUl#o8 zk!$sUeT#AK=xolbQ#yXn-TE$O%^bctCg$gm^fmwj-k1|+EdqCqH?o5GUJaF~kqg@Hb|X_sRAaNJcsew1{WQnRt`Sf< zF&5J)pA6F95hq`^6KCWmbTFkQ7PQ%>0}C`Yx#PXx@pt`C?FngpMD{^0Q4x($RW2Cer@%r(&iWN_JRH3^-bUlab&1(az!a140*KZBV z-b7eMSQx^dBik1w2Q#%j;PMa%3UiU#$N*^D^YjT<<4LYRy?6cMRu?O zzdXh;8ea?}l-=jwWs8&=oF__VX^+h^yYRtBhfJe{F2Z4ymZX5SCyqJorKu)1;&lRq=Xo zfTQVIo}Rk2l?Eh@t`&~n@;n~v!d)H}kX^>Q#zOWDHEt*Jja_PeWRaF&R;}P0X3$UC zID1!BHU5P<5Po^4b@2IvZM44KDIZM#DLVj}qY-6k($0v5ssT%at&+cC)W?VL%i@Q>Sr-)W0R90Jtk<@DPsI3oH4I?;KLjI~&G*aw}~8@4OS zAr0VnuIk{`0NcS3Cbse1{pgeu=cGD{>_wzTkB}aM4VcJi?#KYPm{S8F6dWKQr!HKlLRU%MW`Q44E%IqMbMdTQ$oG~3{@w03&X7#PF>)L3e_Zs<1=;wKwa?X54mPve*AwHK+A3dZETj$ndkWPv`vZR-T;{cY}PnO9a)$E zSAT{8PM0%xI5wUaO;R1;ZTr)9)+*)qJM!;H-Gmsrm}<*-CO?()t6%=ph4Ql!X%^5h zT;J0c#z^D`bHvx#Bs`YY%^05;)VJ9g!Dp@k{*O+Gs4Myo7GHiF5MH@mgqilDOa#gh zgnuIICduf%E>Utv+tK?v$MS4v>tx3mbN6x&f!i5Bk#hcL>H7+}Yhp#NS9$n6yh-)3 z#oye{XFB=sWNy4lpTsln5_mGO!Z)lBHyl&8@|yq0Ot~=sG86=oPa2!`Zf}b1cjmF` ztOGnz5-F7@GdkQ*wyVykQ_rK)5_Jy6Eupqn%dw391?i1uoCFNo%sK!*XF=WT`(iYVJ_7l{~nPiv3=AhKqE%c;yIzHAfScgg_jWbXkH^UzBWBdURg&W#u zxr~R7b3ppX%Vv z7Q(uQo&Cx+`PQvzLttj?-|MDif!&33Tam)d?hJ&(iTq0<<}oU`SGpk%g?eK0d|S(a z-a!e@GHBXZWmo#JMQui4{{}0lH{6VYj+Z>8V@b}HStIT%L*`X|ybW|o*DmVZG!r4> zE5W4==aS@hXeOd$9PUS@lx85?>|^qLDHrR+Nvy;iESN+VZNw>HU)@`@Mw*+)V){bk zX)SIt#mg*Nk>w3rewsJQpKgl;I=0@LY5U~at2c1zr}8KM@)ET2bsK*_y03~ApO0v6 zeKPa5jPV3F5bxT<)9*0*GXf#$YoJakt(<=xVnQ zvBf%1R^q&(@)yy`QHDh7Ieh7s_A9@4+Q}hvFXvLFZGAd|V6uscZuvcpbjUC^JMF%? z+q-9=0S`>m2M=G`Ww9N=zVHNC2D5yQ**JN$Znw6!ybd7F=lk-;4mcJb!s`~e|HKB? zd}9L2cH@B{`jJa?%(-jTixinqKo}fd3^wP9UsH^utzuaRnPbe4!sSvR0X#lsc=;ZB zFyiAs*Q(s$=f@Cv#MTO93y?TjPBz_zl0~VJ!qE`z!USDQdIBvm~pei^0Uv5dt z-2O_$&HGzh8ZV%}8)wt4-UI@KdBi29ZIQf@a~)MWj0UQZPrv}63x@p)ietR<1WF2& zqUn#$+sh(WW!43iOA=S8CK5-qd0fJUj%KPSmYSQL6j|jD)1Bk_}%8r?= zxD8S{)$f^f`+aIoJr*6GC>pV=m4hnc%?q}6FTS%pG3NTXtlI8`KbN+o&~7y!PHh@> zn8zi%_+T}*^pFbW>sOQl?XU1EF*?r>UgyQrtcI#7#OT`UHSmuS-E#d$*fRirrYjj{ zo7-`h12Lg=W=4o)`__wBZ;lA}3Tu`xU&n&|l@05}9YVU z&x~>LC^!Fg+MG2Rv%06Oi-c$W<@pVE7YsF5c&pW}%5}}{KjsF0=6V08z1Hg${t6c0 z*9BT?heH#%hj9CO1nUQ~f!<+#&c)sppsoM!LStLXK!Cd>9A z1N72+U~5xxC9#89^wG`X44Y40pp<0&uetS%pIbV{?z~4AuPDo9oW+NJ%RFoN+Sj2y zxMAcL<%E{YsaQq^rgh~)d160B7sg8>_~plB;mKL;qCsP(u;rb> zP6eqQ-CiJ7uTH7Kd)%e+07s68a7?3cbL9bjN_u)HUb2)e;RyYFkwNE|XvvXMacrj7 zV|Ep52Y2Nv4k>T+mEjp$;xa!kIF58#7h4?O??w&Tj!an4r)`<*d$(1u#!1FR>Z>x9 zlm(0#rDDL^v<6gx(R`{PNvC=|Rh?X$iT>|7>@uyWN%P&YT~ctN-d+c)n7p3(UHJ=! 
zSQ-@i&1(+QVjHuc_Zrrf&ca>qI_Iz!b_6b!4C$9c`1?x)=SnUtNauWYOMqo zGATiDv|2n}%#wXcM>F2qet8~Ivfi#9$(`!z{1xll5~D@T#z}~^qJHMyH$W6PkbIgX zTG!hCjRj(m4v?aJbDzgA_&yTY5W&_vmB!-c?VK<)_&(4bh>3U2(aJjFFMru*u;TnqKgp;smn?BzSpD+R~H zy*dN`jSBx&d{z=1dIw`^M!?zBo^a|3Uy#*Nz8oVNKCUu!!ZX9qPr?uZMh9M80nsmq zN^>XI4}v{E5BtfDIY-<+DeB)t}ART4-Utm5_BGx(mEp=qsmqS^{1< zrO;OjXSt;dmw3V*Qn8$Pm%@TWuduh4bz;NYsx_kOryZ#>ShyrtYZ< zcqU_U*nf6^?t2}+hl8R~tx6U7p3jC7IMs9heB4l=?)-_y*qNph5=E&c?fZ|oLAh3& zdFfR|pn}D5o*5n8Y1}8>mQ=^YLP@8Xw92du=LizAuJ?@1 z)euU`>#h&2a<&|NHZO{M0J1ifXiGUq#O5eo_OipIQLxVHviY>Us-yJ|=$ewWN%?d< zmb_DER6wU$aDmWIl*YVE4h#1i;K5T)rLL(5u8~sHxLENSlr+<9irD#t#>P(y*8mGS zm_GvGxROBWb)9~YfS7(XOB21pu*_`=^IG?nYd)PfaJ1;&WXN>< zkw19e2V!Lf5?S!wEHhcyKwki>gqJZL3S+k4ba3(@nqu(H%3K1!$cMeLP=`v2-qxQa zyyL&`K9Ya(x!xl(&L6t#b6gqK<6%_J5D7S2?YLXATdc;7qOz7sa5QK)A4)N>zXXyH z{u4njwt6W8ww>Q;s#Bor#0u`GIjpx~YKue4CRpN`BgEk^_fl~~+O%=iZHVeu^0g1K z-wVczn6RhtWFjZ>3ntzjpQflOm-bobd`g<90tdOqm(o!0lF@T9vZolJ_42jE99!t+ zz2By1Dz@Ztn529(fp&k9Zxip~ODub3E*XI4ryledLwy~A%m1dXIBIGCZ!$axU07;lFymYU` zDAO!<=)PkEuQZtMlH$R+MWP&^UR8nNJ~w0gJ>3zy1usmk!c_aW($9C?uZDa&sX^MY zTWub=;zg(m-M=N|fwuv3hmF1fnJSbCxfl|t=kK`~*8YC~&6&u?u|aq_li^CDDm@$> z>m~7-9aL&b;n&1Uyz|M@fm3j>ykUb?vC_?tx6#(`E$z{kEL`bSX@(h`VjR6u9iZG> z-eA6-gTX5Qu^vJVZh#Aj38cg6F1)#QHDfk9R-)*@ueWP@9Nm0S=76=x|K@`MhXfx8lF15S<2Zykm$J4JlRXqLIH(hoPJGk>W8mU3!m-JUlrpIHzaZ#E5HLWwH}XAw8zZKJ z(5g>ez9^?ov?ZdH|GPwaT(5!o>%j0ledL3Fr2i3ODbj**Wn^JIA8|ge08z@cS6Cx&y0#+c=!xdc z?ho=xGV=t&G-FaM=BjC_t`T$cF!hPyyva9X->#Ilj*Y9SXSh()DcRM+`NX{x;KD%> zw-Y>99ksxVb)`)_^}zDDI~we1)k{O+1KFfwj+x;L^e=N;j15Y(vuYK3wf!f~Un`}G z6Em+$8+8kma z<$(Tkl5424YlO}4TO3kC7~?0>hHkMYH}=}C}Qxzed0>jm)11BdEd z{VtV+u4GA{0VNRmUgoonJ@e-d62KUQSx*4NgmCBfQ~3{kk{eI)sqoG2%ZF2M1|ZqI znoex;iJAKxWBgSrD{LAN`R?VGI8~jihMor}>ih-4=sW>{qA(TgYreyJgJLaTfY0|m zuixJu_p1CJB1`<^J-b9-pkG;IjB3))yN~`Dt_{u%o_MZa`dfjYmGJu1C;^;)zBW(6 zEj()d>?~$n#YBF?Z6?}J8nJ2fQ2*T<@P67?FXp_Kz^v{j=khP1h}^QF*OEMhDKf91#i-xSdjFe`-0#gYtxazgMH^V~UC1LiHA#y5!5v%x!|y zi}m}BjQ7K+Nn(7{!-F#QB1RtZKQ{ zlg~Lh=r4u2^;d;!o5gi~yMKndn=FJGq%zy}5rA%5Nm1V((dKkbdM` zQ|^+@h)hxkRrgd)NN`uKg2V_moq0ZCyjg$ zZJ&yLn>iq2)Lk$gEbI&Q7>0R^;8qMyBO~2z5$T7%YsB@_lnthjlQQw{HKh{#`-xlY zLQHDf5efNPY1H1R*7~!rEOOLmItkroKD9XqH!ut3XH5t|XJp2Pf~4(+EJNL$A?bbkKCv^k+R4cbZKyUQ@>SpU^)We_w)g z)jq#@lC%++y~y2c3wu*%#e;NwTSe3+ak~Ccns^@OEgn&|?oJf>@^uaw>_VY_d%+XA z3XTqWjIor{)+kng<@DW{y>9iu{c*v~-_L@xcER4GcdaoA2+iHC>#MzFb|*zVo=mq? zO4=b}0HOjmZPH)#tiazIWMB(-(p@E={;Zf3vkq2GEyq!@R`P>%y92D6PA-$FEnjvh z>^iJ@)Y@NsR2XvUQ=(OE1i@QncT^(erAuqQLVtI5|2&f6mj`WI3f5CI$*DLXnKc6iD2i&k^6 zCGT-&b)UxWi;tR)X+|iO*Jj#;pNuI>o2wXkDZ)@_~FP%#_Qq@26r0F72k~)1pm$H4SkNf@3iHuI+c4YoR--#=V+OKR(gew1+ zp;I>@G_t`G+A4Y`|4+d|v$~=r)bl9YW7+1TLebb5|GF1#9@Fx84?lvIIgoHci%j8$ zbJyFwSi=fGR1L@%B6V_lw4+TNbZT01Tv@8K#0{ZC_|H?nj}rK3N8{6y(PPlzcU^e8 zmkP(4a5$wvtn2&jkYt90*m*TP0@&SZtRBHvyF*rBV0~ehE7oNvp-jalSCwGhfxeB} z#C`y`=_lu*Kc=9Usn~E9$NDLm-;?ipQW2kj<0RlHG79!sBJPi>zvc3m{2A&&t;WUO zCVuxMnXs#SstjoMdDVbddI z_SLV~LWK?e=d}3OzP-VAqkV&QWnb?zzS`nvkdC;)es+2$2++VLGO8%$VCh_E5s9vK z^a}4)=RpcFm(4)dSVQjuOny2AD3H=7$EwAu{BBP z`0QzRoIv(btR^k8xyZ4)`Xu+HK2^`w_dF_vH=Wh^@Xy{xvH%QB6;ZMK^8Sxx0LeuDxNH7K zLN+>L4n7>H&}H(h(KJJFyS)X$d34as=n)s#Ba_mg;a{!{pb1CP^nQ+Zb@zCtq0Mee z-WLMALpwmvzZmoc9{fr!Pu^K*afh>==dOMI&+50U z+jAb!bB>A>3l0DJw|V$yeBW5XmGE>Lt*A`lO>F=!-N=&;Ib?D4La$v;@iZ*wU+Y5O zf#*{vte?Z!Yb%5Rmz@sH^`B{ICCK89%ER2yxjG&WujlxePJ&Svi8N}To|$ZsIr(_! z!ek515hQ0e8QS)S`nH39BIEZ2%_dtC66~68UC7whHjZnKnGV0>J@+WSb-kGDo2w1J zxdUxpGE$Y$*uQU-*1mg6?IJH$XP){%qYQH<+}85c|1w`#U3^b#lONdVQaJ?KFSlO! 
zj*vlIOc-!+~NNq=I(6_M!j?U$&GM^#z#(Kf?glv3hCE`}=@xd}Q3~ z{4kA2vp$p@vw2VVj*)64jZ$29mw)n?}q#s*|S4p^%j7qYwBb$|a_Fz4{>Kswz| z&->E|sC!FsxO#bNin(MOW?C5~@M}q9{s{fnxv^CC@AfCYhdd(=enpO}M3)C6K zD*a^&D_p*vk)|J#@=J|{3IR>AMSZGvj0Nqq&&YMwtX$=4o{vV#*1>yQwSicqoN>b{ zQqb;lUd^*M`SCauw+u52apSUHb##b_ogO~=prhTR8m%9n%ksPD&}9j@%Xu6 zL)6DpDlCICr;wWRs;M~C15@}p#JQ`)C)KYn<)0%C`c*Xab~pAC#M2+GrXe46ZYGW{ z_fm*XJv*9p^aR2EGGAY!%jNR3o7?x}x1qymsluOw)M7SB&k`O4p+NNiv7aWZ*SE+zr#4 zIsGgQch`{04hLw?F+<@#JE>_zkx(z)1TW0jxZB(!5^X~ane8{*irEzU<^Bk1Gn8wOtRhQsn-jeiS-ytV8smintij5?S}&k#sP*s z$q}6lQ?h-Qad9|Y%#;Uoj$Q{8O>iLknx@m%=Ws6-46G=N%@As+s~fp;z~Y1-07kK2 zxFV0MsEjyvJI~HKMi0jjky|1&q2sEim)U~i={N`g)&uNGFexgX{|?#4roVUapBEr6 ze9iBA%5oqDwbs0Sdj!BRy=xjRwU&6H7nzF2?3wke-2G+DQ~Tx zfS54Aa|_>1RK?->A(R3+g=~P|L0uFZ7_Fr{tnu#=f>m#O(0gzWrQ%gZVq#rzegwxc z7+aCFd@Wr@pAc>8`5SLrYDBas;(Ny?IY>@~=Kk6->sS^%|6$wW_o75g1V`yizJe&% zC~fbu3Hb5GrrjrYrsaCSa8M2qrKq0t4D4+HJCv2!E47Sv?Q&V7ju~6dt5uB-;2+hRjuT=_jC+$B`taETwl%$ILNsCOF=CgA+m-U8qbTzT zN`f>BR3lV+rbCSsci^w<$4MAY9FgEmk(~h=T2;63=(q$a@+S)>e^D z$5o}3ZX0)A_{vv1?-mEY=;;jtLPCD#rV9Cm^86)m70!|=Xk}k$wb=<~du&6wd)lNM zYFdi4wd!Xua8sz|Sx$|K+)3=ds3m@>&n+CUC>0;56A+(i6zMMwFQZHONxn0PUeG?C z@DjaBT0A_fo2R=sFU2}NuDFJP91SZ^f>}cdStj^wu}DzizdjRR0I@t8$=*2@Z@05f z7w@kGAGH4obGW@wFdlAf=63V^ix`zuhu`4lB)#=rMY`LAMn?f_kIV)jou6CFdYAyH zN3N!ZaoC@axeC&)lTV9e#I|5q*Zz{x7Q8eKy{sAPVi|M-2k06F_-og+EO&$mG8`~I z_!GcFxl)y%&lJ6M+Tr!M#D5U9|B)2GYIO+lqpI8;oc?A5)DeQ<#0`klx3FH^YEx`3 zD$u`QWRA^&0w|yH%!EKMm$9Df-i-6+xO3Y?V|T(Q-wv`pm-E?0>UyRt{O?!JHz3e+ z0m|1zv1Iun6Q}hvPjWb+5;@Z5==&V+R~Kx!>l}JTTcoL(dZCk_w!+t6i)OR}rhczt zVxAPX`QFSl@#=&FQ@v~Vt0BhonnB8n9a>JQW?7+k-m_?el3R9!vJ~#K0O{5ZKskdC z5g!?m+10Ub4=sBHkcA5%II*Qzs|J zeDgPHwZ+BfSx*#YmOzU&Arcust9X%gx_tbnV}BXG-6Tj&-(wV4J{%(}B!g#7%|Q;Nmnf!~5v3|d%#NhL1-!rdD-i2VQP8S}}Gy8cgBlkHi zY2e2i3r)|erzncs z`;|wRB{waFDU7RwX`36j3bxoa_^~nh@zZzIUP)7yZ!o@Pc+6lno|TU7 zpH!!*_9p{b0Sgh&nb-JJC)SAVzE89=;{l`2lKBBX!u_9DN2hi)?VTA#dDos`C%)O~&U zoyw&`AndXGqRzfgLg?JM)Dq#XVYXTVua7sZ1Q!GtfieZ*Lf2yt_s5E-w_F!aqV*!M z!9Sx<{PX!wx%x2c;EKP`UA7HbtDw`_Eb&ru$UHa|qZ1!9BHaUcVqa^x=YMUzoE0zp ztxsa23yi@W5n1(MLQUTtt~V*K0#N?uNjv;P(n6hA4?HQ4{WF6VJ)&%7ljvI+V)rdO z53O%6U(X9w+eZ-N7;g!jHBZ#SPD6-{ngKvf>?RL8o!(mCFt*SJ=3@?&`dueaQV>HrJ^J;_?>}tL#5Mx660gr*Gk1(Q&3Zy*`Bxa zXs}}5hB0+lXm_^y!YpuJQ*EWw`3HjMeci44lyNrNI<6q%>JRp0EH9R0BP$5mJ^ z2wf{$?8x%EsM6j|YnpQ{Rvp9`geS6S{%mQqwVU3RAN=FWM#fdpWYz9J=Dq+vjpC6I zmAe?_o@m7@C;OMnV}+%Pf9-Gj?ia2OP1GK`&yQ*fqo=2GxYsom+KOIq{M--3mg?Yp z;@a=H{}D_c745aZ^R+}gS63>rEp#`rg}Gv1F};?0jPWwIV~xZ?AJf)V%F;Y*IiQ?u zT(Ts4M`ng`$$S2o#9C~>SbX_)ASN4P0(KRqvYX!)C+&YC|1TE!J7=sl=?I3X02KN) z_CFIgshA&PgL<$$5KCasK5`K7RyxyLOa}vDQQyY@^mc)N)tXkcpKWK<#GEA=$khR; z{3Z~PhIFMO`6C00x)DencV8Fz;fDj_w;&*fd?!EGPzo_oqu%EIX%Asd-~{r@pi+hn zt^b}z@k}?7;V8?q@i#GJ$*!A1eAT|1wa`jAwHyk?&+itMxdx8h#{%^`0`&N^~$>tRc58}O1 zI*kf*alffzV`9U#=u15qsdH9EV0Gk>@z})? 
z7|*4F)!aRGTlMTL>Nb|dwPYI z!-GcCxPrd_eM^)D0mm-e!7J47 zU?m4S4hUGqOd64Qwan7570iB|o+_q(Fuf(;U~}9f^g=VHEph000s|-`t54KJ6wFJE<346!hj92`!9OZe=A$O`@*(v z5Gv~#tV8KP2KJeTQZ26lOQoLgFWWgCNNl>rw{>U~M$mYbdb00-y|b+S5+4D@xCFG# zz5Psw)_Hxq#PHBx5(sv`fpt{yN8lL#q)Uul6U&UfV?e`ncYFJHXmxw|Jsp0a|NfE{ zXHMMq^@J#-H%3?&|LVA)@Yc&6m^@tgr)1|(FU@h~+aVf4j>^j0flzgd?9(+xACDD( z{*K#NmRr?{FNvi5SjfMt)e{5!nTA~)Cg}#%p!1Z}_dZ%CnOHfE z2=`Pu6o9WF_lJW}7=uz8XZN166i8?48@cHPy<-q}C~k^1aTvt55`l3U_Fmi4rP0+w zmfP^XmhA5yi(nhI<(efeRaC-bV-voQGW7N=7n&lsb>tgk@oBF#kJ!JpR9E#+6i|(H zsdBl%K6)-ejch9wlQi>Is+7KlR3uRqkzL}i9Dmn>=~@m*1_ls@=j3-X>jqv@sayx) zrs|lL=0CUCb0sfT*#~WXpO~A^z0e3b|8O*S0P}^<_p2&@s(jJT%_ZMxINDxVJZ~gC zX1+?ibCQHBz0Ug82O_&pEYm9Cd9uiVDP5weM*>aN@9*B}T&p_ogCsYR)!vM<;(T`i zgftQBO&y0LCJVv?)rp}84yN$^-`WOoo`U!swO(qkH6l!}jcX zuK$bwUZ3lnox9F`f4-j(oXu~Cms*~%Y$mA@2|0P(6`Ja(nzXTtYaJo$^!`hW3q~S3 z!7s2;`(9rCl#Zw#5LJ#RkWoIt8Iq+OS4@G+h?f2CeH1vUR9mP-!6sqrI=%FK@rP0t z?`sEPTo1O0g!I24=UZ`hYVbE7Fv6XAiMUhhqel{EzmWQZ&dz5$8Y`I?3+L4^iptxu zFM+9}R2+&BgEhKEmAQoJcuGK?wM)7-6lffv7ehm@-nNAYad<`6(7y8gNWv5(_%f2d zB<;57$9}$=fNGPm1f?eG6oxvo+YcKn9+KUTvrbBjdCogF_R=gM?cG1)k9@XiF}F%N z4C-_^TmSP2<+H7YIa zEGmqm;pQR{O@On48C!hOXktJC0T_hi-JqB93umV%HBQF2jP6^m{HA0rS^DiF3L(oA z2#NR=Kve0pUi~}hU0EioBN7z5#VcEnh|J@TPpRg7vdezq8#d@mBXoN#1eiF05KeNc zpOFJR8dx%)tuz}2{YcWZeKO6xngN!r z&qH_c*(e31e@v(E+&(7!b+X3MG_Jzo*BJ(WBy1_!nOYlj2vEhKiFZ3+yow$W;Y6f%=O4U%+jh<7OZd@17$f>B$M(hg)AK4>!VU-k8U@EvyXC#w zG$Wb;7yq40C9zeR{JCUDzPp>i8lzirioZ*@lphONw= zo=_EwK8#U*t2+1g{Qk(X&xp^bS68T1 zQ%H^O6f>(s^?Fq3Da2gVp)_HBVx% zOygOnQE@jH)`D?BS7R;x;JX)e55qTgc=V?+v&RJx9%4?zcGWdUC(}ttsK|5e67r!0 zsn%N@+B}Q%AxHP~0{T4pt?u~W3X{tI^vSFSlt@cQE@=r}#uM-XaJ(vr>_lXBCc9{r8+^MR#F^9o{S7xe0I|sv--@Hb zA7>nmJ~4YJup}dp>uvRM}sP|-jR^6&UyP3x9#WXBY0w?t0 zKUt3)Ql5DO1rs9?FYpo$k2tw1lC!yAPxVo@Ec5sXcN*X4=Ys=~I~Ul0F0dk1rM5K$^U+Of*LG3f3w1!u`fP^}mHb zJhClObFTT^vg@IeDruME`RR{|eylNNB6iE*_hVi^4v?p^&;E@*>+dN>U-X4k{Vh(D zao<%nqYEA9&DBhjR5=Y_LL~E(-T(9t|A`-ED}TlUvIa7N!Wf%BQzfGF*!eA}y6g7N zW=?haWsB~kdd-2nzu_Cbq%Oar=q7U%VVzc7jv`)tReBLZ`Ml)E9e&ROiYIZIZC)j6 z%s-Q{n+irMe)X7X4&xtt7i|2$BuVR^0!pWP{0QK=`8FRbaYEc4JT3lc@e*dOE;I^$Nn#u7!$18e$qi2 zQoxfJ@!)DC|Ge$s8hjcvjp-&7A%JnBq#&FJSfcc&To?#%En$%U*CiHTg?x>g5^c|* zbLcFij^*ff1pLmY8rmoGqYRP-S5NW>Vddky>Q}n~_VGm3U{}HK4dR&!RkOdVe%_iH zz9tU+ZTG&r6ZJOt)fp7%nZ}x`YD~nNJ(fT$XMSjE?Qyd^SD(81O;4&E)eQ{%P;P~N zT&-SMomr<157P|Un0RmwP;1mCum~>YzTT8rpo>}4Xa$BN*AJJEiMT4>Hw?qsXhl=C z9VwWdELI&DodmTF3R(o#bKqAE@SPYUXtc5>-IrXrK*Vh|+X?dLgdO-r)jeDly`$mn zGFRX0CPbI_TZ25@Z>Qu3?_WKZUasktr+4(HRbFV5gCE_MBwCBB*W8U|X%Iiztekr? 
zTJ86GRF4f);s7kjuci0D7;W0fC3RX5DVkrgkBbe$dUmQ8ldH~rBqx0~aj!^ka)e~C z(fH=0rCuEwgY^^vNXI?xWtP}-Q!t!n>wJgOQeGyZ701U zlNE&5BQ5Z_^J2VI6bAiVl;aEEm|qpd7iYUUwSxoDEsGb!3DV7EN;H8Z4JP|{s9!hm zVy!q`>}|Kp;*m+zsv`wy2DFvw>Ji9STOPG$q*rQP9p+DTW!vlw4E!rv@BynO6-~v7o(1L1W`ne?8npy&>Ye>mVH*0@RlF z;1ad#e?Bz6<3kbpC=n|VtYCpruFhC-n{X>xVF47roH*2q1i9OC_=7j0)Ln1)2W^fF z+#xMl(|NvG;hnY(m~#b88MF5`n(My!wNce3a(pQm3l-PV+UsdAh{cH>+nCB*#v+CB zTCUOnDN@PFcPkwmz3$U@@OWSkSI1ivk9tz~pob6y1kLSj+pEI_M6L`&3|sbav?WSV zo}e-BK3`!<`i$X&$k9r9;$%0S6t7~z!f5)WGsX@lmy?KtniAx9047wxgK+Pt7qK5& z7_~xbIPylqrdt+Yo&O7l6J0-nxoPBCGzxz2d}O(ZGh}vP@>478B!3nv@lWUNL3Z5G z0?}`4YS2yGak;)OywULR8?|%@(9c-jZbvM!Aubl&2&4Zg+XZ5K=wVw%jaTJrEBSLo zf3b0*ht?pAMJv#D5MG6paGInl(`yu@HL|uW>om>GAn@a{1)`Rmb5(2v2X}hR$}Z7S z^ABz-m0G&-P!Ud0L<0xt?g~dszAh4hMT)qqxslsabVvBcFUdsegeAQDdy>UJhvgeT zLYfvO8y3Qd$B||8#xm%u)4^d?wubrFJ%@>;S6K0G%jX^9?2IKLR za2{nl$5}cfrx0EaPq!D0KEmC}uWUpAz4B|_pRB6%CaNC@#$_fr7*8$7qc3V9>k*v9 z)+9QMpg%@hS1ZK#YNrwbR@Nx3A(h4h`h?aIvcWHGwtjb!Ek{2CP)(~VMi)_XzRv0c z=jY2AhOK9e(hpW4DVqG|v5MAGZ{IrWz@ur<*!attWChcXSRIP zpbvB=D}DAlf}$!1XhVncG#}C9mvlDST6G!f>~-jT7P@r$aIYDR+t${Vbz+GWk|-OF znvP2x#!`N*c~2dNQ4XZtiHMMS*T&_eyO=T=^b4I&8>oOlmno`khwxo!Z7X@($X zp|or7<-^%cN?N_S5S%NqU-By}aO9TZdIiMkIVq`6cOqK*{mws*#iB{5fXmB HXZ zC+k%I7zc~FXi-J3jSa(D5c!ve4QTh_+-W5L=V^=ol;XLMkOI-4aLbyLHfNsO~N#*>G+oPXB_|oIQD(&SjbH9C;Zs%Dp zl&f^joxA832Ki%>l-pKVY?F1A_H83)ET0 zIsY(P8ipmZcL?!=(tR9k?c)wCG^mdtd^hvKYgDLxY-l%ZBN%or3}*tf*BrFagMk^e z#`{nzY2>clfQUlmlRr(46$B% zek8u=Ie{J;+C`*Y1WvM8Tz8{exRM*cxm&W4RfxL0qU)1`~4|> z#7e?T4N_|Pv~OHSHGw8w9&pt`)oRi_6p}YyQL(j+F*tB#_`mO?qDvR=EuR0F6vulh z>@6c}kBKgc8`KB_*OWL`V;4mLq-9bY=9V2SVO02uBK$GY@!Yrd_90P|&~#KP?r(7a z`EqQJmBjnyc-PsN-LC_HO)a7`=B46~lsnIwT_$K5?s{hZVvrkUMl0c;wWUhR8?mN7 zn3ttIJ1HGI4sp=I@i!b<J$G^xR2hjBP$T6+7;Yl<-3v$cw_L2%sZGMGr&q@ z+<&AGEBd(EvFw`DO6<&FvdG!s(+|-T_Y(`i0uT(n0SOjY+1R0!om^z&F7 zq3MrfkJN;Ax)s)m6VfTnr>_cA^kF=@U$jT!UmpA^Lm!08ki9hZQnx*5tb(n@SvJc; z;EW@n9uxeGLQHEMdr6zYS;EI(9*zmzb=r@)rB}P%{&$yx-D@V^6=|hZL>H0X zL&|*4$W?%^>hL^3)yYW@6^zOcM7AR#xZ;L#)FGX2djTZDD$I9@s+CKoTZ0s` zyT9c6Kq9<(PadCA^5x))6uvD11YOh6h6!Y!8fZ8%YS+2R8|=--aB+Eju+*UM>U8O1i* zF)B^*s6ITD(l5VX3Kv(FMBdX3GqqGA4L1mz`N}S-@sX6v%!q+dsPwbidEw+{`wB(< z!VBojyGA(SJDd}TwGXo2PSrror>cw-qXWpZWG^Au%ui{VgvZ%6Ua#e1SB8{g)!E8S zTt5!HnEJf#5mvzWA0_t$AVUYdr+Qy0IWUl>Lego{Q-X`I3XX;!WgY&w0eZ`M`B0o^ z*fUt!Vz=klr815a83cdjAl~6V(&s#5!ex7Ayd&_y;nvLlseN3pL+5YruQI*98StzV49LH@UJ0OFDRE+wPGyC~0(j+U0h1T6% zbTgahj1;b=4_JV@85y|5pY+iGZKe)u{BHl93pbJFc+8XgR=OhW^VqG5Da+-)z%^fB z!xQ)<|0K70q@*bK!?FIgIDC6yr)q3Y@1{h>ofk)zi98mbz#eW-yJF|wJrN<|?pIJ5 z#(j$`_@QK?e7bL6m5Uf64=!gLhW{3|sDwftC0(G8ZF0poZI5R8I=N1gJ^uKp!}unR z4uKtpnGfi?WA(ndu`FM8*aPYwvZw2WY#!#nGK7hXiadp-GM!;-me`B_(!!e+c1y@- zz0#Sh{2~GGGC{4tp9^HFM*+UQy#ifD&UASNe5-wEw(z4e$t<5@mxFrzWnC|n6UzNU z)%ZXd_6#L*nB-(@zdFs&I`WC;q{>ri`I*|P8qcBBY#m5Q#HV~1cvw)UaY~=}eZX|$ z?V6sYq%ot21lgvNOG{$Llgi+md^m{2SmcQ54bARy8$F%uo_9g*1I73c$m0!+G7`#c zpqW17tajrDd$pmi*0UQnT~%ECcDR;TEC@xCr`Mhz#@<`mm!Db`PAM5s{h+7bPx}1a z8L>s8V=gmESNYH5g81vrW`Z^(Cr3-zz^z^To{T)YPV%mIa z1#|l;;`i>d-HWgcPk zQ;2%XU}U{Aj4McpxYN*el$JSKztknTMs>m7qXu5zQSP%?Stg?pS!~;6?p!ijT{BeI z=b>SGJiz*9AH4+bs{kx8!ZyOh!Scm(hR3&0`oz9FWN>D=y&Dn_?{k|AK*}M_QZw!y z+883@aMC(ar>ne%cW{6oMp=C4P+tCwxGpo3#mW+j?WHg8Go1th6Zj!q)@3j^IARV) z97E)Nkqunz3%-56rE+_e;kIeVoMv5My%N`qfu|*L@Ofx* z*NrMZ(A7o75JFMvDIvRo_;d@J9Uxa|9LNz4{N5uSHToAeO2D6 z?px>;51@bZu`9MI4v{h#?mvukmcrLP7ZWnU{K~eK-po5ZzsN8#?#PEI;P~Tp z%M+)145y3dSlhi`8Zv*@t*{|CZssfSHyUj9#F3{7B{>XPkiG+(nH7BfPWj7xap!#X z53Tp}Ro@VuCP*>!Br}XrJL@|+n?M=iWqi|84)|}esyOfGDDmxq7cVmyy|Z|d;;h$o 
z2L1y+|IQuT$uB24o6|7accSCY4Jrf1O#FYQuJIU&oryAG{{lcT0~8%1pPDVs&0)XG z<_|Y1(J$O%F#%|bTFOZkYu2vq+Vw_ql1vJbVm|qElG-Q9Bpk=T5~oeS0LM3SN#!E_ zqS+y}(>SQ7%mN0)iz#%{NtJu|dh()+I*uOwmrurhlAd-5sbXe$(Ce?woISJ4(GH-2nn5oS?r6pY;-QOGOAhkuR&21&vN^C^=-AVxf2r+hh9+#+SRm z`t<3;I2QN}db@V*HhT2?TefVaC|8M9u2Pvf0R$Og1ip|4q+M6RRKWJ_+o^`9;*60- z5)sf}MDzZtSkc4`(;ik80Rh!QMI+R*_y);RPLp|})Ew~7)-VkY#b4TBuWWW-O zwqy0G)#>SJ9KZn)Fj5TsAUYV5i)Am+M+0xpy^7^_wj-N<<~Ep3gb&EWk~Hux zgodac&_}3SOM_67wFR*evcfF{!B-P($)R+^6g_+x;BO5v&|-_zU%;(KN-If`2nb2Z zz`)rj0gw<3Jy~*6F|MIx{)ej%@4D-5Z=hLxVPn+_fr=F>b-eJxOD?{ch|+6= z2KDdPm(UFVN|r2Dt7a_@+aiKF;+>HcWV>STKmQrc;yafEPMJKZTellmty)DpzL_(3 z&YW*S+`18jpr%N zhW_X~*?7tLTVurt^=?|!UG3y*RLz3(H-^8Ne)vdkQ60_BNzxZxd@*s8(_1v>6ax$t zHt@60KJC}9-;=$b)Iowdxe0}|@EdQ2mOB-Yx5DxA40zc(S=C2>ufxbvQo3}$;?>t) zLE72Ta8r+)3MUj{wc6AvQ#qny(V|6cEQkK|rmqhja`;FZBrTqlQomk(fpGeU@2;TO zA0on|-W?_P*Nw0Q*%>QJh^aImzS@BU5JGc}Bo*S2CZ@17@rduCZk>}p;^S-8s)cuj zer?~r1Ltz<)@>j|oVsrPy3m0+z|||)uttyYH1?S&+mfYAfkqq(CvVG^EqHkD0Rs+K zqjc#q_3P9la=}~`3*we9U&cWv5S;y7+#x_7`WL!mSya~kXf94EpHG+weL&_9=U>37 zD4@Y)9hlNswIg0LH}o$Y@y!A;`j@?_dzUX?y6}gE^S+%+W(b7`j2u#gckt)V7Gh+G zE(!vWb?Y{mmarkWjJ&0o*dys_%qV6b%ucZpJ{X$0#*u?aQfZOWC@*>%S#BmX9}a!zvByEC}Cm@?yvHA5^cKD!xVFlf=v6 ztJZ?ANQ$A!Q^Vik>jz~*|Hq12lLPy&r{2!rf*)8nRp#^otQuaBKF9VimW@7C@<4TX z!@RS&nDGO==AYUodn;HG|AGG3a3@`g!5bVAbv@PWGnP;66B@$A2;uzXYha%}yW3|& zIk2Bt6*VMytV;O%75Ku#QMBqJ@Erl~U*wD%6eF=iH~I*203VyFEK&Hw0Px4-KQ4R_ z6oUAXZ3Uv~aN z@f|yM-2bQhe)w*|1?@YWcG{_2G|i}t@g5?J=5oC`{W;pBij2TEN_V+1{6qQicB#o~ z1euNgDkI{Y;HB)+gn-s!AaIT+{|+M{p2WtQ>R|cxoBCxW4&eWP{SDdSRQ-GP_qg=8 zDvl5TH|&quPqE6x2sjQu{1xJ^&*Q_F4(N$N&|AQKQY3K4fnNY7`6W^&o&HGj5tLJ-VA@W ziz;;Bg9z(m0kc0EMyYP6x2m-a`TS?iDOw)rBY!OPKSe{QzfNeCk(Vb&w`s=;(@?-- zuOjgA1XvKn9Nv_vQ}|9hvo!_DhC7WyIVY#;$<-cz>`_k1VBH**;QvsqXv2qame&-q z4~7UB{I-1%ngSs9f1*URSK<}NakmN32j7xY zAn(2Bo}ZTd#54x>9~X{R@zNruTvh_!tM+?Z18T{oa zYo>W$TDp!x4B!PKNVsn%>Aw`sg8^mL4hN+Mt8K?6x?8QMEA}!*L|3(;V z&B|44SFfVIv{dY2b4ek-!v`(dF&R+64u_U!c!p#M@rks8n+*<=O1;F2mCDOlt;_`N z%zcJRRLnp5hk<;8k$e*PtNhm6!aJ0WqewhHkdc{=E=m;G=|XPch4SP2bz=WY{bLmU z!Nq9~ntGYYitLPk5f-&vn`!LTw(VK`fBp5rvu4hI<+WEQv2HLU>+7$+p8fSKR_vV8 zp#CEdKhnAL6$CM$2+JMiBRF({;TKJzwLzLyE2Z&9Qg>i(^Z@+5a?2&O&@how$h}t7(6JL>ISqW>tRnK#gR&59h0!Tpu zRjD-o^M#il2PMJQ3tN_a_yeE28--qM47-P>Or6TCESrZ}MJ271M3aRR%eBSE;gHBn z_|worOeJQsfRkG=JcdNL99wYft#^F>`FKVzXlBvGBCPizdiBwV9|8mk{`%{$XaI@< z927%L27lMjx=z!;6*uLu*4hSbQQm9Y@eI5&>?| z8o49->#zk&YHCywP)~OG_ZE8!Ie<@t$)X7Z(WfAM$Zcw(bYSr^7D@iq22qIwRUtbr zYb=a{6y(addR+Xar`DJP_#dmkP7TGKfo)8hHgy^+akL{iGoxe)9gUfkRQ$yk2XyP! z&6a=C1;9qT3Bd?p?Lv@*GTkjf8=M=iZDzX;V7a&P?6c2)=be$oiYBdGwTe{+bimAI z(NW<;Kw~tqR;{|sVX|gn*6i6tD_GjltXWg|j=7-sC+0#dW*0krSXrhnHS^??t5Nfj zqZud^)ygN({z_vCtrl6;hssu3&tma*@typvpn|`Un-R4ro0E$!KltE7^XIXKX#uN5 zIg5u|0&3Q%VT6yl|NaN2PMyXb5s1@s&p+R$O&cvJ%hIAz$N*6Ack?3n)N3+M<%ls) z!Jj^owjVZpC|MUS`hm-NIc%bF!zsEfIUW7MXk&5=dx}e! zDXobIp_`|_^r~vJNkPd{rPy~&l?afnZ`Gu+E2rbB>_NpkDLL#q6fOt@% zgcfrFXWOOr zS_yVcS0F!q`#$^9z?ZOc$dAQAbZQ0>x}=okE3dd>;J|@wF?iv*=U*Q_?7)7O4eD|g znoPaSCZ67+>A-=nG;Z8j_h3S(9Nj32_X%Z)z0z8`GtR}MA_QA2{xpfgkRjHNk4=`2 z(mqO>O7p|Mf*J(-NA$nmze}V-{e^F`532uI`!7E5K*%EMroWO7d#NS%G=KjhK1qb+ z?d92jffpP-jM!(&E2mJAH5A*G%kxTp3=SErVi`N?D+r&mQc6i)#QsP3U&7yB=3nWX zSE9;5lX{s3A^;BGIv^1S7*_L%;9I)5;P@LH!FB#d)88p*jR+C&A689Ji#N$r z^7St&t+V#>_g^xEa3czT&J=FNWtUxc-@W&p*0c%!A@)-ffB^?Y6B9de zotI0XvGhN}fBp~AUsFpdb--U^wKU(pIO7ZGue4u!6pFVyQ4#-2B}4lb*ffXbAf>ME z{{;FgIim(W&2Qf#@LgM?^*4n7Z{j~&>rKntK0AG|lRQuuB`%h}k6ArBPr6Y8G5w!= z?!8g(?A@EnBt4rhiL5Yl*SmMGE3Uk%e7SNAaB!ud>nER#vvo0$Hn4Bvgms0GJD|V& z&z6p+7qD+iHUo?8ftmJPY1nO0)j$0Lm}8GL33oTh8Uym|82iTfJ!0Qz4)b0d;ac6! 
zI>Z&E7hq)!i^^-)srAahSI%qKj@T1s7Tj<(&>0~_QYeApGi_z?uymBkthVX2ss z9Ba9J(1JfvP2G`@(P<~WRT2C*##5j{6U~(q zso5>d1SkJ+sDRJ(DsV_gEjbr#)8@^r;ow-HiWMs&A@B#pgq!uhWD|X~%u9F)N3aX| z%i3XQ95csU1lg!z!#Q*2f)aC_Ori)I8mBH?tj570UREq630MM~17e53ghP&O5KNk+ z$B3e_21Fqvwdx0lc*JuDy$%c}i78K*d*1oyu~JIYocMww7uRPcW)MB#t|vtm1F)>7 zgL@K1$BpWl?ix)2@@#GrS z|8Rag&D?VbRAKofE}t+TJwm@~*R8EvAQbP@x*~-Xgj^00K7%Ux13kUO$-utZZ__&- zk}P!&dROuqPmBTjpXUM~lJeE@31F*+0A*v5{Z_>)733w&Tt55h zS|%8VyuK)R2w&q6z5JmYfH0>>LROv{E@D4bsI7v}r(P$3&jUSD0YlNh%0;G3fQiF5 zC;}*IM&NsD6nrXX7!Qx(Afq*g$DEanZowHfoYF*9krr_(f|=tFSVNf8>Fn+>*AwE$ ztUWn$6t^vn6r$s|-F6%20;-DKZ0xcx!C<7s`LC$JVOC*CWHr>`L^vdjs60Z3DP%}2 z`sd+n)dg6PU+4Uhtwf0u0|pGJU%$TTDu?Kc4kDy%j#;K`bc$Wa@vJ6z{`u!;&YCII zp%*T-$ivkYhlnnp*Y3QD6DFYB49S>OQM}pe3RwbrEUQVs`<}bjtXa#`r=NPNP3tx+ zZ=gbX!nPYoL=rEE;-|62jO#OaU<($Eb$M+8fAP>-7dFu_2Y2VM@~SB)11lf>ho=>k zAEolLR{fUzkgWiG5h4(#;3N1gkOkyNWc9inGrsGKB^hKPQ2@U1htx0wdqJz!6IkiJ zse2|)m^gLn)LC<869xeY0#m+R`LoVCi!<8KZ`Y0^$qBQ1Tukzbxnlirf{sB^#}}zQ z#NVi?ws?E^aQ+5lwy1XS5R8;6io0;@Q)o&ikpG}b-Z+DQT={qSuD^hT^T>k!X_DYL z@U1p73BIauZ$R>|AC7;hzk&mN10&twi*gx!p_(7Q#7RTMg7_b+zXm{SsDMR&z)&OV ziQ01oAvEWH3GN^(Rk9@g8G&!?J9%Nes#moEf9dZz3H90jeMi1Ml5PLjUw_@up~LWI zdfwUd?z`{d5)Dw|CP#K?!g2M2>bBJmbevQ0-+%u-Am7=u=kn#tIeejg`}S&-+6#4H zbE-LY-Ejy~n6ii9E?@N|0m7bPH z|AHyNN5Y_B|L?u`-e;eEmcUt?Hf@;vWGx$}e(v0FPjArzYq5OUa`h(ZM_+#Bl~M1F zf(FPT^v6lVDeTm_6PMw>_`(ZpK9OOAKDZrDA?cSAAgRdbFJp>WW5T6W|N3|IzXjkM zY-)_QKfh-G^V^3AqM)4DzeLNw+VT7F7a3$rc#kxn1hL(K7<_#1)AYt%X?WHmyvB_jThT|W&Y;O5hhsylUJ?Zst_7A~APaT3R@Ffd?z#X*4|e)v~bnzU|nW~WYfy2GjW8ik)}ApSN0-;9;qUR<;xS2pw*sRJ%AoX%fB^Cg@T+;e|`XtyrP})7X+f@ zQ<<%%pp?TGmXuKe!`Hut@{^YqHYPwg0$)(3sP6PPDv03u=+9GS3Gw&rAu)|#$MQE%e;wwr{Ppw`(Z{)Fl~P|Gf0d$y6Vl)C zk^Dr%5ApYs0f;bz+7poby0IC0WM^mpdWSw2LBUEwTms$wUWNh)n^2c9IFKdfAkVzW>pj6BzT^pQJaEe4EctIR&LP}d2B z|AuQp6yM0CoOJs5CrR3)=cZld&pGStKRx)sIcJ^2&Pv92WW%Ppw~2RZw!{pMQ#xo9 zxdKuPZitm9g8lE*U&eH}I$UoJ5Kv*8ClH`ELsxJpTdp)QJNymCr%)C9iDQG)i|3h? z%z+_=SWU0-F9-8e-o`@$6Cfm=QX{1+P9{Z&v{26cD`o^Adia62-+D`M@QCcl5gBvM z{uWlLSZ$wTh|2i7XmYUuFAV73qdV}y!5R!q)dcjPH*B!A9dRe0d~(qwHG#Mf?{0s{ zq>j^2P@1?(QL)p?i&WVE_#8%L>W;UF`I zvET;N3wajHMlzX==lZR$8Y<Vv_*G}aA5!l0fYfq&0<9f&0KZG6f(zo;=~+83gMtdh!mu@x$EGVb_O#dr2g!mVIm~e?;3XY1uO$JJ-1QC z3RwX)L`-sui31_LfJ%HAe0!wY_(>)hk6h~u!_Uww;Gr_udr+xzs{%*DzM9R^z|}I# zks<~6OGQ1efsnF!N*<+6Y|$&^jc3RZ0dFJ^8;VRoA12s^KLuf~v{#H23doLob5ucf zXlzzCOBjlmC}GcI2!}oY-1D5HzhlQv4&)-p%|Ht{C=8YY0cA--_3AYM#p+NO1t><@ z^fXp*6lOg_hmIXtRmAcWswebJi@?O`qY*NoY_}uW0+vDM;6Z~}PqCjO$;H!;9A=0G zIMAdRyr$ud##xYpR?S~9-{l8oFd5KFG42Xs9LTxuXSQyG0#X-kXDkmK%A#*>>G^$N z{{h}fghHoqiQTD82HK|)OcbV{n%H$a{rjPsNQ;tu9<2)tw(5yxCv)Z0> z(M27Ze_%w-{06n6>A+E^ZRZ*iQKhleEG?5n1V}j5oj%d>Z_gzP&T!ralnI0h9MFi+Vw;H<<3xCa7?cq9cHOE#11T4& z20qu~biVwGTW`IYhzaTRF)Y({G!7k**wb6UO*&oEsn|oD1ecib)r_w=xnJj?bAj|%!CKCW!RP`O`EV^5A(qVHEi@_+6^4A4?5Tj{NrL-5P>5CwloHH z#C&rViNohGRA6A=B+LK{r4Ibfk|IX2s1%*! 
znl)>f7+`uCe;Rr*%>ZqoHq|p=#^N_Rr4jo;*^c2GU9Y_IN)8O$uyF(MyIgT4e%0p_ zKj(m0LV*CK{{c^ktchz5g*~c6PqfRD^C+{kvA`??+_rry+v=M)Z?SH}h6W8AaO))} zYnVls8bD+g7!uFgw25n2bW~`CN)>f5N8!TU&GN#F15)?xZPuhIS3#ww?ju9Ph7FUF zlR<;M?VMymHXLXUk6?!{ptzKP1216O#*G^{Y~1MbPup`6a`0p3&70rwl!ith>6Da> zj)ks8eK2{&1YTl=ebJ|W@CwtVTgaTUD)Zmi|C(}AD9=6{r-Ir@=NpP?^CG~vYDrRE zf&F*=b^Ad55mdonDEiCr&4;vQln|U4fA3!$KG`WwelVhlKpQT6Gjzhr$YYY`@^7TF zN2Y$JJ9_i}3m9a8+QQM{MyDuMw?4_6*9p6Yb?PWG#@Po%Yz0+QERjYsUk^?2arMOP zo1hx?NT*E1KD+${Ch4*sp3Qmz!%aN|iX-@HvAtsP2gKn#`3e0WE1t{pa@l96kM%Fi zblXoA7aH~se3Y&rdU z4}9ddSzM{B{&>|*-FtAFC!;tf^f8jyPl9WZ5%OZ#p)&H%tUGQ!b#u69{rU}<)ckV# z^v@?um^EwG>NTrZtXQ#h*%JKfR;|vs@~SJ(KC5lbnzcYbF}^63f;Uz|yUqauvv^=+ zasGz%QFDbfxlJg8W-3+mR?n`689|h!f1Ulw!a)l@o&)c|LO!v4K#1rs z{X9c)qyjmSQ;`#=?oZMcOC$xK%?t9*OcKLsM^~U+qj@0s5^jet=U-RN25S^Zcl)M- zs-92jq>vM@)j0i9cO`j_&_}XF38XxH1%+HT^us`Sana}hw*Rd5=ci9Rar)5ytF5#c zn#9GSn~N9!^w+=sW!B7@+%lvUISGliYSn7ks8OfOE^FDcWzAYO<%C0%xLi&a!(US8 z!s!ZjSQV7h_*z0`K#g%Qz&QeZDpfu9&^V%+`Q`K%9?D9R#yfoG`M1BNzi=9{5nnke z(%>63E&ej7Vm5?@-!j3>&PPs=W1Spl zzcK~L7aEBF%qkL^3JBWHK3c*WKocJzX-@W@-Fuv8zy>C0Qj5CISk|3oF(s^BGda2E zw`)f?0SqRyMI3*TDO5@&@WbHx!!hH=;nxvUYtf>Gn2u{*MC;D*fi0UA00-S6egsPb zp}0bgIhC?y%94%cjeGa*#rMXE)r~G@5Fn$vNWD0-h^|C;N@$U|tpr3)Xm8PkqPV^j zCX8p`pc!1>&VmE1^Nw;DOtNrFP0Y=q*MmwL7tld)wpuXp%qWRj_V^-+iJaZ!<9E3h z!e8?5FJzc82)|0RKbKipyPh5&n^{$9jp>NQDU^Q@<7E=2H1N;J%D|H}n<>)@(z}fCM2?$J@LJB`BR&P7bR9DO0ar-Nzq) z^ob`P+q-AaBac4v@ke7Ar7&U{@WPAS{!qL`$|H|F+O=D^V#N|!smLM;4$(t)A+9vg z{52qIw5nGKUtiTuOA{tS#9nwOo*ewPBIFPOXqN&Pl(mOyr&{Xp^_Ud1I{qC&utw&C zFL?Gk9(+|3rVGR4!Vfb9{fc2j8vg|v{2c^R>%#$!Ct-VBY++Ul5^~lcP--PU!CV6) z`p9V%hxo?oD#jQ)cmA?%`!@6+IH(`-dj?&@hrPizY!2s>_}aln()L)^K9oGE(n)pd z)RIXG32V()7zN$sSZLNqFj`~lkO*M*I3N^bW_Cphb?#fp7K@a8eELV=TO|fx3zjG&03P@vRj=Q{Un8L21VEtk_-pWa zgPXqsUvyB>;3Wv+xsU<>Jo%5nw@Q>dEF^k?0hGx&g0Du8i)GP_J^W68lBo@!7?mL#hi4%Nzz~09axoMzgk|AuXQP-i-drf&Iw6@4kCj36J7KC3VJeu?+vljT?Jur%o&ZqoS*>`s3GM&3t#%y98>9 z2T_HYMhYVvk`k}Y>BFz}8`k6ffI-!&RW(!)iDNIlw9}l~b5c`NKOXZ@uU=0=F%EN_ zH-BDNQ~8STtIvE+BGL<9O33i93a-f zZriq(;I#qBm&p;Jfd>YyF0Ggq%jcJ%Vvj)v8=lew3q4Sa?FU zS_{v>8wOy?lZz0gqGFUC$F6s;-fYI)pSmw4x%da8-+%0}N1u868TgBuBOdrw@+2T! zZq943zRDd4;LpkWoHFR{OQlK`x$pAwPM24yQkiQUW=x+Ua#>SxV(_#e8}lgEWV~ty zVj0DXCF|4`=6i}2#Xn<3Fo(EGinSS&9hbUFf~!;&3N$+osu2+alAApvB2e?>$5|hy z^F*csHVGGVag?H8{vyr4x_t6o{K^lXM7Pg83iMZnD50zZt#AH|nnL&nNbn7`!&eDj z`Qa;p@}7;azq5Y{;k&>K8>5es4W2#?Df_Mdg-0?K5&uV$7=?c{eFFJZZikT%f5Eqx zJ+n5;!o?PjV-LQmu~%g91C_M=R3FmEnTaL@@P(|eXrM;mTeE^%Bss!JJS3kUc_|u% zPN`XXGp>*l^oc5Ly^)>)k;lQeX!sVRPgooqRS6)q+UhiHgkb<*pCYqp6pDsw zx82m>lh=O%>5@w>dF{2=PCDr%*#W?Tuo1*MTxbKrT z3D-ks99gz(Is3DwPMM5p)$GBbtv4Hl2c&1DS(kTjmB*HIBu?CVOpZy`x z`Kz+d1{D(CyyxR@5f&ej8`5OwuuXF1$`ubk{K%QD&m{iXpng4kKi0p46-QCO^X@y9 zD_7>^5P~&}e_AXI?Gy+mCn$_*C6t513r~@C{A8<$u_|Z2X-JNn&Ctmh#3^G%STe%- z-5dild-f~^3ORa=opa}&+x~*~oDr6ln+3n<2U&%Zd6biga^4*^YVo2UF%Cvcv5&#I zXrb6D>!dOY#~0%EaKcYaQR9Cy{>70~d;J9^>?LxFYxYm{ux7-@d^KYRGrlZr82Iwb z4BznI6mp^!&>b&Rh*;IMjxb`;M9!T(oOTFTb6wn8h=-A5PMtUde8(smtw9eOs3J5H z%jO8|Pfcaq%>$kKQUn){I2xhA9L9u}Spt-tl1xj9-V-!b3<9?@hU)|mV7u&8I95{? 
zVndAaQ~fpY8H6Tr5tB_1nER*NsM#6_NLdQv8#Ib^2A0Ml_V^*KZT_Mb7sxH{m$E~RIb{( zO`G@Me_#1?bDK14$}wP)5w=hjA{xb*L=X1u-S_BYk5#Hzk(yYQ%Bjgvf|fuU2F6$x zBAdVbvK!%MhS>UaOokfDgftL{J>z%*v~Xe(4rkxKL*v_@H?Ci|Zas@x0d?smm*QE_ z5Mp*%3VgD?yZ3zc^-T1Ie^*^~6=P9GZjBo^qM?`}M%9!8J~YBeVX&lHND}^|#)v#n z<)UCHnLL6HS`EvMc#zLR{%RZlg(aurVXCp@*{t)WphzWl>yKajgFX@dl15n*8vANF zd~{09#xBRsGYUSOpe8aJa70PGjg}u(p^B_h)rwS`oih1>sys0W0er96@izw=(6q#H zjNmT`G3->a!50F7m>7J3LQhU46q3R)mZdc8)Bfa>Pp3_rx@74RI%(`a7*(%+@*O?z zIQ#5#>ep+)sy!o#uAyO`gd>VTOKXJe9tKe$q74#T8|SN00o%kM`w2?_S^8`88BHJY z98yA?z&?mD#tY+^OA6#Nl8Sly61#5A+Tx{(GrP*12y1+Bg7LHIjG2UD9*Is4cNY5) z0LqDB`U|*+_oogUHk67CZ_G-`5s^G; z^o5m=D-xfa%z@A}60x_aST0-nGBwcw1jSDS0mi$Qn$S|mMsZ!aTY@d+B z9Vv*AAQ(v*Co^U%Ua~l<%BFkZV_n(W4C)*(2V6LXt;L+OJZsFDkM{1_x8diFseAVP zylL}>4IA*EXbr-Ukj*rpHn2k<;NW(Up>)Yo+#CvOYH3dH#0j52`DAbJj_ zlTUQ)cp>9P;6WceOYVy>`JrNHLpxZOgoh*cGXDeZlqkG{{@sO11P=5|Oj0KQAYF0+ ztiV4}PT0!3+i3mZCycfJ_kT!#jZ;)*9k-B?UK<`!{a0w8g^SsLp=qeO6%bOO74+|* ztNz7$4HrX)FIw^LFR@Q^<0UxaK(AHFM>lJrtO zS9_s_{<3sS>r?dHK#*#s^*aLJ@N)Ve+kY8+l@9S&Jz;H5A5Q@(@mKv9FTLkw4DfgQ zs5tnmRz5`kWA)c?`v>*E))-?Ns2KhGKmQzO^ne2h?n$&s zVGVBBU>YAb&}(OD_2@?*u^ZsqdGkN}^wX(Rr_fn%-M+2+jW;qlJgaTntFFF^a6fz1 zA&2o?^O3BnB{oC-P9IezBL7?X&;Nb>W&cF_$74PoG-z;e@JsHT*-6j`L zfj*kYCC-5j7qv{kVs+6I6^eTFc#Zyj<*y^#biaYiWW`E%+wYk0-dMtZRvOrNOl?~7sr{I<4d_D7P)^9HPY3YV_>kgzH;Cv+_N3B}5 zxc1ts+q7<5yKWr_%18uBkx%3_lvxuL{S?(jVsSvpGP%f)s^f>~ue?%J>wgg(7=_q~ z*L;&BPC2)|x|+=8$CySb0RxQk3pTzNnN2nr^ zDmKE9J)pnJ1w92v%QE*?Z0_87jDNqL`4vH*)0#BlbShvmk2~sv_tvjl%Q4&xe_Ee; z23|3KYfJ{qvQliGAVwFSQ?Ge%kky0bXg(O&&W4RC2yDuW-KWRoev&9RI+#qXVJ1#fw&b|3$*@AEYx>-jcjEoSyFva zP7WJTh71`3bQYBD-MiT0y10tizxgbX$A@zR8!wQA_T`WQZdNQcn~dkqZRcw1@}ZaQQYW zh7oo-kZ>U`pg|Nx`zU#dbfTWM2T-0zvIl+aVc;JIEhe*sE`R^4O7wu2e-UJ>%=Fi2 z#BbSOMH!rM2n+wzB)ur81{B7tr9wvzQgX^5CK!2o{)HF5{`%{YBS!r6$u7Qod%4O6^AlL%p|zu*PF_EcDduaPN&4|yWl zEu79TCAd`y;OF6A0KPpx0emVGRo%BSbfV!qx|$o{pTQT7`pQRuLlpi%k$ni{5dK=5 zl)>Qh)4nP!X4N|Wdg?%}y$JgDojV}-&K*0KFI&dg6^F=A*J2Z}ydo@+)|WA}B?UW@ zd345%>GbIGD})vZ2tNN1h%$q*^-u)e(3PQ zd+z-c_VGx@QTB7aJa8b3i5fJh_wZwnlq*w?5j8<~6a!UR@ZEO{zWzDCK9|1`SlD}Pb^jZW4>=;{1Dp?r#<VFtktXVmePh1pVKUY@96Wg7^U0sT`synzGG(fb^YsV5G;rpu zSwyDTRl09)>XgY-TDCf!Fi>(z@lzX}R=rxyl;X)WEwPZ69T`Cd(Fo3m66m$}rtW7u z1Cw?9EM2awm=VWBRthtml~1a?e!~WEUAAo5NtLVIddsahcJBrzX@}ECy!F=MLx-U$ zj3Q*?$YZZ)hXu>Z8 z0DPGM^FBx*zQ9jvX>dv-VxZu;XJ0CVC19Y2o40Mj>4nHHQ_PZOXbBegY3d#yblhg` z_GUW6&> z2r#-Z1;h#$NvyzJOH7<*P1tq&%dR_m-p#uVI8Ctuy6VLN3!|Z|002M$Nkl;F z6}gORYMUsNM;{D=-5bksW9gVd;$n2TXo~EC6Embw@M)zT0IVSBkM+hqX4|p|#;wFB zRl4SytNA~F{`@IZrp%i+Z}yxyoMim@=M%<^9m^Wq%e!3ctZT0zX5Lf`u9Bi3&=n1QtO1Ss^eE4e1811^;fn!HtxW|v;i-?#5tRb z7A~q%r81~8XCo=J?ZZqC{jzK4`gI#fkhd4KgkR{5gitM0a!jpSwaS(&haDqJ9Yt&PBpgif`h+KwCr#;j#~s|Kymi~QcSpU)&4%D$*SixsV%o+XG4 z{`)xe0KX2h4p79eWaCT=xrHU+6q<=SbRm3poJ->eW13e4>)IeU*{Pmv!K`f)F z6nHAp$%2v}z9k$hKT5V*cso{pLR2hEH2jeM)Dze@viN@?>@WqFmlf7qKKS7Sdu&+@ zzKiX?eW`dgtJkjLQt$Ua7`^%DpIH|JYv57ChK(-laN!MIZ>VE~SyiBm`Q$aRGKsNQ zVgi9o+3bS$+3csm*U*J3Pvk%INKbhux6xm16o28i>2GNNrEn4Y8+V283Ghv<@CPG-b+U-3A&L$LcLk6Jz|#IxME#F1q-l#~yv0g&^#Z7&&s} z(q+qz9M*uecD=faj}Sf5a#1jd%Vq*nGs4Ij(|@ zS=KOS_)4?di|J}C`Thfk@VAG*@doypdWi=qR>e$cc223%rEqA0&tgqnW5+wdUvW~h z#60oXpV{Dy|5L6)g&H+#YUpjJd63O=iiRSeSZtI7J0lVFXA>@Nzw+hA6iZGf*eANE zSx$eG0QO;LYmh`=09Pvc};TEU7I{ABuljiIp{#xrqG zQAn7VyKDDuF0bGG^Co<9#=f7cyoHs&zpW`P8ay<{ffkMNXWhS-iv=orE;OIwU||n7ig}A>~NTIDJ>_iEcvl8 z7tu%wyuDob^vmT_;$g%Qse??x2g#;iDkpjd<9Ai%Nk-iQ^63ffI&Fa;u*IMCCV`RlvJ7%OU5NPHT(nCFlfEGHj8Gdwzszn|( z)UlENkOC15Lw*Rdau~j9RzCc#zNjTiYOs|sK*&cPe0?=2v<{6jGazZSnne-H8hm98 zh$F-dHI;x9(1wa}RJ>4L@mDgX!o1mcQo(>!2V-angjHgtJhxIPA^8#70e^)(rIjBr 
zDkG|f7^zNA)pM)V;1iu>u#MF*G;;T)rcVEI`in2TxMcYf?gPdpB0I~G8Z~M-Xz-Al zHEVD_AR^7k8OsP?c*?nN+TWyo0a?&Lfz9FevIYk9r*f;`S_66ruF*$$&=7UaDr54g z?{ddBs#TN7^*3|pegDJv85z3VcGRem zntaaW8VW5b0yqYNYieiBnL|8u)O+vFoH-Lv1fF*8*zxT*bKA6S6CT?avt!4Omj?`_ zbtpee)+;`czyy&9p)v6(T z^6&~L(0?Z<75(b#udr`f*|8U0bRi)snO9EAut^%42J$#ha0R1`kxS!-rj%sP+hS(y z$l-Jrnx$tPS-y1Hij^x_fKMyfKgb!Nz}I$Wp~$XA#y=d)LA6@{2mP^??0ChMCIup# zb@o|Y9e_dlX3jU9Mvo3L5#6+DvzJ~R06y|Qb2zO(F)2yTHvvsahHQW|ot}h?1WiL zk|4-T$?dn_zIfqcP0Pf@KKbO6#3E@FQ|ny7!pi^8#*b%V3sWlK4_5Sh;;Ly1^&;P9 z9K>J48v9_{WL&qefqwn{{jb%)o8bSC=r7GNoYfA!q{-4>YYQJ>t_*rb`46VvJV4LY zc4d%PYPl^d^bst3BSBCDh$IE}Q>l7j5B+&`|DuXzkd3wyB!QDE34-8Rf=XL|F90xw zBjgkDzStq6L-g!BB8f|ASXwuzY(iP^-T#U19}O~DJcH%>x9fjhKAnW3nrJ7M(!0~Y zV?2tn_J@-Sl5oI8U=spz>6fuXB}5v%IVmGxbPf>&!=aKbhBa(xEzIWSbQZCm*7UUc z_39Px-J5x@p~Hr~KWY@WjVxcWeC`|ac7Sn-8n$R6jKvmC_eU?6#`TV zEc568VEtmHZS$--~ZnG?h(tSU-|TxRkS;HZpZAiqo1uqSW&(} z|3n?gn1!SqiD69&(>W-o{=r{(WTsHup{tdxmo(tBPO-Fh)B>4}}Ut9v{uURn&1}B;P%grhj8;_2lc5vt5 zfde0X{89gY{fJAl<^{)wJHg78E6b%-j9)5OuEM~HY$}eMp#x##Sr(0%$A7?E;1U1L z50rKTKW}^y`p*=UK`>uh_Fp4Kho>j}1@^&FGXEJ|-M>;_CO=aM2#C5hiIQ6wYZMvm zr@_|<$1)mJA?M~BNl;D3ae-3*B50T$fwT>YY6rkLZ(OwkMHNwouEFXL{1c@~zf`#( z=`G***GdbjvKWz-N4b>Z_={G@^Pi7!WOBMfUCWj&B|G^~J@r&hvL-jI#2LHxnrr(! z^K5#0+N6mS_wCuseI_7-d)B;Jb5xSSHb(@aBlyo0=8Jh`+nNc;I1dqgb{6{v&KozZ=ep0shYn#-5H@*z5{4Y+Y}xif6$=;sz)`t$E|3>1mQbh= z{-;^9+@eJjwrt+KVcmM@BL_G(mN;hJ+I4R3aZ}NhVn8#R@CM}q`NRZ5T!JC2!sEnT zPU_gVZ!ZdkBw&$%PhPNK!~y;^=-2ySGFziXD(ml5T5S4V{ck`pH zA9^958Yl2vKWK$XJ^56^sbzM>d1 zFK!_h7;AcKRu7GM`|ZkAs}iLq1*;8Fh?P-0tM-AtcHO#%9(WLC!^9Yhzz|gWNHTJN zBmpuCl3!qR>30*^1&q>SJyAkLmI!)g9H-DKUe311J-hcob=r+UlYuY|t3sfLDGy5_ z*mNVBFhNT28x5z`YPgMKwZZ~jL6Eb#{!}7_n+SZG#%?0AF=}Py1WOf6;6>)}&*^WF z41cGC@DO;x*B^qdH%kiThhK94L;iX`GKvZX;d^mCISzcQ$B2{mm>~pTZ=31i9RjXBX0BSx{ z4T;QT`U^A4!hQ*g&XEaUSR9eXNn<6$Cn8%keWLI`zWmF0NGb(@Z>q~+dD|@2NiW^i zpVP;4DT(Y5C8U2o`uI~TL@tLbh>@?BBmhdx2S4QRZ{iOyA?1*MRen^5y~JNb;Y9i? 
z{s(ylHxGW0=pUYJ`fCJ0QM4G7lMdoyKmO=rLCDT&(Y$4~s@3QS2Mu}^GeR0a%a$&S zKH{EfWTjaKR{A8iZQG_&g$hKzwr}75(~m!1aYdKWqer7n&ph+Yr=NXFXbkm4wK+#N z_22=zIy{f3dq1VUJ+h>+M7K9?+=y8Kb|p}mtGn`i-{=l!r=xK5`_T= zAKBpCn5Un98Yfaeb8^a6DF2tyquD;GZP7V#_uYFRvHM=Vda(!&c~+v?0ja(wz;*aH z-H{1QE)gp+0fJzWSR9&}Ieo@-&f!8p%9JU?$?mckcn?`Q+)Tnrmmp!eC41lu4LO$q zBCw9^@ZlrHQv&N+WcRngAaNiNU_jO9 z>1SA!4ma8L%6YKN^J8O6q!dporrQ9BDDjBHLm`^_AXb*e#Sx8yHFIXq=7s?_$gEkt zddrrrWG0aeytBZhIGv`NuH7`*EbtWvab7ja4^}*YJ5ZPym;@6l6U8(6tXRH$%T}$p z$Dm%lx>c)Iqw{_Kx#xcT@h75`)laU@`5-_5b5LP`bY|<;T$pq4P+DfjetefVh7aRb z1S&nP@u_3Sj;UO^vJRopMthy&Kn#nboXAs*z$X(eIrxznc={Pm_c-iUhzUlbY&fHM zco6X7i!Z*Zdk+l$;6a10x#k)iuLTPhbnDh_;DDE^RpUrFUFHM+GXKbmDTLbxD$)bt z8or4+&LO3pzNqQuGpV{*Gs# zc~X!*(ePCVe}-LzDq(`E7x)mqeOf~J2Cr;2fE;`SCHv{{Rn%bT!8b_J_!odrv%^M^ zRaFB&fB#~*7=4Zd--}1lCmKG`gzWF2f8f6v{zm`MKIg&LH$z1qd0+CSo)CZK0zjU8 zhVTPO5rP@G06zW_jO2xqDal^gRITvU3|2xy36>_slqp^A**?!=U8jHfB}Qe$$dRna z-2KaLqS72L*|ces)@|BccO4taYeHQ2uTVD3CVM-Bc2KKO59kjABtOPqOVVQ@;`Gl) z9|{TO0RIT~$I|Cm`2URlmll8sKI+rGpSC_lT8C}Lr=>5!oN);+tH5Xxu_X3cU|TV8 zOg7{kh2$~Xik_RxncNJ%X)A?~;=X3mC0V4wq6=6vm_pyQVZ(+F9Xf8=vYC8bxcu-# z4`O!_G?we~V!n!;9Qy?v;0q6XsVraOVi_%qs<|<%R<0d5aKMKjeTXT>PV$fiWqN zJLo3-)p$}E$8jz`u3fu!?e4kzF4jh&F+`~tijs}tYRi_b&O7&9iZa{iD$F}k)KX>2 zjQ-$*b?erFf7&5#PAsCeMWTyHv3=_{)-Hf#GW)lalekYw1jC`yiIbus(~#87nKq1e z(O_0@*)jqEf)&T?yDW^0Wxca5i!6fV3A%Wp zwm}5!1Dy;97UHy1h1^KugOaGJyR z#)HCfy7vk2BmkRj4RkbYGQj?zU%VK!qgiQ3(u)z{0Zpx{VF1BkGw5H)15-c)m>NEq z{}0A!Cqo$4ZKwf4O#h?b_@Ty`{fyQxYlL$Ct^Kbma51);0O2oB&WjIl>WQdK4b-ns z$ILQIs?1{Qk%(@Tw+tar2&%v#p}xX}8#ZoyT9c-C-P!ZUB}?8N`OXhN{D2ItUAuPZ z&|xf4Y~8v|=PsQuy67TYbH--GXqfU~w1f4*EwGgWRw-{(7s8iBh26O&BO*kP0$G6E zdD#tri?;-D1jF%1HO%94{7p*3wy0Uk6@jBRi<_kI)$y>lgNMd-16X?LyMmwWnbNpd@u}r2S8m$4F_Xo+ zF)^pNXh~Tp?0Fq0=t41a(p$TN)vR5sX_IEm+Y@utgogTN&Lq;NXxdb#cAbag9zv!i zjH=AqX86-EMzd^#`sJ5h^t$LO$2_lHyH09J@wnJCPCtYEMAM1r1B*U{WPtu! 
z!XWy13&jSBgVV>$I^RhQP(p=*RH&_{onAhM)8#Y)Nb@4Kiol~us(!BzW@G*$x|kM zKH>8gzrvAYnn018itHnA0fEpR8Phl1ntg z%=F?nbiJOZquv?Csg6uBaX|iEci+X0u|~I80x|Fg-o81-IxpOCHf5Oqi z3579Jjw2lyz_}J0?_39FfvI(AVlh9|Mn)6|*Q{PM?2X|R0oCd?YdqKgdE)qz%LL8x z0qy01FC)76>&Oq0Znj$U5Fy9!q)bNE5w<{TXb>N#HDzWW1RdJP#S;`3VubK8N=Hd>pG=Tk; zkgG63}l9NNBphsPg# zeB3AF@CP`^h5_NMS+nRVUVZuH_8mHO>D=YiCZ|=cQUw!)5hzNK-NCB*h_DC?pgVun z2$1?0_Me5|@77<k>IqwPM$3OE-pFMka*Qj3O%Nf&IZNymcmYZ*xIC&CezxLW| zUm7?N9BI*bfCpd&zhh=OHcMUf+Z z$zWpSvFd#1lu7+$>9;952AST9y69Z6+wS>;u*O8Z<69=Oc~0k7(}6ha)gGb-SvfR0 z&x2Olzk~k5I@dUwgA+oR&BcN%RxF$P#TReC`4;jZ&mkwLLWPQ~)4c4mPCah!QKEPW z{0c}%5r{)13`!yagfk$lL*t9-y|VbrsOb8|!+%TvS@R_LCu*eVpf?5s`f-BsuY~A# z>o4#ia%MJH636Y{vxn78ga`;8-Q1%G++iOQUL3J(g;Zi=QEftF4NhtJ%B!#73nAQ1 zn>IzM!31sMK0t&Se!)51BmlA^OX%jtv^=9FH@u-89E^Thr%qRN?#zvIY-y(y@X3P> z;T`=t|NQg&_U&sruZDW7+3E1oABqRaC>O`C z+l@D}3mz4wZc2IoOQV3#3x}MO-SD?@sCdZYEUuX%IJfYJg+FiHh&#)=tztZI3v3;SP}Pg+l!*?r9!GmVOoh0=f%(6BO@d;DU|sD8dptW}4I3`Gg2TE*i|>LfN(4>_o4B zO#f^BquML`FBt|M{e>^&+O@zM7jdA1oMA5iB+Le`ccqAFm#1VsNVL!h5F zJ0%ey5fH`&ve+xYpCU>wfWJ~aTaekx8+;OiYE;T<1HOO;@Pl$u^ic|B0{ktnWvW=Q zLbZ`9J12{V3ag1gfZ|rvClfC|2nhSa%GHY>JZX<$N zSk_mc2B4my(RE!Q1boPgq5i{u-EL;47m@U?2M7 z{0)%emm?rFg$e(aHS!AaH!KCnAOD^Dt4h3@cJI-hg53SLb;}l}76_Iy5MnJ79oQev zKmW5&J~@2k$hY5qOJKTo?b=L-vJ`vzm(y8lPDJdHhacUtc?-Hi_uTdR8xXT;Ura4o zq7DQ}GSCs?ku*M~5uGLe#PFfR z{&2ws@Qx{Nc7$Rs5@Lye#c=8Y9RxsuPrU^7SRa8yPd)Fx`!46`0~R^O?1-RP46uxe zVT{4GK|;82VzZ{r7=Eype%G$uR8FhuXylsYY84nEV|A^%$0rU4MJ_&0ZK3!ld-cK= zfi@4ZIm|bH_d{DQtfUCqMt=Rv&MsA=6s1tHE!#HDnez?1&c}W{2JHh3yudyadjbft zAuhqhB_f2!324igFC(~x!_6)X>Js8yC)%bpECZ^kou+)C>Wy7*jIY7Ba2%$+%CdR;~he>^&jruDkAH?q|?z zgJ|id&6~P*>sqIF?f2e$j~fy25u@x6r2~D6e36v#g4w3(E_P}wDWu6D=sW7~a-mdz znSLpkpeom~{jY!;oM`{S^D15CLiiyFefsNH^*53!5579*pnnN*A|ZsYCV7p@rdNPJ z4?_Gs+1g5yBW`JVKRj=K_jymWkx_%C}dAepP=lo(kH|} zAABJHOZC6j$|(8={kzdpWQck}jVSqy&_7x}MV64PDj6l8qK0T{$@z0xnt(g}=>8YG z#Fdn+QRNZCHe%L9*ss6g`fIMfno@uH%U}9DOB7}=hfNWY;XD(pdh=$@9(v@F^X*0v zMZI~s7#+9&5?1y&&=QFVC4xpo$>*{7D@D-57eAut6Ua{h{~w{hbO$V>VC`!E=lUb> zB~ntZzWVAHUVMQ@Tz>iGm{W#f-MV(I&9NjgF)VInjK|3#Odu`!=_jH~oiDqbeT&#n z>@)iwR;^r-l9bHZARd~eY1UZLhi*FU(4n{Ae3Qc&$y%dk%?78OLjTAKM5SU%lgQ2~ zh{B+dql@SoG;ZWvYOY2pQlx6-s(AA3!(d$rch7+p>a4Y3xw*CL*2TfDTel8X!30=I z2LUH&*PrgcpYERii%D*BGAE>^g*iE2eDMY6snNf1U_zVLZG8UYi!Y~b+O(PO39MST zZVd+*?&{jU*cgVsj0Mg(;|zF*XkijqH79O^ge?Q{@GdPSL&)F%+xn|E+MtQR2VLjJ z4I7_&=BXJozS_O(7Z%YlZ$}48)PPe;@44^Z3gs){P-$Rd=ek7eFT*8;g-qC;KB3%) zpQHw^DiSA}y2Rw^my(RE!5jHi!15m7J_9H3^-tGdp<-f#&@F z{ulbQnjV9;Wiv}Mxs<%x-S^x>1q`BXZZ(HJXNdZZ-8jjiX{%PP>ea1>{^CXvfG$ih z4A)9=790~~9Ho(*Ph4<3yC zuxTTr|IF+XSxY-~5cs+A>~Q3?4c!1vV23@ApHKXJ(4aw@-OS3u1?t_q7dQYFPYFdP z=!RHC%a4LziN(@mIKU(0`s=O--{h1e>=>RWI!?^4beU2Z5Dkh05>sAZC6EeA8Zgsa z^)J>C7IxwN?%TJI%@eFRDP6iWhk~)^l=BvK&|z{hV&XWxax4j}#pE_>)Ckv+Y0mE! 
zEO_M6M+nj2219T{G~K%1P`-Qx6vu0&X6DpkGPB5XdXtlD)~s>LDW_<>z$4rBMWhe( zSOKOrs=$x(-*9VJuU@rm`7#!Hus4C?oKv=N(IVP_SAf|h1CDDbF=}w$i%e1d3!-Q; z=lxgEzbM61y_i^Qd^5WI{{B~Bc!|t>i`r1>)b_{rOw}1bj17{cfFnG0KB;Phlt8dn z;YdSBNkmiQu+c+?4rSXavoS0Im^x)D)6q+oF8$-x*VL+2T`e9>q#DUl66-!7O(l$24B zl$;ISx{K7Nk=0;9QnkR>z)o>!&S*x0Dc-bs^O7Y?*wM?z?+^zSV9KaS5#XbcjC?Q( z5QZWMw6MyJ-LP@v>w^cQf}B6a@vi&#?Wa!8hdyea7GYdrEILp(_7Gv1l2{sN zR){t^y(9DwNP!kOK}8E~Yhb8-h@eJn{%jK{K9IePfI>#xg+$)@30 zR9nylf#WicZ~_9)W75(NiWj^jdrD#{KCAP|>GVhEKmelglfD_w`Tj@TBk*O{4D_dW zdH0=nIdTzcWdeXMpEE~l*QtXrW8bi!?9=Sttvj>s#Oc`itBWi zF6GD%_Jk&2yDz)s(r5eit68fSt%k@6g%jZ&EAXFq;xT-w#~*)u_paTXw6gK%pN9+` z*1Sb?0(&%yri(FrvBe9rsE!-o`?Ml zb|KGxnw*-6gc5>eMJ=oSlo^P!pCNrjBNt`|pi-gzaZ4%;JQF!h3skFPBO7HS0bv&8 z0y46#=LVaDDgtNLnxu3}^Zehj|5fYqsHLy!?n3$-g)E1?@kIKbpZ-udV2MbmhsF`h z#GCgWe@Fj|yX8w5#DW+N!vvGF9qn`r&Qc%>zis=DZ@&5FlTSWbv}hsrZRX5b^XAXT z{?O(KFJrL(Gyb!rQ$%-3 z(N`Km{{jh3Rq1PVauZEnvu4f2i4&$x`*Q8Nb%zcd;K(-gse1Jq+;`Eo%~?&FHNh#; z`A0grl`~${J0WgRMHT!V`ulI@KYK4OJ0LvsTi15hjtxy-EB7vhqO=?wg^IP{IRShep9BI zp!B0!o?kIr%dyqIrESUnV5AKpKGlam_uQd}9;&uki{_L(7hQY_fDnc9m%B=DRS;HJ z-mP4;q8aMA=9+6LAJDsIEp(y_jel?>r!*jE&8X?vNdwi^boFSn7Lp}dY-BNQ`V6v` zC<$oWuQ&VVu-ZcoIppDoAExl9K}hbVO`8Jmb1UXGs;Od%9Oat|i-il=_=0TF9d_LD z$1?D&!Qfh*a=v<>KJd>TCjPRJEu`;XHv8tOR*OIb8R!d|O2*XZGqO$p0PQz22bjpQ z5v^?H8|Ji3f&QPfe|0v)MJk5eEAhtV323)t+!^6Ocv-ytpNH+gzg?XiS?P~ zlO~|c{3vKVEUD!ie~ll@ljT#XD38R(Yecq_aFZ7hc(w)hcalsb_!(gY9-1uI*qWIbiu2q zF=_EI^wI2SaATFmQzK>y!#_AT;AD*!1N((zxBAAz*F`V0sOl6u6XrVkhzE+$;hglFRv5hT<|P&Zv}^tAGy)W`uKm2$Z=sh)?K$&<=9k>(CEo z?JJNKAg5o#-~I*rD#BgHdO_M-< z`t&*bSHIq9!;N>@d6y3-e8|A|_)o_9Lzih>RV%Hy5{uXP)LCQAHI`p-h2^?;+imyV znWOsq-~XO|v5uWO(o%;#?3d!`lUcJ@T5$ztgR;f`T5GP^uYaG{UV9w_Y}T~p^yyQX z9D4rm&u8?8xl8oLFpACrm+P)G;FOb3W*;tsQ6>z#=^50nQTuMZ$>ut& zT0LCN72`C*40HhA#h9?LIJ?$G(Iw%&T&xVLHd;7e;lLiez+j<4qI4z0>(XU4{SekgUC zEUbe7X$yzO0F}K~|5JqOAWkNAtKCDuPo_+x$b$1tH{0xYzyBSj9FZyCdavAbryX~M z*-xfSr5VAruMV9$d_3`EVo`z-kv*9E_36Wb24+WVQYaX2`o~ROI(31dKZ-P;8b;ZS z5%~faWu=FFYFo8n$KQEOE_jvw`gdsdlXV2Q0!3n1p~CQl!%1p#ESZp(zL(j?mz|k z*1^a611YhloD}0{^o#ma^e@mSy+q=(*T2*+bP+!H>~oZ!EC#3O9b^ozEZyupuC3c5_HEXTu44UrcubY$|SA1ead+r*qFe7kD|D zVUOMR;QKr(FRCY18?JBp$PD~xE`IpIhesZD#Dfn$^ruTNz4EH7cidqh{bDS@r|yO& zE>eJaO3A9wfPUKhYqwwUr{OH?fqQlECa&Wbc~RLRbn?UQp~!$8cgDRl{<&wLz4g{x z=}(l#`AmauUAwW{1I=oBQ4y;o$`K9f`>;9i27T=sS4Q72N~!^T%)w{1U-K{1AVrP@->QGMsqYUL~Y zgAYEOFku4j>ay2RF4I$*_wV0-ryX}1I&3Ik#dK$HvH4~o;-RmE59!s>A9~e~A9VaU zTD0TFz4g{u=+i&ISaj#kU1+n>T4O^dcyq(M@tm4jlqwkdycwAOeA?$~sc9715Ety1 zHa2A!EpRT3f(|d1egX(4&la>jx-Z9SwX1`m3^bhk-l% z>g-<;6i)ccP;(vp7A#nMDP>WY0WbmM!xQ#EZz~I= zP+u0a@Nvav$<*9Tl8R0r%3?@RoEsmhw-`pZ{JAAZ*@!l_XOGq1EBC5y+!Ph0*xIn=GiVUGSqH9e(0TCwJ|_Ef+%DaMj&pPXu98o-NDht*sS~O=eZR6Ee>3if)esb_Z z2Q#A|LNtlcf2w|Z*y$!VWjY$zs)-YhO7@lVKm=4rSo(=K=Pvk_Kt8YiMt&ppz$ekp zBq|t^{Yn3mAqok&k+ujXu_JtqvR;>3`xTGDuaP7l)??$EvKi$Y!gS6Qfk0W#Qm*YJ z^-#?b_?PV0&{;>+`sM-q;$!CFhab87?z?Zf<(3UM-UuyZ7>Xgy9zB*j{6~i~HF}LM zYp_$vl&Modl?gUij^w5L!XQ957EtI$2yDtY%)U9aIwp(v|=de3> z-eu>TZn~LS<#5KHQ*0ph)KgDSnfghO9?LVwXZPK9r@61r*@}vF)*0Z%DOH^~wX0(% ziY#XJ(yue<_(7jcok{}z`uC&qL#17kR5otfOofid$234^#WNm-4lE?j)}CwxOd&LL z<_x^4OsQzsUvK^Gw%_jf;|6gKG!#TddaE@c^vko%guWP#+#>o_S6xN3Agox^&IGvd zCpk5%l5(lVkZB-O+2_=fA(lqc2Q)_qz{$L+o;e1%Nn_^EVqfjqVyspGF1Orref##K z%IABjU;q9m4jRFh94b%*!Q<*fvWh2KIasnNoPYzy|G3bSBUTh%un0aB zIN(E>KVY2!!|oXN*K7XD46h@OID)E#I>jjILyq;HY=eNFvJ!Lo6<4G`j>SY|9Q?(0^G15B!Ua zrQ*p2CftWOQwYokuq7`%oF-3*JF=+IdHs^?%k{53zHIx=_)q0qC|o2A&q`ijo~Z-Z z*^!#62r2QeT;ZewQoo|%`aL5+c<6+;^5bkfk}@bbGy0}rVp0B^D+S8&A+YGn!)yem z$-w|vL8rcHTg?>3$(v=EH4X9SXaxM`)W4sKzflveky*=7g5$p>VlR2)B>egHWl)xa 
z`j-(7%112x>e|l&S$HQ-z~6c-<4H`p6{7M>vXflaTYo)1k6(ZN z^#zseMnL7FN|2^1^R1ZtOIb%rNL5H7Ph)uRz4l^aFAL$p?XJ7-JnYazXrJRvzm+iii;x+RxM+Y=wz_oiONxAO7Irb=O;;jxTz| zkT3L^N_^7EC;jEme^EQ4w&LickJ(`T4T)tvA(%`Cda+BtNX zSqVoRdBo|bolXToWko1)mCStUZ9f<6SE3igYQ(Anx2XooTICA7XaYAH#t%R82(6Uy z?~J4Gkd`ww9qotBHrC)#uPC3bg#@?c%^Y0nQ!d&+5VUk8A%9;a+O{uIb_wKoOK1C8b|!L*9i(%}TVYp=bQmJ-wU5BkA@ zNS1jvbXw8gpqCl16Hh$hgh7KySF=H#FfbAx+h%_f43JMP@lH#G&KfbLf`L*|ZD#c? zxJDGWn(#qcN<)nvG>9H^^wG3-)QaLfN^L5yj8R0edSmF&q0AT4f+uH?brMY5zxKLo zvET09yB&7eVUVQ}h+VVRA(rD3GLyE-Ea6j8yK?W9hYr0BoY>HV?>h!N`Rd0=S^q&0 zR&Ejw254Vt5yR|RG%Der)+z00wj;#$wdlL}eeD)F%Or2T?N++O+*%fk%&=jY^ZfJA zCkq%0We(zT zpz++K5sO%5S###X23Rua0%a=(akvC%!G6(-g_O^G?YY;BBS&z0m4s5sfx>fWD4sCr zM3z?2LRQDUSC0y4$jF}bBZ^;L{p%FnZ6_}D#jYBGLNkhmRP;Ej(g-o06ua!jIt54)0#4KiW+3*j9KU--vhj|NzFfQx#iYdZlN0zwP@9< zHPmpARR91$07*naRI?Me-+ticn{T$`jytWg$||Y@=*g9Si zR{v5_nz@QVaGvhTgB;}i2Yn~yRzaw-N(9H%&19TE;v0x80YR0Tx(J>D8h^n>>t0_g zibO^{r``OcKPoNZpU+Q`R)MNC2uIbZdQNk}^p3JpTk;r+lZmDF`s=QrF>5B;$@&+* zR5@ga<7rg-Q^z-L`oIJCbB61z*)tk7s&3P!b+_fZ_3PW8L{lEJqMBFNsa4qOC!b8& zbkj{%S!GqA<8(H5o(}m2|Ip_vjc2~qe|h%VG(Ot3X?wzn#~1adz+_@KlB2E?1{IZ~ zN54srAtuQFsf2(&Z417>Xl=A=+m_vz2!rj$E@>u_@DDmd$JUeMpJyka<4)mcsg(4y zh`$K%XZS;Ziu_9`*%kVerHgBlZ$>|bGfPs+qJF{z$*?rtGvFH0Bg`7HyY9UE&O7fI zHR??osWiWs49FB7c86u{+nQ^x#p*LqitlP^3iMR7RDd?+V}1Hc!TM95|6r-8pUy?W zS??u$c;mIc{RaK8uN2xMQPKtd;aR9sR15h$*}C+L_!RM9+U!>fR*ui2^k?z<8J3pR zLy*x__B=*3eBnZ>L9UA~{^Q|?9ZqWWOi`gRb%@lNpR9_V8oeOj2@@w&H)%?@1r2+u zB^m+L!K^fdKI}m`^pgNkV2FYC0VMP(zGG`xk3Hf*aJ#BTUxopTV`%eOv=9*IVplU%2toccB+dON`kgp&W zEy|S!34jHB5R&&!jsbtkm!xF{IxS1KPAMR;fUd{Ol;YM5!E>KEuE zECZu_2R^~Sc@bDR!?yU5w_e4Zo$QhxGcScy7=?BFz=T8p6{Z~jdiv?u<>hA(RN^MO z82%M9`z$M#59T^JPy9ztvV0K@gMcvq)jWYqn#jD+`l2v_XoL2PjV%hFB7Jcc;D!GC zRR}E>e~G@K_NS?dU2;<7Kk#=g6G5DeQByhu6@W{MfAC)-l<_YLN>%a={(1XM^h0Ng z^hMy)=r7)W6F2ar2`@TqH=r?P&O>K0QT6-pfAGvRPygc|w>$8K$~s4$=h#_qgRN$ zueJ8t41VyTL0L3lz&b3(;)6rRQ&IV+%P#xzj}B)5`<8#+O8IFu(maqKAO54i{N-}+ z@i(+AhWd-Su^jKq7}bpFGijN#8e`8r_oPY+{E;;ioR2u-NKV0`<^0@p&r-lL&tvaB z_Z&8C82WR}(Z{eAQO)dGRHBTWYZxOcEf(f3SU6|y?7jBdmx0nQUAz4K?>BF_!TJ=! 
zq%vg4kSngda^#2+F-@B-$uGbB&s%T34ej%nUw(xw+;Yp!eAB`M3^0HBnWvv(Bir!9&+a$cN~Ah2^?EZ`;!?oyY05;#TWmvUHf+KunVo^ZXh5e&5SFlZDwC3{a;l1 z`DNQL{O9F^J2(=vFp5msaDZMg)}XM6Z20i!UwP#v>UVk#XdtaUV8DR~?7#6w8?WBC z54k4VQf0pZdHwRu$cKO|pGB8HUrqZN{iKtC>FJuBQN@JBk2-YFVNkSxKs0C6+JhFQ z_6a&=t@?ERu?iwgm5mUjVMF_bMgl@a<=+3`1J*&)d*Z!!6b8yP7eYT7dW`Q=W;xM@ z;jne8`3WC>#L+{1F3~pT$m}TJ7GM9ONHlLahK}7i*x7@_x=Do!QUkEH%*UUPlB2 zbzfB#-D5NeS@Mj&y5A$^c+w;`NSqA*G_<;P?E)!x?Um8D2(%vvMF$Ab^{?g@#^X^^ zazpt8a4niO=kQ#n!k|A05qxNW>cC2fB_WuSZ$oAC7^H=>% z@CNOR{vXY&$@+f~vOY9e<$&?dnnfojU;4BRr2$J&KPH`1QjP$nMYJD+rJ{a*Dt(&M zl%EqnnsDr~$5HAk3G$yVKLIF^Wlp7EIr66|tNLMpgsUu|{q5;+x`_^d2%hE_gd-}r z;LdrdIF24OhI)mXhW<`AvqWh)0hEpCcG_td`eNUD>us=0sWe2NB*bo9reDmESj;Yh zIQ0_$xWH8S%hx%|f~Q}FNy#U|kzg?xVYo6nCHlBz&h|_}5<=;xz<)@hNWVl^q!z)y z1Ic&E_>XJ^wTx$x;0ktw%}A{dJ`u)YgBgX*vXwvCF>%XJ$|$l9K+qqiPE4K3=P)WT z9T{NB^!??)p&m4AB3jVTQWU@S@kze&)D`?FVFixl^_(->V0igw`bhvsFbhE-bIHk+ z=sPG;7(!Go=-X$yDot0t>3aQ*H?Fzn8b%A5IK%81N-i2z9J+VhvB%zd=baRhVI8`2 zoYI`R!7^x>&tOE)fls*R9OnEeT*~qnZ@gz1 z;hsMvaw41br%2!7lonSCl9m58Y_`w-0?FC@IAbZa*^5{NF##BIB4i_rCimP|J8F>& zg>v`^td~>@Il9=)z0FguKaB4YltvYQ1c4gG2OU-2Lrg(q|YhJ<~DncdPYh4&l!$(9flU*Lm-_?NfJw48o1F4$6n5rPQPPe-s~ z2!FobQSz&=zB=?Xs`AkPFoQbvOObLsL%Bixiy_6J($yta&2b(4?dK4YA;LwpfIBFd zMmR*!5?#k~loEY>#lHh_%k?KE;~#~|cnkSL2_J>e+(p1wxI|9upJ{%CU$lLt6f+ER z`l9N%{#7U#4t(fDps5`!*jy5D&W_{hW>#|wre7Z)2TW-zX{D1gmn9!gzUS1^TZRhm z2}~Jpcm@ZbLO*B|#TLg~F+9(8@DKiRPb_9^EW(URchmtW3zH4{umjT*%!LOpx-X0$&*QjrfDPwWlBv-xi!{UgK+|A^6AU!NC0Phpa{b=>tHl3 ztE#Gh|L@GIp;}=v*W7*g+gD$>jT>`Z2`%24(`S+*vG7%SXsa7LcI*~gZpnu$7vmhP zkY&aPWKTQoG&=QQ+w-ET=pDHI_S2YI-9+wHjaf4&e2fvfVB;BFuTs_|(#Icr zY)ez-BcBH6e*5jmB26ezAHqg(PB0UQ-*8*1?bo0wcPzwt=IN)|AcFlC=mh{hvdJv! zjW*i&)Sv(CvBw@mzFs!y#rCUYeI@kG1rDY1N51y7%7?g1 zfrDLBi1Hz2BM*^1h>A(&^&@X+O8CpSA^SyD=wl)bD#SP|){!Te5iaB3WRy&a|5CX0 zB=5wG9`f?1KK>3@iq$ax4#cfN{)2u-B7T)+h3CI7p55S?hYJd$zAyz)m^%1?2CYfl zX*$Z=6cQE1N%R~&&{hZs%>2Zr3f?;Sh!h%%YlM#EGx~{wX<)x_n^ID=JVus;{Tvmq z@D8VlRK^cZDO#<@p`AMkB!Vj4jDEPlc+K27@JabbAx8T%51HeySOp4@i!ep`DuCVb zgAOgm6vq7E+aNv>i@P4OtIks>ROVUwRTRg|^&h_i8SRcn`N`3OOtS9RuOI6^)K?f6 z{fp`*WyD*ZfYBxCem6P=>C%gE^w!zUf!q_9#a5UY%yd^b&Jlweahv&;9p5VDcv)zwrF?%o%16VAc)YcP};goy6T~=bcYH>BJ4zU*Ch( z>X&9FRCJlMpw^w!>Vsq^09l28Ycm;SUtm;pUd!9nz-7xKK}T)Uz@m4Tw?x%5YPZs60#Fb zDP>Yq&um?MU?c{XbN2WQqAQH(On(dd@XzYACmw(N zSmcA9VHpvkFxJQYd}ilKlS`N6 z%a!BbApf$!FTs9;i~9H|IqikZ7s4#1@DeNQn^@T7Cq>7lrV&B$;k*zft06 zIF|JPi?!d7Z3<$d;R%`RzXIrDN{$pf?3eJ)kzy%)P*9>G z?L^2ZMB{D(k%HMHob;dd`(#J6iMh-}MqjL&GA&v?No@$AwD_hHRYg#8%$V?Z1sRVc zv@005q>{9rnFEE>4Ecxy|3&-d!;cYHyr|V_b7-!7McQvp>p4M!=p-F!i^ezHev9~j zqv~IYPgh?4NRyM=N=Q;#uDpKX*$%qPd+Dx!;Sv7%=bR`n!F-+z9_%Rlvg)g3i6`l2 z0)kM^Z$tK52cJ)&Z_vVi6a9!#&>vx%Q-LR5>e+7{{zWyo&giF%VzDwyF0U@HtXOBj zS}U*I>nBGZ#pcRaUUlV#7hJ$FBpqS@ynQHZHQ2d^72@aq`qv#hbrLdgrNh+dmZ@pY zzEo`EFkgotiy;9jP7|O7{_$CQ+RyOM{N;Jm=7JB)cC-hDPJ&w4q){WLNB}h3Q1IFpZDx8^w5J3vYq)G zZ@j^mC;4K12`4+VWQyJhlCWS^8ml6SI?-7Ul?=_4N)paiB^`)ZD%KmeFZmbrlh-=* z)xwdhNV+O@JwqyTrdv5}noh9382$F2QL4U4)>P`LOANy7NFpwy0^g8oRzd1Ub<>d+THe2h4F+|)aPlVOy(M*$gSJ5ozj15t79J#5^HvI@-chRh@WU)OP!7!m_n{B?w?s_9ej(qIV|J-)_ zZJZoWST<9<@4oxk2T+`1}uVp!bh470izIe1eIAOsf6_>)r6qB1$7A@ z?*0+1=xeyzmz(1#8dOY6#1uF4fM5gsE7gEQ94fAU;5U-Zs6)p26;6bKk0dGrWc))P z_C<`FIy1##c0o=PS%YaYsD#BSm(x?0~j#o$)Uc;$O@JOXi!_CBK1>d4Rk?EYc_DJUXuE zVh zQt3nAuL6`$S@5Tr1rXEJ5Q>Qc5$a6HhLZg>A4$j*vQG ziu^04iXBn1Ad`NYvh^poUs2(o$SAiI?xf`-q>bMrF6QRyU3GGg14|=xv4neJ(WL8X1^{d)VU1(D2^x} zfRyxcVq;gnP6WPqqHRD`NN!}yU@h*I9eHsV>>y1Z`43mp&n&Cu`l+zaqslkZ%gTRo z?$neS#SVOICn=Ru`Q}nqsVk6E(#;?Q{iuIM#YDxP_|)BwPC)U+xqsD(mg6r#6jD@K 
z*shN{`o*MOseX}+V!nOGzsWRgzfckbB0_TDl-aLfM~&IRj;zr~*+h-eW%Y0959o)e z5YJaw|61%Se~=|o;ga|$U9)X;bN!JprFFU#D~fIDg%!u`AL%63E{bQ3`U>jbTz}L) zDXF6E-Og(OO(%RzhO+ueZUdrLx~3$+c`cm+*CdXe8Nh$DD(KN?dL2( z5m1XnQZao$Mg2>r)$&!ENR&!*AzcIXiLe&w)KgA9>d2pr81ejl_uW6_??c`l{|+4` z*ZlPwPO#l*!;Q4oa?d^c_v=eUR-#E6G=30n;!$l)Tj?j(- zO{<37_;y3=VO7*JWt%o0SpV&Bf15dd8nqSuhOD>*A0)t+<;53W%#JXe^E>5}PndGhxM_3N z#UFI=4>%98b=%fqR?vL_XP))@3oqbACO+KN_o?!^if*9REn82XH;?T> z!+x2X%YYEQW_;muUT0G_S*XyCLImcBCpMd7x&0NF|D{#yHdF!dt`VKD$$q7{D6QN7 z1{nuA<@TFiWFm59zMiV=+S)Or-@Ny}`|r8uo@bwVcG~o5bmAiIcAYwZfBWrr+kLl< zHrSAkMRJE=uy5H2YlX|}UntA=NHsGjLh;7k*Vk;nWGC1o+e=~l8Fhy}AuRq>8K^BJ zJ3w@)2g&@~-(iaQyUda+5hPgzwKRSIN@29UfR0mC@oXKkn#jW!LDc1>XPvVx{d6wF zp-{irHOhKvcqp7nl<=XZ%D(R+72pss0jQO-NFPH6@tEN!V+T4|RQtJ9{~`ogH^l_@ zl59W}gJ^zsYJ8TX9vr=n@UZDbY7os99jOZCkab;~(*>aH^=_#h4;#=E|=#u|P5_3PNKJ=(#r0XZqrH^*j{ z6qS-1BK%M>s?88iV##dEE4#usdFZDQrbOTP;K>yc(~px(F*2u&atdf)Zd^$N3Or<~ zL0??grN3nSqr@3~zC(m~p3O4)MYRrQbNa{^y{v~1lmukb4rf5`vK?K9+PNv<6Y|Z{ z3jB>4R$*F1%wgnlG4$;Vg~Wm5CH+~Xe9c&PYncCpkM&c`_z%a7XT>hkS1-HH3yGO2 z4z3mQ^EeHJ>H33*4EfhRcaI-8UK47ns#>>b$#Ihd2M%Oxuvf2MkyUV3qE0G)f_{iR z>?`4~#ETdBSiXi3Y>W6quw-ANt~5gbv%CoZ&uPEnq5=BBT#^4IRV7~M68Agd#9f!2^8@L#Yc~i9BG=BBx(# zcnad52wk8raTV&9h-czFD#;Whb4rBMPflNfVJZt9-q{=U#iIGQPY`QRe-h#J1BIo% z{w|C0SGd9k*Gvkd=a zTJ|WEZ?c*w|8?M=5AV}wiB4l|wf0&AHr;IVQ%^mGE+Nh$f8xm}-+$-bXPvCi_v_P#2~~YKuZ{hfWxu*o`#JDgYVDVaQhY4({m4mQETRV=mg$`TyWcVW zjjswumB+adBXaDc?XDorQP`(mBp8WCj8Pn;$q2G;_Urp| zxZ;Xz98T98>3~n2^7E^=->@$o1(`}}X_&9feqqv|(C{{S;v`&$KR^8ML;il-KW}5! z8{L1P%JI$p`u5#@_uY5fZTHSyIx)n~3Nda2{{bhtpzghWg#a?{*7MQ!x zfQ4iTh%)%6>L^hyJwd-v{_q?;q_9q%=Tr%nDEv3|uUjnQ09I4`if3pS>aY6rKSTWs zX>&?fn8^A+*&;c2fGBDEiv{p80wpJTm#**)P=G%V9U)3_>+m1+8>nBhgtHWkfOQ&X zq(?#|XQqsPCR0Vt>1%X0D?b;hpUS#o)X^VY6RCRjW0qD4pN8s3#Ws2g!aDTb5y|+a zh#5ik;5e^~?%#l9=>iGmE@@xJf8YaA(@c55G2>bBLw`OE|Ay&@euiQ|Dllr0e(2BF zSpSk`bX;=PRjWma7Kyl3F)qmLCkRWbZfO7?or_sth{07)A(7M|lx`SMo?}oN!WM)p zl`{HDlWz}vH6w&)mSvFe`~~yqxvXx|8fpx5Fd2Bwwbp>h#0is*KmNGKpLl%glxdGX z`X9Dkq%vUn{i&z^eDA&YVkC#Ef;D)Y*ucgsjNwGypwILk>M+*nXl4t!hJVW{acT&h zX~zi3p(WY(?hE>TQamQFTcz(L561XZ4*$U-7p|04WVJh)gZWzoyy->em$(%j{yC%a$@E7LyegPkFVR=hW%`-p z4;r2-BQhC2qJc@{?))oT1^yLK+?s!$ovoaCLYnImKD2SFDjG4NU0ZQCW9_FJE?%ib zq=hth{&eYO=biUkax`ylba-%hU%R7@I_kLN1~EaGXPPOj%GP9EwwZ{yDurRe`h#NS z`YDwVYdQZsO)v3Z-q{#Csk7os0vfFh%_r z3vpgcV#8cz`fo0ieT7;O%8tGj@G(sij&w{_OraXw){c$j9qY8<*!aKc4gRP%P=YQUQ^+6i2 z3b7av6l&66ey!Akl9~di1TCE9n`|Yv#TJ{h zeLhQ40Fm7O``?d_9y5ly7)&eMX@?!*jX5LCTH@roPMtcgzUu1WJ>kQNOxMDiNU?M0 zuCWM!rGK!)oI4g^ataoLpfihwFYMFL%1!s?Fb%EMi6@+N{Gj96ivzYPKh+T6+_&l` z^sO<`Z2x`te_`YcjVfWJwt0(Y^s>=O5fj{g{p)iZH?HEmJorZlY!t+)m*<~%9{ZC( z9Qm?k$930TyWV>1gFkQpj!x1H{mJqF^4rfv`*n-ltr{P~P79rB%P0{IWbol1osMF* zww96RbI<+luwl2eJt$ez=N8B9t={MOL4!DRjLnL>c3X~2FqejCRSO%}%tn+=I4?AB zKchcbL+Dc6dAE{EAH~FCzAtQFjvm?sf&az!vv6}AuAUeTX-Pi~rlzqh`~H<(NikH4 zSy6-uzE|Oy5E2m#^6ao}mtki-nCr7_IYQ%^ahuwL{efQqa5DH85k38y#Q%^m4^Ub%| zVaFZz*?S+3_oAMl{x<)N95BkQWEjAKIG6ICt~@WH!r|(SXZ*`46xBrPr;L9mP@-=ZO7eB$kg7*NF%+bNNwZl`e-eEau_i)*0{;!tFY0Frf8|g* zBTPhzKSZ4|P@u1H3K@PpXZ+(D@%WXaeuBNA*SNY7V}mU7CKRo3!B;e*&sM5KcM&uqV@0F{RHlX_6} z$xT8yOWXM~8u8@E$P^9=T=MuyKkM;N00=Z>Uy#!!sS%>UzZkYh z;y+j@$u|oshO9Rv(Lnv4_%#A$^go0BqSEBq{F_tV=zI|l(A7|!nG^r!nOmM0BbJn9D`B>hg$0MMf8#C61e~l% ztY)MAQc=~mZ9DqEIL`99=Y~J@(1Q;=@W2y~Kgqb=op;@F*Ijq@>Cs3#dQKV4^wauZDt{D8rHXzObTRrP3Sus!Nt8UTQu$EQ&lFDSYiy~S zLw@IRIw#vEw(rpX@+OLgX%XR(#o(Z?K9GiMs2dgR}aUUJF*aSoD? 
z0~S3t+hJ2|CVaUsk^Z?xd>}P%-UCYEjaKWXQUdn8>op;{l+H0@ns1vs9-*MoM4?p}6_17MI?0(HP*U+`V zzSV!baq!D8zX}6P0o!i7?an;&42~v;e`d6?coY2TsuO0MG3|%f-*|oW=rPO~psS80 z3-n#m8wgj`eCf>7E(G)q5uH6r*(VPEImB%I_;-aRy`oi(KKkfGp6N+VMWCup9kiW)B4b0VvSeMlYmB8UL`UkT+hD5n#7Z0g`_h5v7>3o zU0DYc1=nAp{E@E+$^A>UUlLVXu)-&p#v+ug|HN|kh_^rbIvR_eh~-nA6Y5D6^KY8Zx4ze_WAv zaFWwkB(ZDy8fzjK5!P%a-h`k?C~Ocd=}*unQSqNHg%12R8o`z64{{3niZ26+&m_}V zz@$I&bCM}lJc53D&)JIH$XDT&?Nm6+N%V9J{KG+X6D&B$n@X1%=`=n#|KWp=Cq4Dl z)3@I~^nv^DpD=MES!1MYx#gDo?mFMybML)&95}FT`}Qi_$PWFoqN8jwp2>EDk)Fm~ zHA*($c_Vj9kDK%dZkP(LD5{0z7VL|4^KbjoW7t>buVmz!f-A7)`(@j202Ta;1@kOc zY`?fWZ}{+7Uw;&@DBs{;JDbQy@~43h82myZ>?^|HA`dQ6<|)G`=_l?uJDL)An0okp zE%r;ITriFvD+l7;`Oxk#seH)n7ad6YANuKRC^x2HeB=ZXqk;#&f+PNdxL*WEef&co z6*v&|1GIwZx5s`1A7P&16Z**;nJp`SLVsMg2z!AcZ~`eq$!iIJhI|4(1#5?Gm2>CS&YY!9%^8p0 zP}c^(_{C|XM~~*{M&^!<9rM;(Z;gF@)EgY&c-Aj}dCfDL_G)THB{bai6N^HvV#*Q63u$+bY zY~8ze*LBdUaLs((3P^v%(4 zGP)1g)r}g{FUE8MvdiApb1Rr>)u?eZ4Yk4^Yb`Nqj^sCRW<7c=kE{rc zsWZDuA+@g%C9-5BAq5ALk0g}6C)r7@9KxQ8^f}tkMf+73*c_HzG=>yCdWe8tzZo-T zj2}P#nP;E9?bciWb>Ds3#ZA(zT&Y{Pt+(Eq&pP^W4fQ<0Wd<2%t#x7g zzJX1v8`GyoV&05Q=@jXUnZ%}bSAI-sZjk>az$X9`{{l_40x-WU79ZG3_yqnrK0!s4 zOwG(D`jJS^e?#;mPOwvkzc_+c9sEoDOTG=#hoUsbd{|(5ama*^{OaqE^ufUs{$|X# zI{cT@cZi1QCw!c^7)XD?SHt)dh{S{blImwhe>$gb&s*@ytFP>L!2W&v_Stuzz1csN zjZ|6HyK?VU*ywcJxVLek3U5chm>5Q}>ElWWS?#in8lS}fBJp>IockA6qn`ux$Rf_lGEp@Q2v+=`H{#n`~Cd(YkiOT7MO3OS*D+g&Yy2qa=k?PP}G095=QJw_Ss*=tE2x83k|4z(eJS zGaES(hGYI{NdLe8ch36jZLsco>t1-#AL`N1)ZkO(TUI`pr7zNc8BwIi1hRBgO`kfQ z<&`|s=BNG7R4gV>RX1tMchC5D-(fQ^W>>KW<;f?Xdiddo4?N&Na>S&#Bab-Z>8GE5 z<<-}oc%Ub?MaU&b#h@x2oDMvi!q^8{#9!s_OmZ8~bysBz=Qv(ebp zDO1@BZSv%gIR}95xffr0X|+{XrC*5&S@e6dTUoDOy`uc2$HcIs0Pacpn(P-e*sGVN zQfx4S?xA)=hTOQ-R$H#U*4l?3e)zrj-Pf`Or;9daRWC}&!Jt=Nb=6i|ZG(PNg3vF9 zu$omj(-I7#r`=2H*Dor1mes#e|AMgLP=URe_RnI=XKeh-wcosatJD5hX;b=(ItN8) zK)&C&`qw!_CaL^fR{s|CvjO>rexeM$4FC2Yc8gG8Of;F7_E2eZrO zJMX;nr_26))6F-tmB7|pZMpj%d#<$NibU3IodpZ0PMg-YZCg%nq0@~t8&@|?-=bEQB(qysP~S^MR0ZAz=KQIL=!)X+SO*7eOv_EBqOJ<5X-=WmHf{dsJ3DH+=X%Zoc{P#~*wCg%R`T%x&DXNvk%k znc%(qZo6->`DUxG-iKX3!PaWIVEU|@R;^oVia4w=A{O)C65IZ9a-=8Xy5Q6Luk0~f z5f>q^pyFHmE-*4iu9Q<{88guQI_pfPFoH2cIEx~yD@WJVz7+eFY)Hs*161)}EFZ+N zLm(C9K)Dg4fxmNUPK|Cj!GMMYXtp@^2yZxdRH0x;Ou~-29 zB{=0Hl@ENbvJXG!wF|E7GJ*tJX1||qztR}+60!!+I;5h@6FF5rTR)Tj5G+8>^s`t# zV3*;U;cr?cp@g&Fl^B24f2XdX{QVEuud*%sw|$q}FCVB(YGAbs--?{7L=fI({(@b= z^ZSjnUkxT^icsYLn^gaXtZXGIFUU|!@sOj+&#$rml~ydXU!;540^{Ox(?|ND`LS%i4|(4j2%8$Ei=nWvv|-SyY4v(7p@ z?6AYO+icsdYnQO!nwq)Inrm?!(wjSfZsRIGE7V9wnlyosC-zQ;@K$MotEHTokzDig zQ<0>Zx`#sgH`R~f3L)snGoJtiqo`&^8+oA*>qkF2{DTia!k7N36Hhv+b(^;Dz5DK* zxwBb7dj4cC4LDCnk5pF)a2*{gT&xh!w2X2+{q z?R(`-Waz}9XRT><6E=_`)QT&t$f`g!HGTLP9cNcxebt2*Tu7_n(n~KR^`npZDN7*T z{;Yye`qWljY55B-xbWwvociI!kM920JqH|kzzx^m(3GZUQ~HVKG4-!z&TJMT(y2G< z%{NFLmJT`eu(QuT8=8TCWAAMrxVi{d32d408xLPV`&smDziBsKw%>JY2Yse(Gk10k z2dA=#2ehAf;)%arbM>>spC3JHl*C_A%XVyAZMF5zyX?H?ch+2OwN((DIMWe({EtPZ zyh$CHAEy(!btxB7zf0HjChbZ3vjp`oVxo~w>UH$fghb8{P)+S;S&7m!BmedHFJOt1 z%iX28{Ve$esMxRa4$r#c5g~kj`!y*F1&C$MEmuQUA`^BZFc9slZ$tf?w}12y*-Pcy zviesR@mcKG`tup=x9t10j{WLuTibtxiPJNs3*%J7o??PjQniEJ^c(>chDZJrol?YM z!rMlJh~-TIqar5Do#PTOQ*6_B8mw(B(I=+j*P|b9BI0rj^AX>skCXma*0KjbTT3qA zqdTSQ&wl>%M<03Q(#!rd>diMtzwzchciqJf3g`U#*IR71IU6@_y4hyzJJ5!00+`Cn zoZhBQZIHq3V)&2r>*JF%ZB$%3fKBaujwppO*c9Jz>EDdL`6lZnFNMKZ0U}V~BM8A) zkgCR&R{J1^&zE<4IsKr#$#F|JEN~FogIa9&E*&!6ea`+2q^j`rME?~HA z>hx*vj2?f_J$KW=`uq#e^Qpk#9t74IFkpiXHr)S!{Wjic<4>keWxhHM7&BN)^M&=l zO`Ef&sPJ*pIYhiB{O#kH8Mq7PoqKvDk{GrPXI_6u0T-}2>F(9eLP zluJGFSCDVu6Y{lRgXNDfY6zbaeW)f?&FCX*T!^Pc^(+7UCzEgTv!W|N1S;ykG_1b; 
zQkGc%>ET3s6bt^i@x#(4JT(lU2ejJt`xjOJ zF0WjT`-Q=xtDAr+m`<*T3#{E`lQ<-hXuny2hDD+0X;CMK8_Y!k*b@ycU@a%nQIAn?ym|Yu zp`1AR-UlB*5zxrtrnILyAa$R;*H~+9mcDXQr&1`pL_6pUcqgAJjaZS3Im#KqWy$Ja zB^CIP$p+h3!APR7Z7)PPr!TOas>poh_M7mxB9`-?gOYG`K0inMnS5IT6yG^bxs9Jh zwS@m?v0rm0`oXJ+@Z|a>K$V6`OO}2AE~|gBxw8Hr>tm+d4d_2=RVo0ax&xOA*Beq8p>ZbkYE zm{D6ieWgkW(>G0ZWGJ$(`KFU-DLoIuFj#>TbrrjcuDkBK?8?uoH%3vYgqSS0{`%`P zSV1pF@NYViyF4=)=OzA~egPk+8+l9^fDhGPemOv0d=|xjq!;*@A48(!33LTMIn=m_ zI9CDxqOYGLFB$YXBktcJ3DFHA0j<(5x#)X)0#i>dRB=5Tfo2Mh)OQYSstGDn;F?Gq+UVD8}>-}w&BCR#Ji zTeRST?N{kqRqMwWp&14YvWdU&;4Z#GL6d&g$H!3;|K&e9m4tujXXw9pOp>ZYUuiBm zK9NRIzD4@DTep-8^OrW@{Q2{ zOZIC_Lbi4Eza-!K_=GAXjYuSnU%m2eN$8gdAa7sgKLvl&H$NeyBr$VRhkv;h?MqZL z`k_BX{fso~>u1u4uiSn$``X;{%SRwt&iO}ywf#+1)zw#BO}!kP7x^zVopn%D|NHh8 zq)R|Rq&~E?lr$?+(#?{t(vs4>t8{nQA`;R{_tG8G0xI1gEU_%G@!Rh+^ZdIrXU~~A z=Uw-GU9W2^J%qsN(@`hrYlzw;pRoDV(a1;st@P~_GKKM0d27|B%fkAG7fa z{sNt%nu~cdK}1A0M1DZOqS(s(MLmHW6Lh-Y3;Mfz7mGhtx*{nl{|OBkhUsm3&Kk&+1R_PD$ zT$Y*wJN-hQH-+9ER*a$zaI$d4M%VwAaA_e19tm+5eb-)g%0C-!mu<0u2~9E6k|D^C zxxd0J==E5AekuuBrkP&aCMzTn_lo(BOFXRAx|W+@pOM4LD;gZ|d`%rQ*c0i#qA7Dq zqPqI>67n~s#W?D#BxXWSg*m&cIUMO!2Z4T!JoaOqrk`<(_|j8!BA2QxeB(`5NkUu9ZbYd6_;uv&1XFbqM))$TlPdQ{O) zibJI(_hSF~-U3;($e}7K^6>05fw_^wt1yB;EajA;)Sm1$AS1ye$m7}>$iKSLj*B}j z6^`2Tb8j1AoKi}E^sqO7fQUA?f`o4LNJ&g! z$2Afd^cjMad~k+rq)Rs#h2E)x0NHS;cr1__T5R-0@vXrBX91M53Clpa8$L&L$6xL0 z|AYVpTK*^#i9I7A<@tIi1&c#I3cYNv!3fRI;JN#r0CR-e(S#@0*R*%Iy*ZOkf%N7l ze9D7$7}O!Lvcfc6Hqu?%Kd!<*$o(sW4*Ra!eqG1PV*B1_mD_P07Pq5!N%~2rN#LUK zLxE30F}v?e;*0ASem6SaaU8r2Y*%|WyXGN#v^Nm$oHPwk6KeVEkdG61pxPayy$vrC z!--%oyto>_s&~Bz3l$Eo|DC_GA$QuuNn6_Kb2$8+oW5T={jfUUA_A3tAYiE)B8DN^ z)j~d=u%cctuTDDbF@iAK7 zm-RbGMi$3gTeCQ_+^RtD{RB3te`6Iz&t6_3QboF@F@^r;oOZa3`sg@TgMOX&mUyJdGmyzhh7Q9 z&L!opnGPJ{-)AF=wAezB*d3jNbk}!bNKw69OvXGzz}Dai+?AAC{idMDDFfLG487Mm zm5LPD(t{9pzxG*qmEt+LH)UDu?6dxzZApoU4Yh+SU|lbyLnx zomrEPNcHBmY<8b&s>h)3=h;@e+7asXR?P($Di)LKw(5=TZWgU8o+B(+&k2C8EW8va z10-xGX9-&Sz81|PXBGy=RnA0u4w5b1o4NmJg`73Hu7o8k%-_}^1W@TV8fJ5RPv`I( zgPt{jsvh309!`4!mhrsC^L{!Off2YWV%z?|PW6Nk&+!bs)s_!hSYUP?&XTtJ()GC- ztm^rF$-p~A!OA<8R%k@xyBaB7zgd)k-Qzd@@S_WvC&JnFw>TZ8L8wq3xIHu#TY$l; z+?(?@rF=O5(n40;c)_duhh0$gr=()ldMVcs<{`o8YHQgAfVS~`W*q8ux7$xgjg#Z* zY3DM|>NK%X#F=u6nv}4?|D0z*vM3btG?<^2iu@OM_2qvb>1cEsX=7YzI5sSFg$)77 z2fr4t08PH@uXqJ9zn2+cD=mZet0iY9rq&j9@ujie#+M}Xo>JPo>F@HeNmb};acn(y zB%oQ#5PT@N7FnE0R{dNpQ@^C1H@h|NL3^aJF2YY;l`)jyx}hYdZehxil+{~!lhfDp z@iVB}i521EYGeZ0rs1EqpaGmqFu2!ZgxPzNOhg`FH>&eLC#;LmND||HwiMRuPxEqZ zJHn##g-&2ap{Zr4WWQ03%RZiq^HfbMwZTQ+r{SfDFI#*Lc6xl9TvUsdh;-!=5k-_pNn~q4Mgd3N zff8ezsz!G1pn0j$Q|bjDdZe#9$`Yy=r0#t6QZ0zPy+3>BhuRPt#fXnRvIitu+-$hAz?SDZ+6$rN2IxI!*+qROMJnms!_t-(_Wi13=kjz}+Nct>p(7l5Z*` zi!~N&mIo#g^CoGj8HRU^cT})A^Y8WV^vOT#*_gi97Icm&&bwOQj38w%E?0@=R)XsV zRpb!d5k~f2j4f!6om-^U?-)fvezrE6jotb;#Dd!d$m>$>IZl8QoD};U%X-{t$9g;< zooH2=(&a6XsPDW{|K}TRz=upNNU_a0=?FKnR5|WlDE$v(@-ewL0{);pbOTpk%LBG8 zRsmve18`yChS5-yu`WuTa;RecBNEgI8e4pE&a{hdgluiJ!bNmE7@^OT4KBLT^b*uWf+|N z<|=!|;|DtZ{GrKV>UXsB>Wr>nnvX)xqR1*=nBzi8L)ZtMNAr`ZJc&+<%^w!;f^k}i zvEh7Xz(GVsQHa-Q3Olp&?IsbM*ttBaeG}4kP;v&o={y`6_>FZEKc;acsX9(np-v)B_E<YMNh*1=8IW6#VU&>>) z;2^@$f{{wxVLIR{Rek1m3fD(r)RpOTjocHm{P^-9=(v^&IAtl39_m@zYlYylZ?!Me zdY#AwY|KdWV5aYT@eR9LT^}muOWJ2fE#B$iXY?|*x>g#8BMU|xkx)&lIRbyL-Zo#- zJJ}kYw5ln%PmzTG5&aOxNF%@O0ua%+h}bZaZ>F+#)si#8ixDb9!o{@4iP8v#ZP~S#{>`$)A3+;VO zU(->T|8#m7n)>~f1rG7WnE&%8{QH{M zb48+-_K;$Xl$S5wCWi_aJNpbPcFRu#M0^R`Be57V(vIAh^3k8E3OK2F5!-`2hsDtU zUGgsAtUItr2#oh>Y^2XhDOHBfX%#l>_|=?JkK9H+X_|$!CYK^KUpi{a^SOCaUwQwX z1Wbb_ZtB|}f9-P}1AA9&EC<9w*x8Wu=X}lU#D!zPWb>ya`%G)#DBlHr$hfZRs>@_< z`lxcqlcGE~ 
zSUu&L%X`f#nhNHV7X4?B`9@oNVQ=wc3-E_XP^y}Rztciu!7uo|yteyssxEunHaxRP z12T>TJwgw{7%5S-*wPAJW|P*BFCE<}^0UPxZmJt0k^!TMw17L}V23aTnG!?H%8ahu zRt#s>@oFY<*w==%)DEXNebTnEu3SWn2K?hda)xtJ>GPAsuN?csL=ls39|?`ArzV=p zU{ukm{He~QyqhwHYQGiNDeq0P#Lyx~dIkWJeisew@kTwcS^szo`eo_e?Vb5FzZ{^%?FT>v6(RJx0a%phyJ!d6>5|C^CYJ zgG2!LMF^$8zW1I*xOnEOvD_^smW15eZ9G>C5>+X%kFB+wTWgrYbs}JBguML4=l~3T z8d%?TCzf~`EdzmXLR#-l)_TzQg}%eNmaabtShRh}5An_HQ?ynNXQIK6v#*p_{y^p~b%1RM6E9Mr->02Os7KKmqqdmc-F09vQ*= z`|V`jM9lHirI3FuYwnH(;3)Gdd%YJ?i_w2yW+cv=x#3Q-6KC!BMZR!H@+_SdXBRfe z3EY1j=Zxp!DJ={fHW!51O?)4g4J-B9OJG!#YV_K>%608h%{LM+$&*mL`Mr+9pw^|_ zOsA5ZPDX|imj$G&KWsZRFP{%Umv`-rTt`F9%#NE?Z5tlKdym zM~2?Cqs_wmF(M>BniGhK2Yolfj=9veBW84OS9Imhh&QzR@i?%v!u|!$bIO3a2yNNak?(H z%sapj)N|hsQ}o|JtThT}s+hYfjjW!Si@$pc(oz491Z|R5d1Y60+_GK*1`-qS0OgMi zcTazZ(4Foh-xzwW3*KioiH9VZJGgf3>?(I(fX;`@aL(n2@>|(Lj$RW3qp2<=r>%c) ztT*qUAC>>-dV3^lyRtR&mP3I=S@Wy@^(wFZb@prNn8&eZNk+XRS1^-xnEiEpB$2TU z<`tfE+Q%G2Q`kh|vC*m4BMJvn--M1?d#sQd53d29NcaWcwj~VQ4~zWFG(dOy^|CWW)3zu zE)`+tzm|KP7HOr@$Z^W9XofmV=O-G zkMjBN)IjWFAs>yh#9rFLoxK)%Bppsz{QnN^KweVjE4a{qv1oEyY1V3WgTbbzROX z8{{~n!Ho7jLD}}VgVk;w(((w(qgB`HqwI9d$1JcK%2xau0YGk#q#Xguk5&$_w1`)D zJldKwOAwizU0AP}O>U?vZvNRNCM_1|6&{y*EEU?d?UswiGx=5yZ;2ZJ-oN0WmKty!F0R3v?QowsHyia} z-^MhTSZ0ilG4#u|$A;~#a3ko!lKp5pU&M3-244Y>U3&?08?-i4b_Yz`)oDi5GymQML#%o%=M`k+1-57Arr*gMiGrqY4^o?S`RweJ=6m+@8dJShZm<8vT7tYeZ z^r(ExG%Q)6S18<{N#~;_x={P*w^_+{!e?)bpcIGOFN8w^#Pg+9B%K$@tTHw#v`MQq zRlh@96;>EPpvPIMLXnbbjnaOx3e&>KfykIxmtOy6b8zG?HJ`Obh+Hpgw}l;1%bLH_ z%z}`Z;?6KTqzjpb<5iL!&Y7zf;aFMwm*8OVeqG_p1POyfZmQ(?gho2g+>G?yl&C8l zJ{QBVj@<~Yds?wi3FVqC4fOu6Y%vInruq$8@Ah7Sy8I2H^b0v0N+n|Td0_49QuD(h zc{jMGgS4}zR`#8nU-IExcd$PKKUdUWm2066#vE=xWJk4U@>Hftz}${Qa`uU_dbX_&Q_Ue%w~W!SIBO zSL?WOwLT^*+e3G6er?8vr>>x08=&lxCiw0}K>tN{GgOJZtp+4D@{U^vJ~=i zix3iCeRY<`!!H^iY{Qu^$}|UexFBvj^-tl^>OthBgT$od7G-8CQlW2eRka$MjM@10 zV}nYd1v~LjXI55D)9@=YE|h(4TO=f5`B;93#vayF^G(Tr&TzaCkoRKqOqp;USJ=Yk zSe&Nw8U5=zu7hCr0flnDtFDbxh*ih4=4Gq+9vVEOa2jY;d|u9G<%Y^59X3p6*&rLU zfI&B$7jsSP(Zl-!zyc+Hv%st|LByf|R!eO}>l#(^cFw*R_Sk9(x$KmH1LXX>e9 z7hm@N{cN9?b)k~RQ9ycby3c-S3FMbL|3?WAk`kSIt;ZAf5w?DcBq)s zp7p`fdsSCdA*J!s_Cb$P3^AF`FhCw{x%wZpA6*e1jHqRIaODvB(EoK8a$CeE+JP#X>0!MXe160^k-|!Ql zRLzWyej$a@MKI>*>UVtLO4KBhp)3u8e_^e1Us??4i!tJxojDPs(e;4SjL~HXY}%fV z$A;n(pqMhe(L1j$oUTJ~aAsS%Te-8I<9~8zLASfv>(6eMj)7f%r=a>>s8bLj!EuF^{3u#eivRL+aZIhy%>z%KNZjq^bfM7^fh70NZ}x>F}j0}yM5`lwy-eiz$=CqY?HCymBaBz+ql6;b@uPP;*c(Z`r8}8!K=LRE0fd4vmR{( zN5BSKWUix@$zN&YFhdmVF^_jI<~|tjiKdY5m%}GW{(5e6)54!_I5U;fB;Pi%mX+D` z?F0@3gE}9G9V+Hwadee6ox-@E>ziEvRHpi$)M~z5+g{nDg|#t4HHG5Dvc+kkTB^dn z&TCGCx3(McpyR}8um=|$!2t3SCBo|xb$;@B6FSci4gI%5&8aTo93nE%s}OY6hNIwh z`W~+~sFEe^L_A^S7XX|Z4+7@nAH4PG@aZZKLObtGGvcy+&t*=M_1^2F!F6oK|p5}wFMNsZ z*p47DB)y^|U;gUvyW8S((#qgUuwdHrTSvtGAu`0}czpqqw)yo!M&(_G;d{$L-kqIy zm44@!`#=6CTig{;_}+w5#?IL`6_%Z4Y_dlC(~{7}+Rzg461%C@y-0~*5#ehqQaEUNP4vyNFeFWzeAAYN+r{$7-;ZA&~-KQ3jK_siBDgJXfjy~tM&J>N%{BFN?X z?YJKMf`j04AlJ;GElodk^+(65(GLNE)fPAJD^b874;cPxv&V$p?u2mXN%JzuoHm=T%`Qhr(nPrZ)~DBV(pU)M?<3v%65Sg; z|FnP8l+T}kjT!F|X&~Vs$D{1r2qRUZcWe)$+w=DA`pS7^GMpQ{4-j8op&L{p@O|{K zn?GJ~Jna+SIL~8xI=fUME#7W!G*)q@{;13MZ`r(3Dpwcp;f;bvVO0hA8kJRLv|k`= zSN?!AGUd~ZARssvn*zm`wK$M6bVeFRs_w4cqf%bQLYB8|<~)}?`t<-ZG^cbG|HCBAbl+VqCCrr)r+zVBm?Azb&v(B&Sr5s&Uj&j1Ir4 zGZCvkZZCo{aKfeppMIiGo%GN`&*BkzNLK(#3jW_bu?YEFL$6;INh`gxDj%Rqc|umQ znxwXlGzby@v>#*4cz{QvuQv7O=IzkSE=*eFmIGZs`L%=tUb*sUyau^w zUIa)QJ2jx76>DMU`&&E{`?>*OKT!*B0QWw)s>Y^eX$vCnFgYvLwnQ=xQO1SpeA!3 z^ewx0I^Q2)Q4=8SUFr-N;UR+Bg9w>-TotLt|GU43y!(KXX>^1j2O#i1@#zhPRPAj2 zc89Myk$H;N^(~g|`~WlFFJaInj6eV4^|k1iCZ}1nR+eC29M$btz}zLLf`Z736DNJ0 
zMUU4(4-x$je)*@NXmlsco$4r0)DLsL?)Ku%F2C%4e$AXJ%n6No8{X z{_xUtT)fA5Pg~)}Q6>#*Sm1jTsPKn1__VL&LQfup#GwX*$15vl!@g{$J`~YQcw+@o zrS@xOEfW5X>%*F%Y2p8Iokf@JI)_5DY?a}H_7EJF7j+Cn z-|I*T70VE7i1`n=NxLjTF)NVZ3DJ?0ruii231i}%q<8YgZ8(fMYJMLRt8~VRQ>lZn z*XuBNqKDvHl%U?g#@Yl?_Dr6PEe^T62CgW4#S4)q;L8^r-Y-M{w4hYOPF&_YWvMIi z#dG1~haSXaUIP$7w%~&=p!2AQMS$HfF%LWi)F>x*OPS54Oh&gj;MssRiB`o_4ATJ) zKH47aY2My~)CM)M%`IWz-gJOJga*5QHTW18@Q~-5zp)UCZ?5NJcue45;ahswsHDyw z`PU!STRmD zNL-H!+Q9b6y#YIzyhZ~zce~^v<=KRB1NgsaJ~HTg^9M7V3IS$sg|Fy}am66hP>dkL zayAao?v_0FCQ~9mH{|uGM$^j|1;uItMz6S02US&GB=`R|PDT*q{y zHGO?C8@b9WtR;7^cB`}p@uVye;gS7kW!DkBV9mp*$ksf3nPS@=R--6t? zLTvjmxB$V;U$)6|C&`p+?)Pr*1)qH*6wpR^X~Ye2GCqHC``XZeq#5405j}j1?!Qa$kNd=Cu#v=y77LO;3zzV5zRxfD`o3-0~w z@jEErtu10yQcsQPJ@)nSA!E*8^_^s)9gh(R)4dP%<$M)r&2g*ZJC+ObOe22wos?%m z=^NB1iyt0z7M}}G*UPN!6fV|_OQ-cp#y}Jp%Tw_oHkLO;S{R@U)-cei*aSv9-0Fu& z*O4b6-F$g|sU(>$o7y_4ccGPaJq8(&8%LhaIylAYt*+%_DKU%Jpq1p1t-v!#nRIT| zk{cqph^|)trz#0eZ%?_%F5Iy*IYTs2b##^=(tz##xXlb`^dxRhnyFu0nTbF(U4zKj zMo;0~dny<8beMQ^r{`M~*f{lMYO7dNRVIw(eLdxNYaMoVZ@C&So>gLbDCd5P%Kmaa z@fHKY!r2rt@MK)hRN{Ki)f&(m9gZ8?AuAVsFid_U=HD6C9UHp-aS*qs@SZeag8A4M z5!pGY1Eq3n!g^i|F5Ze9PhrQ{U^kT&;LBPM>yY1~9GbK*H z5yWhl8a8+ei%N8bwZ3Et`Pr8pa-4S4jh#`vjn#WGxbwJAzrrY}#%q<{(zP9jR!v2h z<#%PM-UYt9jG)!sLDa#Jqw16j$ain=#u}qP9mMKNiYG#$p?;!za!Cn^Av>J7n2&qF z_qF4g42%XP)cx8j=#uWt@*RcZtRJ~qk z9OJ+}TL>;=f&OpMM6I6U^-i?n)fXM9Kt{||!LthfCM{Bzeb}}DkRmyOlKi}(lGTCS zWP71#C_cNmxVpY$&jr-EF#@<9^C$`klbi@(O-jACOb$NYRA&bNRPuNoYgBPvXZ8aR zY?Ip%1MSj&@Ekg1R3_`aqkeU_^c`w}I{XQ%-ozs-CA47yju+F9v>v@7=kZZ_JnRS* z^G)zj1U9s}?Rq5WWmF_l3EI0(a$|O;ldZ~jllP*C(=xlKFXro>vh7_S8)70>E*3Xn zBru>?yc$5LjOFD-vgQzknC6gf0y8GE>v29%G|P0MH%i6r#>byN5^bVD=p4;P)L=${ zmFI!W^A{Szs-D#*+6>Yg6T;SpY*%(D zedenwG@mvfS$P=BZIB#<%bp}AC)ZX@?I4F(@aM_ zOzzw!oD?J_GH#D3O_OuH?T-1CAzIs}Q4Cz4F=3&1f!z@SA=jwO!&#j2+4JAa5{`~j zT*T?s=+|V~s}$vcaj(6}Tr2k~<)UZ?po{NNV>& z$nK8^8(*I@X9!^pvD$KO0twl=gFP~^neWgFHlAx-D& zPXPg@iK})~wJ51uk@~BRA0##8Ob(jp0`HkzNPoz`XX#WmbD4B?g!n^4-mLH!uBWyN z=j&0nK23VfhQC|lspiPwunG#97m`09zG$fvKyJn%g|B=ohGAe?M?Hdd$b^skge~OX zqg+Yby#r)bBkcV4mUnueg+~VtKndKf`;J0%-VG@Jd1zB{G!UjS67g)R3?KF#bNuT$ z>@~}qp)Hkzc>7UwP_F#zw6-{RXJzN~ba}J*CY<~YKC=%Ld26N-^6O)GAa3*wcK-G4$2HI{F0x_2iv~h*mx%W1 zZae--(xhDDomUXNZ$2y+;l+HHeZip4$#T!USGN6?({681#9j3GY}w{*jy#$SzQ3G| zx;u1)?uA~7J5K%b>pbOq!(s2~DC?EDLv}yIlRWYZ0jzz-!ulSi5!qEyU7y@nTEw`Z zc^_0spcIe;WP9tN;Qfzz7_T5g z2sACg)1Ki{e-#~2E?VHW0~c>@FhR<9jWt8)jrNR(C?4kenmay(5Y){3?6a=ZN+MnZehL{f*hBlQdAV^xBPorX+cs&}GqAI6CrsgFz+V2W2}E=wH62R~(9M)=_I4P#_2XK`GxOLVf)K@smY zkAZnV(B2%p>-jyCnO&9W)Mxc9q69^G*yjc0^7tl2g93xaAfjSoBh^u5xo(K8n37M*hq-_Rwvuj=JpHrq7!4e z3gHD0X53~gU4XLqeC_UCk=fG8Q!O&@L#kJJKOR)=fpTo)0Cb9c6d{p#<8PeY3?(hc z^&25*7d#>MFW*prXh?i0wl1HoI1cMw@MdSCjQLX+@)q7miU>QYSRSD&Ci%_J z=PB0_^|?Qufya^$T$k#_hyd)F7xB3c*tAi@aqm8cQqSwYB}!O&C0dKB@)0XJJLj0r2*vzkkg zGW`1r7Z15|4SK4T5gYH0UHi_)X!r-C1(q(H_qBhU`AtUb58(QrfA=Me2V|{bcLH`< zcvf%=PZlp5@|~&&Usr=}kAQ7nfxq5ILoU*xcI-ktQBS{!rY~wGvoE{0=<&S|p%az-( z#X__{JyzS-KqM!_-Y{0`4Pj56IC5I@r}SYAPE6WsM)0+`ez#E1qL+&@1=t49R=yvK+;No7af*ApN~-JN(z~8(Is# zFf1L}YL&-|gXMjO=`|?7VTXfmQ&zvsc^w_EDKUg~K+VBkr}YA7e?ugZ49rA6&&4dS zqHr&o)J)}lCVUEyF`WEJsQ+DtHNmqflH2q+CS>)tx{T#6gLFQ;5p}GLJ$|`M6`DUA zn;81ZEKKaGarE)XV$5$$Xz()3ED^^=Md)uYXEmxWL>-P87GN#S36xSb_l)6^1k zkMe7?xn)9a>A<7)35);|Zr%zpybgZbu!3w3ZI_Z;$@7UmN;l2|7?y@h@BHJT=>vWigt1e+DUp%_@RXX=9*JR#;c{1GoRw-n zCn5B;<}2LTGSrVPrV@$GXcQMAzYXDirCLw~`s^}Ln_|2Z#v6e+u^bghq7IYyoe$;vc&XW1L0d04T33r9obQ|Q%b@1Bp-8fUp@o&Le{5hgod{^RTTqQ;ORIMi_n zD?|0EjX}ai5nbHwQ`Tb;S&Ku}Bbpo9?XMYi*eJ6G3}{EkaAh zQr9K6u>zi7sJ;4rem83ac%H**E2m~}74X-q 
[GIT binary patch data: base85-encoded binary file contents, not human-readable; omitted here.]
zh-zQR$(aqXRvA}tg50_9QJMxO-O+cF(pqUlUJAa{&N=YGt?__bSGpMmv4eJ?!#0Zd z-hbD5f00c?1BgWtQ zjRt@D{kGYfL%lFd4%?2yj5dZ8yU0(A9>qs;fU5w6A(Yq1#m0(%)X<{_K!pgs6-8x2ex8s7;StGBII@WJu5zfA=N zI&n!c_|9Mv-@f(!JyuXIa)JvdGV?tU*qfkkUo-U*MfLq4IYvB}slX2p4sPX`Syg7q z)7B5D_|6z@BGm_3D~enIt~yaz1<4mBZhXmpj|iwguc(fY(3EPI;kQ#<=wkBzU3LUE zDY*kf-czMvg+^a3`5;}PCyWJx!5W0RmxSFZ6_n=}ykA9PcLc+o&YA4x#b|7Ln>ER3TVGZ7tws&Z}jBNZMxfVbd*r0d(Jh# zw}|kL(fx_HY)Nhcv~T$I%5sDgxQ0k68H84?g`qEQhp1WIv~A!2qNhw3d0?E$pj#S0 z(DnQ=-O?@4s)&16$B^`K%3Zxzf$ZnJOA>*)&lT^*y~3+MBcF(K_C2eQgGU4~B9q~* z;(ODXF}B~qq}EosWp3N|_*YQg^Uh#wzoFn|uL=%rcF;)J@ODO&$}%vw&(g%~IU1zp z3s7ZU1&mCMXtULRAqt zW_icu(D8c1W%JiKE!{RCxhE>OzlEW^EZ^txFilL}BQS2$0f`T;_Ud7DLTN=962W;M zWg0uWw*7NdFzb17;4H`XiWpNv?a)eCUM60xFe1{4My8YAiu=1LIr1v}g)J$uEVcF!XcU1RUCiE!cXMmZbHf5G%md+X5 z7=}u0IQOSAh>rh0s?Q!mF9G!{N<=jnt%Gg&2}ArHf06CG?D8s*+cg@jN-M;0xl zpgpJ+Zo{6ka1$`~`S|zf8#Ca8b7Vhd_~4wjurok~4;&JAoGBZd;a@Yj^q!wwOXTjG z(49bBh0U}aeh~^48=f63+BOS_HU@N_Fvm4w(tK$kb1Z&Ejo$?`?*|V6#e=z_2Iu?iY|I~9` zcp`QFV)TQ8)XjmF7mO4H!YQD2jYB8LD19R5PBb3j^l}pRnbme+(b@SKcT7f^D!zhE@o|GJlb+8)&J7bLfMtt?ENT(@qIO9 zU9R6Bf%=8xmV-b;6iW7LuX+XnM)=jS2vXq)4Vo)xvt|=GVnw<^EzGNl*ovIpG^jsI zkUvVCT!jU=Wu{?1_%voR35-htj2t(0)mA41Rj#N1xd!p z-~~LbHFW^{zjm&WB5E7PSGn)9UOI`7rtg7&TkF#@_Tdt)ji{3xZh;ooQ;xu$x9Vna zy3fbex|cAYPfIqMhhbgc6S=wx-O)++b0-4}cw6kPdzFkHsv;RbIQZtcdrymqz2dllK}2^NtPz?8jp&#U4ZjT7mkHm2Gn-P7(;3ZNXHB z79Nn$E{YW!4;Z&RCe{Jt~vl!2fK9 z2HiFws>HKv!NK1a)DL0``j>#+O0bI?PwwWJy1%PkRmp@qg}!VBF^CRZzrTm#ct-#C zJN+w#0!6y?b=|mz!9ux0P6&L54#GPwD>@H_aBmdfXy0z=E~HUcyL_VZpHsM(lHaqR z7J1|I_{a$JAMk7*bsn)7%t6Ov+6Hy~&u2vXYEKKYmy5y;*Mfkt*x*9_CWQY#54cAs zM~v^_n943Qke26Qqb6t80_5D;DseUr&uP;6G-ug+w!(iCbD;2Wy+Urri zN0pJY_n*@{30ysZu-&v;S{HEOhbsK6)2gREw*;78-3Ey(Q{7{I70ILe9aaC(5t7Em z;XnN$(L_V1i-OtWbw~em`9(#m4`=g*IuE(|CZo9#5xA!yzSb1hD7&TSAh9g& zO_v0LH$7Q^>2g#Fd6Mb6yMNV2AsHbluoFN%0zH4`zp46QlukDW__Pp>>_ZYfsk%=- z&A67PXygzvo5D{Y9Qw)ptBB|L*xl}bmnk=YN9M#7upgzg;F8d)^p~)??LzRU*MyTw z262e1)WS+ZJRXIx8d~aDdPq4R>6!m+RjaOf&(%{^YpVL<*6AMy2=z=&ifv8Oy>PN8 zcRn~Mdm9nwV|rj_AEJor)B5QOC{SNaF`wABWoCY@TF7@_vH(N9h`A2z*!=uA&8wV9 zVlL(a^AZ%qLji0->FB@e?`_~Zy~*QLb3`=V8EQ^=7HUCPJ7~>M3Cn>U_8Yx&2aIQC ztT zrV^^*9S=IBax=(tD3ZzM7sMU*fLbxDu@Fo>E*A*)aVU%k{Xe|=f=+ny36-o%K5Jr3<`z0jNr*U z{Z4R1n`ve73ltfprdpzm5=_3!3aA#v+Bddp{cZ;>;ptddrtSILW3d>6?bqvMN zn)+|Q9=ue?v0O*?O32q*O*Dp*pSF+n^+g;NM_m2$LFsDBuXk)_;@5>>>u0XM*DV)I zrkB{i8j|)c2Yp(Q;}F)Yg zyJ?#&bC1I9v9+~4%;wmDULyIo`zoydVv5{%Z)D5Q%7Qw8`tCw9iG~fW`Eg7V^fpRA zr$n4PLk-%tT?WUG3emjV>(Q7w`q=|bK17+|q@m-f7*eS@+!WzkGZ8U{=0%l*q2t@0 zf+BYb*;&@?1!3?z$X=8cyiKbn4&dWcgUbPv$5vcZ25a@g#$%7ik$TTp3@@2KyS_cL z0+eyVtWxo03xgxf-&sG`uo*^!d+JMm@D*)kt|8e#a?MdaC^e)UL9M~3vdMdhGcS<= zh6VZeAQYe<5zl3ze}zZQolLDkybCcD7WgNeJsPanM?VhNUb`;J zqiqR2a2`q6qt}ULC|=xy7z%=McvZC+1k3=5g_-}V4+2lm!o}+szxoK6)D;7Qgw%H+&J-OnwT-Bt+>K!m~0tZ{`mcT zEOXTIser{(v?M@Fpf?wtfWL#7H+iZsIkYF-9Z}3_zk&}FTKz~ah{46<=;Yj zFF9N>NyBtk2*Ta=iiC{ZgF8{3!3sbk=5wGe?jG}lJXuuYVLb2SSp|~<1o&3b8E8GA zi}!wKSw9~m^nDN88)N8QZYY1h&qsk=pSmyK=P0wQSN1>&h??=uq|T$l{*b&ip{AAd@ z{@~($>=#xcybcE|zOzT(a|sIvI(b~1SH2Q3i~H7_nLbMah!)(MjgZXgsL}=Y3=F4Z zor!8Zb*jrjX2Bv1`0d^V2^-KTh5-^^IgyRI=iAZK1c1+saw}>uFCU!gKnj>3+g`1G z1#s(Xw4tnIntjk8212F1LVI*Ed&-si@f zz`P7f1cb~k9F%aHEjkmw{x~^+KOKqjpVZb(NXWGUa=w%{cuK35Ah5BiN7+f4CFvw;6lC$!l9^Nl>XRAI2UK==#(H5B70wgg9 z7l?Onz9G{nd@D2&D-U7@~1yr2+o+T zcU=~WJRNBaCb$1vPvITYr+(FG^z7Kkk9sOWj)NhwEvLO!SZZ z<}l`r7J+#raD{vjwUHaOR;~K63VXF2^QVxYPs3d`Q7YNB)45DurEkUw-^x~$8L!o? 
z=g?PMb->Jfk0F~Bv-7(8mNQe-<$0smuHLJI?S+HZD_m?YOTJyt>3od;E%hi%aXbx` zz+(vWlk6k~mt+)4{tL68$avI%#iSvv#+ERU487fVwTv{j3{f|*%(V#}NE>&0IY1>n zGqXUj5N$U%xI&j=y;ok-wjPvS*gt!Bph9I>E$YE%ScVxwU6voKS^HhGxO*tljWUOlGg9@uRO3lg(Naff-FV9FnG9uxuY>$Ca*SPZ2MmQXL^+`bb z=bfVzC;S)NsuvM4yeJa2UYvL${#IN>>6z((Yj1L(jyT$t6c=zJP%pc0uhnuIKeAu7 zJ6yl$zDoo`SNIZgPmlhp%QqcBuek72QM*iOoK5~dXbnmE4G|TBJdeob(G?LF|Bc68 zTum0r*EWHJONGv9G{{a+;jZcg3H@QBeK*b#?>^Ay=7`gk{`zAz#QKk|+(oj?4pHuh zFoIFQ2KzyLE>-0egs|O}A=;r8*ArUR8b`FM&c#q9&V2d)aYC5lJK^Ppp0#OJjr1cY z$*0(%`$`%^B%iaulZQiNNnQdE)rwVfjqpzuJA4G zxKQJ@wM=no`0uQ$W(8q*I@Yiw3PEZ{!W~r$h!aLmw1o~+HemJ;aY1@T=%*Lo;ZR7w z-8Bv5ta}X6!13In<4)Zih7mxC;~bT~aySYYspA;s^i`~&Iz3?(_-scb$eYWl%%A7e zpwCX8(jdlXB8uS z92CLp-{2WYiH*Pqp%K#o)UCcJUpGOWv0XNe4pp~a!=Bz}tm4&nq=#jeTVIDc>kAyI zgw5T`iy83(MAjtaU@idPrSAT=wBO(*al-vo&S}i46-mvYdXJWcAt#*3_RH(o%rC%pi%nkVlbkoFST zQUL+D3j}PPfc=&5Atnu1#j>^o+)S8c*S;&69|qGh1rCl)BuucuR$#X1Z#(Vnv&Y!q zUm_wtneaDKN_&6&9p`^?92I5>uf49p6dy(bo?P__7`C9LJLJi6`#tAtCBtD9Rt%ln zOqKLccI(?o=+D)4wM`lopzu8Aq|^Fb%t06HZ%imvRx1?!^<82si985&h2fE1xxESf z@ODBYkjf7mo*X+sdWAa{!XFR0LaU)1)Y4+AX$&*B3tTUIB<1kf5u_PzcV|{h2$^jH zw`?g#<@ubTX07Mt%bj*FmrK6D|HS&eQPsXcoz?HT`TcH;*byyw)yjA_duNUt* z{ac=OvGUq1-9jcKJ0(jDj&-d(j#VJj`Q%< zxQeg|FUpplW`sL6-YgWFb39`ha>z}qEi0?gjo%^L=(EQ}JbGrvBL!`DKwOua=(RFq zT(s)Kp0t4i*Jd&6bk830^^Fv#z#%Wm=X@UK#b@;493l6&AVv5)_sdB@w}BK{S7(6o z$=v#jl$%Ap27L%{ZPv6S#>=@FT=dP0N*eSM_cE9EH&_762OVU4+NqJ`GOsoAj#~EV z!z}$tfa4_vh`RO50I=zu)@tygSGpJZviF?IS^Pcm`fDV${3G^2`^b{l|M!z$@G-+e zU|US|e_x2^+T|-Xq^71;R*Jm;6#66kz|18AKMr5;@s|vT)7(CM6xoPAvAbdKR2q#m zUM&}a(QJI7eUCXMoR&?wi{{l5zKiBfIeP(*AEw>VV0=E>6q0 zdL;QfvyuE>3$C=*Ebz4r#OeJ;xecPSa-FL3t?}QH^L1os`wz*69j3K-t%(05WU&Nd1&1dhViKU2z(lvA(qwichvQfO0g7Se1@(6Bs5Eb z{{ESdN)K30oCfLOOK^zHKlwq)U}6SugYJ<|7(13e9o#q37x~>0GVzC=rYnngA?29N zO0&#ejoq{E9rLDAB!<=C;R}Zl*!C-8{<}9sdJ3v`jcy&^Bl~W%{S=C(Y)_zr9J4N53p=A8Jq_`&e zvjww0BD~tev0PsVjaO# zRMv3pWFZLKr?o{hx;5)wPP~W2-&B!b?WM{(I?auhQ%26y^AQK9rKVGd4jLjWOD;%u z3+inHg0+B9GNTl=Pot)4(6h%Y{6D?Y_6j%b=bpmZ++2@nGePfW7Xpb?q@$?iQf_Y z8n8>d{uUcu4n|tkFZu+au7;&odpS;ZWXh8@HMDl0`D0A&qHcmB$B9l^PC=--RSl?c{rrP5hJ)Tkfd18RM!&cox_0iz{V{6U6TGkS^kF z3EkeAAVGa=xnQNw&NDF6Bi+^UxV<;0rG|vwR~ev{?2#6L_3V z$m!9CB!72*Sv=MKeT{WY24_wCrunVw?a2^oLZ>k5rOuyE77jme14Dt@tFlT<3z#>hSJk|i1qu6J`$|!AVmjM2j%rh_TyLrkDEw3 z%N3zFn1iG-*mtxfbuaT5$bkxcILHPdEMGp#uY~Ex^LzjN-u`c#0LYAS zT=yYwf!1hAU!1DapMFrw8DserKSk2~<*nfb`G^P~t>IpYGA``p}E z<7$6Mn?r}oJV4@HK|No$f;0eYJ0uSqSRE)$-#n*k%*~RFbbo`g@s!`-^!<3EMEt`7 z`32|r-Z0y}M4d0lBj9kKGFkY9Y- zu3hg=sW2Ot@#NNv4B@+czkYmpo8@@3gzU=w@!?W84SnDXwY!iK!)T)C!jmxqwDFgnTp4>( zuo+mh@bk8S>vrus)a5ijU{eN#-6p`}VW9ExpIIj}oh~j@B~!4|c98Urb}TNF1oTem zJ=JIPo?ob=$%GT>>GBbyM!lnkRd>?8Ni`0^a=dS^9bBM3AiHVZKL&B{bgMm&*n&O5YqunW0byA?T+q<`OKcR> zp8G`W`((eqtiih@&^z4clyG9y^&30fn`08(+9c=5zW$NGT8gV?`45vHra#PouYyo# zFxKlIDp~_2Nt;85+YM)Nm``ajx79`0=#*b*ohJ|{K9=fBvMfwfT{YoIe1P*9n?FFZ zVQ&DGb@#|z*wGhPuDx*SkJmJ7l)vKOxO+1;j*!}-2SR_}Bnc8RinQKVbfOc+@v6*? z1w05X`5pQBdZ>w0osPWLUKBSK+y9Js;+Q5-^GG0n{hKoG`)@d09!vGHja3jfl*HH% z{NQ7JrH-4E&wThb`=1p57Z_Le!f#<3(%bkrr~Y)b6X7&CLUZ^UvPujw!AJCRntkd! 
zl5~<^sPRvku`9(}_r=`<^e1?V^*IWWGlXvn`;#|Y)mi~$pF8xL(0O==M z<7tJ6j-Cs`uv@86;$vuh-CHq_g1D}+hu7{iRehEB0-5a!GkyHq(eUh43vFINE$+vw zrz$FHB{3Z|^MK@es@eXpQxv6q)K>L(JmB97of~p zg~?~pjuXE*l!RK3HMlq``h~v0Z;+)Q@?q`!_lw5Vpj^u2srKpVQ!}VdmsYH;z$q}1 zj^F;8?lPJ1fuYUoCi0Ty!uKf3{4^K@D79eUp!2gHrl)3VHzr4JY-0RWMr@Tmqn>f* z!&IRmJlLc`?aPo`%<-KnODaL}^Z-$$GIXbk8iG+x$EN5!y`AHpM@}v^5Rk72yh}TF zTIFFSDcKR*GAS5qdax;Orwh~9B=M-ebnlrAB4NQ4zdXB&IgtfzxKI0XgUzpls&qD9b9;WYh_#$M+f8k#<4?m;G|hP;o=$Y+PCG(d=(Bu8?)D_1tv{5FtEHW{TE~sT@_I;J9fb z$2w6SAg3!TN`=i)r*fqJu2GadyY>e!TWLNir29&h%j5<3wEVj&9X;M(bZ?u76KsCd zh=+C&i=JC$a~)3TwYF1S`vaO7_AxS^;`>m-4WS59oa2VZ?C`-JqE5SzZp30aq|$<_ zvws%90f!0dckbChwDA!B@UL(v))Z*?yIiOOb^LP~P4m$C87@kX9#qx`1Qh%oflX^z zK0nlGqZxpom1_NR=z#J&aNy(5selH%5DW#u3_9$PG=^j%GL}JuYO$9tMuiAPE2jDd z|ACrck2Y!be7jEjy@A1g4@dc%MkHPkQSWx4ORh1R)zy-f=)}%I;jG57Ff9f3z2ml) zvve#QEQ}GB01YAY)#*vcAUy0J2nN?mfZ~|ujy|Qc@}!>J1o-Z?pP1Y5UAOxVJFhA| zdgY~<9V!%?@<%f#hFn4j7LDpfqGOQHP0J&ve?dM{~o`S88 zkMQ#B4HUom8aj1X5Rkd~k|9KMVx=D$O5Um)&04PdNknW4(~D3Ue)X;m(IfO|9`g<9 zFywqyeFV07LP93Q1&cl~uUVVBkmw1cI!(a+S7MGO}xs6Gh&IM zv@`6@evhQf56Q3{)rw84${X&*E zSr}yeQbdA`hZpF*gvnuwrzn-UYP4zcPQ${BrJpH> zlP9D-*A0RK{IC-GGm=$O_IHmAGHiimo_CS3F=$t;hrU1;$qJn|W%|s)2T0jc8P?;4 zJo|1e&0m$;1>b&tN`b4vW1hVt><`Ok}S7N>gPY$>J z<%(SdTaPP*nm%=hT4u-NHBV4~X3UkJxX_Qa_izRePejPX{1I_^^(mrwVnOP-Iqvh? zKP~pZ;)~)>a?!*lwX%P#=ZUzc5Ce3w-m<~YLF#Ssfmtbfcqbndi`M<5bxX8e*&k+1fsp%JLn5PPRBbXyxg&$3tkKkS!;ftSy*a zLvUa|P2l5cFJtII;JM#M1P?)%(uJZW$tS-{iB>DPYA6zgniSX#aB~-&{^MlkxmSI^ z>W_d)*b%Ge{mw**6u&CQUsWb;jS{$RDl6tRX!-PTcDwA}V(g__qIY zhB6Nzn|0!juM(Tz&dic9xgqhF2>iJErM04ai&d=Dd6f`UA$DS5abv^ojBZ1LgxPaD zKjmN5fZ!$d-9VW24Se7^t0%;Q=c8J_*AM;2`zvjBTYqktDGMjw_X0pUC`BBOT7gfRN>o>`A}3O*KYSoT)@ zS5*i-*z*HcG=50K)Woz_Io^A1q-u9OFzn7JDRXB>$P`;tIN%EUN-J!jt3lS+>V~_T!FU>B0|dd zksXIo(15ipU7k5f`1bM@VWY~#H*2TmGmNXA zr$oMY-ckLAik4AnAp_M#%gTm=n9RRy)Xpt1@)R z(-I>G%)^z{cc=D}qp)I{(7CXug#uWiGj=Zi3?|&`D`^g6;!#S*b8|3AnU`K~a7>nU zj}Cp1e8Ta(ocvQVT@2<)vc?DY>QQQwr%ds`E}d~hj?WZZ^)v4k$~sfyh;M4@gptbR zzkJZC@?7GuKI+IYC4+%aoG80^AI_eTKht50K`&l#n zo{mInoDKc0#qCl}u=pGLY8VHR|5kcA=P;_Cyn4sFF+XN|gH{{1pm;nAs-YAMx#upzvP~ zF*v9Uqj|^qHJe(}j$lfw7oRf41Z2K4On80SPoy*qh5D~+eSOylw+g6&a~+1jD~eZe z`n!_Wn!TSalNv~*t8OXf|I_CJp_RuGVv1k-k&`o*oB{V^7KL16#Nl~jlXMWz_9BmFtA$R zybcy~$L}I>yPHB+&47B#m}t6*Ou}{Z4gxQo)jBzUY54Yp*!}YT(S#755Se_dJwyJ^ zUr=fU>>l-N#%}GO8@HMjwB}3}PXC~A`-Pc@Vu4Mwb1ymKlYXbWqaU1?8+gnAd0al% z0Exgbo!fidy-I|Afqr2Ee6_h(uIz7uX?Sn4>GwW*O{%GyqSBr6U@|WQ_ww9cO|j|C z0372ZVF$TL-PWhe=?{HI%zW7=*u5^+dfInTpvQ9L01-DnyjPc4F}i1hVMB6{YPa;V z0z^>HV%Gn7uQ2_mZc)kLn6VHcrI4sjoka$q=!C$Tg~0#R$Gk6%hIgw>Q*ZO7svje| z0}dT=+u|9{OHc>sPKY8!WVp?-FuJhMv8~NFh0Ngh(dOzLhC(zZboQcY5>M z*6k+|I*NCm*vVBC7san?JNTqg>=l%HXs+g9{GlN|4agC?@Mex4jy3?89>YeX-kb<> z=8sKd9O)Gwv6b`%yFfB+aRRUx`pL+4L^my?V*Beshusc}`?m_=@cXRbzONr-vcZAp z)kL^OS3m>U%8L6Z-cRPY|2cT5;B`yE$*dJCp-|(C82MsQOd!oGBP}Zf(NU3n)#s=@ z5LSR5=$m^C6V3}w3}g`fR`fI!hd~?YLZYlfMRnpMtX_9G5PlS^Ad$pXreAc=N)*19 zCS(3a!6ONAI% ha`OmSxx!Nx7dr`=$r_tcL=AV3Aa^kuw)>nVN&=*;(iinD!Wo1 zs)vLD`Ahi5=2+7gca|2 z!62kuG;$hoD`yWJ8Y^Y8{aM${Pt7!DR-wGzmM-*mq`gjLHv>0*ONB_U@>+LB+v*Iho z-5J-%tihP*5SBvdeh zqnJ_^@@6+{RZjW^-uhKmX*%iA^t+!~ydIIglKwj;q9zeDbkAw)c&bI|>EWUWT9@=| z+b84Hn~L+AA0OOxjs|8aVnSu-4xh5VOLwP<_I7}*P_}X9LDkJAD^&pZKwFG@jA5`H0|9EF%fGORL9Xt-r<6)yS0l-!Oyyx~n)9xg7);Ye)crt_TA<)ULon zA?%LSDq-%+cfP*TuCqCu+TYU6(v=f$`WBgCMmA_4-?h6<-E+N!ol zXYUE*v=TP~Eu8><>Q69&xBwJp;97aw5KJ`bts}ry>Ju>1K>u=KqPr``Q`C6gu zI&cD#m6Rlm?3q0C@b*RrES<0f79M8>ES%t*FQ?GC==9`Wi|1K#k0mKyY-RJXURV!h zOnS6z+|HkCcTPW|qR)Ahsi>LGnooVT7wh^He4rgL$8god(vbFsUWuUnzXOaCf5!;2 
zZrZCf-_Jk4as`aasS)PcI)_tSfZ8+~v;3OsI+1J6R+b`d;4m+}g>kcL6$gbJF zDL^h=z}CL(yGH^gR=^#iB2Unv0PGw14H5oB(~Zs1$`1^l$~DM)PXD8Sr#l@(+se!t zd$59}xpI-l+2ZwqxBYCNdr4CqO~f^czRD@q`aPZp4lcmE#5+;Y+ro#0DEZkhy;)d~ zZ?OY35%57P=aC%gZg8;U5#sr**a-4*8b~5^def#xZ)+orL)V zQ+&r_rrMMfo^Lpg2`FWX&`ddPYT)elVjzyGnLr#oeY&ZtMBk=`dp$0Uk7ncRP7#CN z{6L{Mx(7(cJv^Cbb^!FhYrkNFQYzAToqen=q!c85$DW)AN~y5X&J*EHjYMw2ADz56 zrEV^+5DIb%)lT@SV8zcPA~a$n_fNTYXtk%v#ZP2f*hpb~?*^cToBW*Fu2^z~tC4*o zW=sve(d51?DiIx1*5W)=-+ESlk>%H|B6t~|Wq$Nn*HQX}0@k@w2=|`jh=r^nP@D4p z!b0X1LsJN(MlQU3%(088fRQ=Zzm_w_kb#@zsAKmbFOFl9R(o+J4bKU>uU_?t@{3lX z2_|mDCg=WRCXaHCs{Ag1bG%w9jf)yMU5;)2UQ;0~BM}qm zAfhc4WmfAj6=e?Oq!7?a_+=*#qH(80bVPdhKU2}!aV7)1bYbqcg&Pbh8u)%(yZD>=0e(nh^lp4vCA|fBWt?ul z`d$BSVonAPNEP{Xkg-cL7nOW{V>431o%vqjwCfOPUC%fD2u#cuc%hQw@E@p$Jf1>8 zP8d<69R`=@0g;m%M(B#SpzIZGa)j86QRP^`)6NuhUWm^(>A@S=#mttCIqK&ig zb%OhYa5b;&de8jgx@oT(wcO{ACw%fe=~5c<4)v{Ct2C(nkSThea*mF)N zZ0gEFp3fhEF5j%MKqpt9I^MpaD%|OzmAuB&NNH32y2Y2K!-CK&0X*2LJL{Z5M(%bW z2M6NBT8ROnXRyOum9sl&p+5XL5DMjjc4_sp+ILEpwqJ&Pf7s>xErfT?Hz~<7V^>sy z_w|$$Rh#|eJdwKZ(Ny#sh)LMPpUj{HVEzCTwI4O-03}8n=;?T;!yfBR+Ii^tIV*J| zZXW4=VT6>_oyt6hKbW&W0o?#|F+fVJ=L^bzz}@o5Ce$lB)?yXsZs!Sfnz}=|v|Lt^ z4n>i5!d}aDs~AVN+VOm?dI#^xnr;O$3DUH};o;~b2amOf0_JHmWOuSkqm6?)69N7e z%u3bwOwk@m1-{qhdl2#n{97CQR8R^B&aIm5e*Db|HXV$R)w}rT)Q`UV?sgcG60>qy zLyYvYdNNI*y#fF!!-+Q~A*+|oNL#h#&9NPG5Ua<@chKeInjkxkhW6#gHMzdYF>pE55DOnvbk$SRx=qj!+8Qmne zjjV05r6zc(@IkrV1<%S%Wja12{t`QLE6_iNuTs$^@6=ysG!&tnoYX57E48NUA((`Mu{*&QTU|q0**cX*XMQL>%{P~B8TW!qT zD~MD80n*|gXe7b=b51SkbIu{D3tzgDw$>n`P8F4>HG^;PeVEkKZr0(`2Fqeg7hP7H zWqR*bFkLx7ZCezpU23*UA_lZq!2O|FufQ{{$mT|YpGmAja@b?LxR*~0aB~qt%iqW# zenTB`T)N7re3c2lO-cS0xeQ8LpfF0Z{kBeTHU1x)tfBu*Os zYbOqXn(ll382j|~uU{F&bvcnUiWyYl;I-l>TR1CnZ+_O$zxo>yaw_=Dm*QwZ=F!*Q zz!-vs<;nqA?$jQ4p&F^AaB8~5chZn7mtxvdd+q6~fnJa_6tgJq_m!CB_Ot@_qx_OJvJU^;IjkU_*{^!-^GA}o z?r>mt8N_!?n?(~OFz*`-P*(knD^?3w|AaP>2X)EK$R~1_sl1v(3qcG^BJXh{RLp8- z-+5Wl&QWpN9ACa2vJYkC;NXY9sav=_tGWa>bIW>}DGrWetdMbV1=9hjKg&`BUkmjy z8>-M#;PX(#p{R6$!t9C|_O3EZl5yKM$r?NcNb3@Makv?%Tow$960= z_sRDy=Kq9`h9x_OK0mEb1&215Nn}eO^6d;|y@%zgWS}!ISrT_~Rx?}j>wXG$#p$+_ z%D(#iUOA5=uqWD^Mz`=p^2WKmW}CCfsRw#lOK)r3Fdj!(hnOu`)uA-lRrOyOP@}L% zmIQrH&iDVT(p1tscgT(P*98+g<*|9Z1p4L*OGh7$Yt>K)jbs@tf{)-YbLRc$6RZE# zgpC7?tx_w)2%=A9ET#Js1Jx4I4@rV+T2kPr(@y_-;)>!oKV$*au~yf*hJYjPgOLz^ zsOBATI~=||0ijhrH$k(ug?7fVD(gOe#ou9}3j*kbK0VE0ek*V~o3okCtGM%$RNvD= zf8>G$;Pm~NmfCBt@Ur#kQ<>8n&SkoQ!d8X8jJ$fpWWlz~Lt6jTf8SSQBrDCt1>1F? 
z<%N{H6U~3&=f3EJwZHI12C zZ>;aIIIKQ`YKwjnZs5^=4Q@R=h(I;$@qKlDSa`uz&(>p9{zZ(Tr}>dHVc9{0j5l}z zK^kz;kz&<(B`QTC-9(H_*D7ZD+g=?p_q9~clWQZcdO%iL(({G_;qa-~CM->qyW~XE z#_u!+oRmc3hxcz7eK}kZC+=rwcP9uzjj#Q^Ta3YQD@LLmI|Go`^pDHDlK(ur|HDB{ zY;?gb3H?k+x^TC{`>QDxF?>4)t6t2O=Y-QLe_OxKx$lVK z?Km6*KGd0M@i-;1&)y6Vj~*SR$JUz|uY%D?;8h}E#V0NM7MAxb1=Mg9`z2ud;nU;P zop?N2ADc$P3f(Bd85n`2Q0?Vlnzi~5cY|ojdkswX;MrYrrksCs3E8hbgAvv!IgZ6W zTSjLeLHl4)RkKV8Yk&vMUDAWE38@F@Zy2~V|3yuim+Y9kYn#KCvb{)u&)v~`hE5s^Azr87LybP9|L*?CG9(GNMB1T#y+nxJnEGXWNVWD_KLKmkZwlx1Et z0TMnS{S-3#t^*(DE<#I9#MJ749Jx0{NSF>|AW56Q0klcVfXG}f@?&mWt5`^eB!Id?VOgfve%e6K#_*FKVT!IUkkKdvIU5iHxWN+hS_x1jkvb$+gse&eg zD3#v!5{8<6XrF<31N zV}3iU;(4ma`scq*oE;L#pS)CrTi?hGjpAul$t%QLC`B`&ai%nENbA%~EJszvUxfvp9-|Q;5>^g; z;R&@=$qvHz%PnSTwXJ%ZeKMkKPJ76Mo%QWnIRPjf`8n{izC`o3Neb=r&jrwkIOj*w zxd2Mh$J4nd_l(zl-Ba_=LnwhXVMxqv7*e-(S8As-^(4S7jYfM`fD7;K-OAv$n93>{p{|1+Es1(f*M(p~gx+uB;-{99pci;c`?YK`1Jb`@p4O@)dp1iw~KZvdw zj1e5G{!{%IET6Lq-K1c}9>T=R*lvv^!A+%BqKOLssN@;8{ROzQ)17lfbiut@{4(hb zd%vk&YhkDMx_tuJ#jox^M1&K97Bl8G*#6FkRLjor<9Ze0-nqpZI%I2!*ncMY5xJ?S zr|wACF)I2opV6o=6>sOK2h`z-G*1yqS8$7j?Kb0kCiu(AA0urK{@zzkzBJy54 zseD14#|`^WeyE1${dIx7EQ{5j$S@Dp>%43(TQ@T%Z-qo9e0iKMwRHL&Be-zBC043j zPux2fytnS^EU4r?wwGuYHDj*4x3nK6g%s%N!?Lzzy^x}*XdqU(kF@PfMO$05XPH;oZo@8 z4dvXw-^Q?-{Kxq*fh-R{1NJd@Kdo@`;dl<6MQcDq+1t7KCP@6oYQDzJyv`50I)v6c zT|>mRa@|xkeaMNK=)R0LEv(^fM&ZLxAUsR?Xf7|;u` za!7^{I_iJ{AZp-WHmEIius?KM4yZT}==|!|2gl$3glW*b27l^&Ja11Z_u^;Yi~My8 zeD@1A4nFLTx`;@3t`?{Y;;Pju8dDQN&l@gqxCA&%!8leHW|j!iJn)Hk!HfIhw{`F6 znSyLA%(Xs+4X8tf-fDkN$-3B?Y2rX(~9zlwt-dL#yqbaFe6;^t<)CJeu2h&)G=`S zUer0tp@(BS0DcPCZB}7q#%Gqy|A;k4sM?6;+6d%8QC|XN65X&;Kg=?}AEd{$N~|&o z0T`Ro0~@usk~m%;>0&tZ(_h3;a7!7`0io-t{2T4wfq)EG;URmoiYq=1GvV5)HA(O} zg5s*F2+gJ7GJ^Pqa|isPTkUay5Fe!Q$bX2-udY}uh_Eik%A8H!e#ms8-OsY2HOAmj zS;CRUS0a9eX|VC{1s%VDd$6N!v#mATd{rt|@VWUTn44=aF*JzuLzt=xrtxiIF$I-HotoX=geF*oLGS=D+y~AE7n?6` zmKrg+Am3IcrN!ZWyQ1Q6A(@}v`fLW_9Bt4ex9CwYWdN!f3_kdFkbU1+4Z017tbZN! 
zpMrHqNWqC+pP%p+NT0cbRfB1ZE{dABM#5z5NLygJuF-Zn$uQh0y3|9+g zCoM38mk(wUPx?7tS|J?iS8G#RQih4NYp2S-#%g)a58wSO!w9Z=J!kXa1t}>2)M{^( zU1}?e0FjLYyjqzGN5Uq&nc+_Lysf?~j`oh|QZ4@sclkKy$-q z4omO+aBC2;6y-4ng=eA|h_aPjZW;t)%kPu^M4x)UMdML4w5-fCjVP76tUWbI+OeBs z1N?l-?L-a4#Rd#kQvnuE2I3x6^&4%d`?h zrbeIi@Ssh%eyj+gRPH_RreUYSBA@9@P;u zH8ox~UvJQ%uRgMB^1sA&Zz#YNzAfHwyrE|e9=fk3)zQ6(48u=PWLEg$wda@dy*odg z_@>g6Rxj!zUKA4(UXQtRW2~Gle=_&!9H1EXV~WSFr&R8DXv~~~TVwCi)hb)(7ZX=) zb=x1R)`f(yOY=jSSsqs^@BgGc%G2VH8JVesq@@VMMFVNClmW1<6Esw`1AXd+vDpKE z#9p%ArEnMhYu`Xki6d|Ha*(GB<%UN%A+0lQW*d$NMr^mkc+ny`vYq9(ki`mcGRIG$yt7`En;mb2>gT4g}FT>;*xf+d$3O!Q+Z4oU|wBHSQ zPT6M@D^MkS#WtyqYQ@&o`Xi|YAE*AHoksLP9g!;u>cmaO z^S82SQAUo+6*2g*UD631=I5t~9k_^44tT;02ALK{RxYpg1(g%%1*|Q|D5&Pm`XnZZwvVXkH!V3 zI*vy%94`3r?a88Q>YsnLYjpSP*XdIdM&XCBzHKjUnCewbh6n)r{qfwKxBlf*m*1~@rx4SwYK^cF*zc2%xqYx9o%I)1 zLy$y27{Nota$JVW74y3*=X!%fEa zqa)nzS1plDlkplh+6uBVtWg#+`P`9s11qo+1yz)S zcNsNLb0(1>G}`yWpSD1EttZdzHWxLPDkZis=>+grsDw|nqS9=1)j2BYztqB7ny4+B z_Fs%Zk+`Ie+>_Y8B-dTFXQCdwyUxRS2oZ2dAWP$gg2lE~gsG(<> zFyllNJKcZgMuo)Q>~#xyYWddh(X&A*lORb+3297NiWanNS(d$9f$#iCaZm5P8RQIg z$!2|w;6heMKs|zN28%<%l#o#LT+>Fdw1T?w*2YaD<5gvCH5Uq)dza}g%I%1oLxoO4 zK+j$p(Aon@wnAKVPNEJY99Ztdk*T#5ykQ{T3G_2RhuU={@sq>yNt{EYNyb8d9rM_ItFA2Bj=^0uS}G`l!E1uxxYQu3UmAlhIBMP|#O zQ)F6uRX-QVB{x5(f3Zoee#2PSuaxb@r5(VAcW-V`N8E?tIy|to@fCWVTQf*l=EEjf;fq1dMv2fmxldi0{zoH4 zuGPLj(M`ZZL8yE}uwO+XesPSpgQW1{&)OmwZZ<)bu^pbLh~lxelK^v-Cw5k{>6}+K zQ(&ZCSShQaCw;m!bU8}xJFb<(vzvlYRydJbJcas5-K)&Vlm}7K(Q${BrD^Jz zVV+~|7+N|$AdFI#45(COeeSxk#R{%SdxhAnbMIMg{lXImL!uV zDs^y--zTV-W_<|er?nk7UU@y}#hDru%=%(i=fQ{jCM<#+Q?Jvj4qz`irQag3ld60oU#n<{h86G^uEMGu4=^^Cor>*eFXmV1y+ ziQE8o)t`8%O5~Ln#>>4I!Dan%6YBNUe#{NZSwI=YybvtUqWG$X;{l5p^z6CxUJgY5mJ`z1`{t;KDe#ZBU61+Apuv#tM zheuhV(R-<5Hh%;N4RkyZVByNdOVXB3k3?pQjp9_i2j0DuPmv!l1yRx;*<1Ztcx??_ zIi>LJ1$33aIqd--LJ?Y3pWdXOq?8#EMTIgE?V+OF1Yh+QEp`!S=Wf&hpOo1HEd|0Q zUt#5jdBa4_UzooRqf*b#H-5V_C4AL*)JiMMLM6BlEQClvSp>sd~O4Y0@Y$YBmfTb2jKL?FQmvS*UZwK@Anq#_QF1cif+0 z)j!+3GyA0elleh)XeK6F#k{(V=VZQ{9EPfXwFA7&7G`K*aiT=rSOR@*!I`0*sj8d9)`0Szlfmn-Tn8byZ_^0O)XA+ivi@t;=VGBd{=#A<- zBBVxdGUi%>@_Nep<*Oqu=y`ZJIq%1JfvXLIlVUqV7(cWk#i#!JJ7keKE|5`SGw+oSjZj?dYxva@PSN>N+-LVy@N1~UHL-UnkO(*sh z0VLoUBeab!?Z20>M+IX;r5<_44g7ZC#VZs^6Ui~O zo1TuUH#D81fL`#u#CQP17;?33tyn92w8B?iaD&m6EPMfOTtp93XR#ZFL*m&AuGof= zeX#~SEFUAl>2dXiNzXLmBW)@Tu##e5ZWNNZeY`o_s8CC&?+3^890&zs8^MZyd!Pg< z>R!P)l{5jD=`g+(BUOOlp9(Z3#N$eH!KbWnPE=>M_X#={Tm*TbFGiEWNj8D8ad;mE zkgPD#9xI0cr{(G%{Rr7MCries$mh~wr~-rk%zJrexkQy!yT}ccJ_;31lt&_~de7Wb zL!LCFadp@j)jpCv@D2cMq7u{4MS6+VoFb=7-=81)+<>#t$tWBYD>_vm4FQ$238?!e zfwZ@}gs|uY$Nw0eF_7K3^f_xTLh23%B=0(tD@gNF-DUF6y1PMBr@Nl4>7JI)pxDlc z^IOqxm%Sl3^v%pe^6|(xEAQP*mCWmTcOa8;1F$Ib$X=~0PJibX9Jb827lu@mf9L;G zKQ0^@{w6h);2e*WX9nD*Jp`TgpJ%;cZxB0jYYK_18f0hXY~-V)4?1fI3ydv>K0~wB z^@P}_o>BDto_2r+1B2^El$;^6@%>!)tYLeVz$_% z*hZpfeDGfDo&@pNN(T6jX`*nGhK9h4X~;-RNACM$3Oa*G2xe+|ooI;P zYUecc%Al;4<>H&xO^jiEeCsg@P^l5Uc&s-`Xv*aP!hA}kY1%#MZ zrZ&3w$HhpghSKmuZIpo^(|>VRl|;O~xZK~Ku7yIK-XPkw$ymvMWEv9&2EDnFpGcO4 z0D^0(G`ejP=k)`6`(UPEV(A{H&t-33pEO*kej_Ww+LBj&l{@+pMOJ=)C=j^8&2OB~ zWeb1H7A!Zb1?rfLtme?3XQUVN9-{RH!SW(_+hDx_)E4wxX==6@C~#KQMdPk*$+Zr7 z=k7PUB(ImBMM|SUShIFJ2b|8n!gTcBCjvNMrgwb8Z*k~qX#hQD~FK+%x zP*Pd9>>UB4k%39D?*69GXTr5$rN63Fz3J(EQ2fck5HlGF`@5&|PXhx3^@=o^1r7A7 z+F-w$e?a5EembSN>(3+Ds&?0`-H#jHBEvOkB+X?Q|IUETVX~wu`{~skcjzhov!Dwi z2Hfv{C=!R{0c9KhkE6D~JGYku_)wULRsy}JOXO&3w zcq=CmRqz9xPk&ZmYGyp7<^naO$<}}QU@shG8}Nb;0kzQU#hjooleY2x5TKKIg8fR> zrj#C?f3Ud8ZhG~B&cbtI?u>e|?+yX&^--kp6;g+FEA9?*K945|{721#3|%y%X$y5K zp4t^b@OwlbCVc#<^=ye(ak>p0sitP$#~I`C`n&rLghaX(?PBY)?A{0Z!?Jr`Q>oZw 
z-9F-rP#Ox0p-@JzEE37T?$l1VGt)g_4M_rH6VS5ysg?Sd2T9Oz&~~sZfxfZMrnJeJ zq>y`2V+T^<^ePxV8%~ZXDVlnx`e97wGH4H*m5 zDk4bP#D*JI)b{k`{rq`nRMkv}7G3eKk3$c!^LFmeZsc68QCCAR7uVc6PbMY*bsn74 z)1V7zeU~lcd{1jJ*~4lm3f|{(B`&nQvLM{*@44CMByha8_*X{@tqx0jfIOi$@Djfn zN-1gN<4Qj)-`BCV*6xMA1rA9)-kB~=?KNO%4^Ma-1WWY?Bgk^czYgA8x)$GnGTPcC ziZ!yM8Q$Qvm=JDK|87hD;Du+CLyLH+?W2Raq5wrO_)Hc?SFAojx!oezD=I)9O?{V{ z(pm$=w&D92hJFU;;~$Z5LS)|7TSCduW_JW~6>g8%qbj+BXlqy7$Hc$kMCvI?;A{20 zEx=yh@mzD@{H(9vVXO(aetML$ zIqM_s5*o7#aUl|C>VfFo_dbZ?EEOzMx57}s>*CiaHa!@9!uSJ8U@NOk9d--;S>TIIh3JC!7v4TkhvSy@3~eRRbxy#UHnr-{GNX&so$)fcia( zk-s9T2IJjpzfS2)L6K^0h?ry5_q{T2a4C1Ijt8JzsF)0K_aH4|i^F2B4{o-~V1|@& zlH=99$O6U8R}z$>vOv(=NF2{}&^t$AmdIVJht_&v?Qf=A^~yvO|IW+@Wwi3c7uLI# z3g4h?a^Aw5$=r9J13gyV591k=l@6!zBW{5C&uCb&{wAU=#jF(N*1ssq={i#1nI3`% zakQUp2-W}z2QxI3Lf?{PLZ9h|fh)neE;oRc>O}QiiQOkG!f06j3<{5&kZ67p5vui~ zMh#tgqxh&!-mWXd3PsC~Xyc#+UO#Cb8@cgGI*S=AUO|P!^pl?*_w8YpYThKG}uV{X7bl+fSj%c$UwR?in!gsaUzq3e7qeg ze^Oea+2n$BjTBAuSVfd6&cF8>`&OVBV`9~*LQ%s>QOD+RG$X*v&N+a_dKRlDV%(*8%7-lm?bnD77G3)nwj7S z&LpVo$-lKKB4qzNYk~V@&cQW$cC$7;ws2YUbVhkU%WHB4L%c-AivF#!=n2lVls=T~ zHaz+Fbx88>ZIv%{3xCb~%T=3=k*TR5@d~*ksiKAl+XGIbLSy8QqTRRsc2$_O>z{ONn zs+TA~rNlH~dZjRS$MGxjn9;XTCgKp#96+a#z?=d;6EoHiI zr0_nIzO!bqAZ2m&BG2au#Y6ij^d_%|{-)kGq>V2DyuIsi$)O$B_%wcXw!wE+;|`z}YZSHt#Kl-<;yf-cl{?Ey7tt>Q3f{m}UZ<+OCV*?Rb~GWmcBUG49q`By zAwRL1Y6%~$>VOl1rb_c>3k1&))E36F>adq$?|$gzq3-}#d@p``wN)N$`~VjqD=!Il zZh>WP_=*zINtZTAVC|XvFV{X$<*YUIs-%y+pyM>qak zmabL*s2KBnGA9G`sKq7BR4pG@`gn&~aRi<;D}1_?;}U?Dr46r{+ucZ=1C7DFT+8`m7+}fTkA`#^X$k6_H z>(wYX`0^TvxUB{o*L;?(K$`?ME;RkMqb7gWc~@%(<61M$o__peyBJ(TA}CnNp2FsrPgPC`ES`8Q;x`tV&<%+(z7YOjBfTl zZe;*D^a0=bP{9m| zeq(qk9OMc{8QTTHaon;k%({arB)DX+X-l7_k--O*Rr{f@jf8j&*2R#js$2sssIBPt zQQ)Nv7CU(cP6`Bf*u%lnW(|jnjyt`ql$6xwRKroX48Zw0fz-`3M$4V9Z2t4*{M$j! zZE^j1$MxjnKlDK__O+Je*^hMRiGcgR|1id-r=ovud>l-G|MGJ<^*Eisezy6H$(cU- z7isA5Xh2iZFATBxOH(28>nSi_{nYG*52`B1LZhXAo`+1kw!w0Xd0E+Nc}zKAi2P&N zf0uS5btsa}G9sEJ{3vw46>+ka_-KF^HO!*kdZO>Ng@8Cf#BighuexUA4VfKdu8+zK zBaBF7*&3rjNQN{C2(1oHtQ(u@VY@6QlLvPtr1IVz{{>gOfIj zi9_stC8>L<3YXr>8Xs#lBg16+Ku^DRH&d6W#1iHj?r8SM6rf#v)Y{JK3O^7(k(dehcO^8X4AouvB9}PkRg#9@QNxt4`+c?mXC@G@M^~$XS8a?c+Ju3g zIsu2uXGL)S45F=363$l(Odc}^(Kq~8zXJ`Pv+nPHNQAuHOtB)(*DlYq?lWeE^Uk*} z#B)^78z>{3gk0f7DxQqRN5aG`Ma#nIyS0Ct_$_)`QYk$i?(Wr}Je-|CD`QK@doX$# z(~*N3y?hw=gm=xdxl^%vD(c9Aa6Q2VpF-bf2SX&ATcbab@QeG zyn~gh1nmY;I0$%n=Jl|diMq%=^qGOlx4#=>pP8 zZEY%XsFIrmq9%e&ervX+Q~lt0@rp zZ+Aq>FOPVc)g!NDrv0Sd>?T{vaT2c)SeAKc3MfpXsutihoSE`~aVW+HINt(xxk>Sm z;H}kama_siaN;2k_#r~#PO%72+GEFT2O*!2kxHL#v0ITG+mC=|Y^3^~sK;nu*TwYC zxzeY2r6c;h?4SmBDano<$e-7M+3J*jP-EsVyU^QI{_z<8mnUPT$c@fHH7Rz?sM4cQgjjU23*%`c%pSI6#Y_J z2tGCYyg+9S9B;V%Hmx9#h8^f;)PZDDP}N)Sbvw2U2jEWDqly(S^qawjhjL&igcoUU zJwZwX*F8iY{W8F_@%VmFqNlcd7JSIVzgP9nm^8|TH76;uL-sj+IVcsnW(6lQ%lns~*ock$z@K2)lRq^_ zKI(4$jd;LOGg3n7$JcN7wVjZd%7h^Vj6^?7D=cmw2yjaz>PJ~y)CpcU7x9G0!J_4T0- z4vYUwWYR~hl^Cl-nXUjl8EO$?Tu5=IaKpliYggGU20dYlAp6QyN%C@=$SI$h5mXjD3V97m_ zgqJR8(9#R|#^b*i=A%Epz^tg2w9x-e^)&SS(uVB^)Y&}08i7R)SM8!AB(1^S=I>hW zHaxHCQ)3yf8vyy6I^X~1ePd-WKj3-6g zrx*gh57**4U23XI0@wKX6N+EnK{<8E#q7mMsPQPYd@e>{0N=Z@p=UMK7Dk>QEFAeO zesi7ayh*98QX+e?UD5|7UeWqk9Ey$^1=W9d_002?(M=v88#}hO22bcv+oa4 zx&my`%Yo}0G_>nRP!k_TPeo7X->`M-O__dIXM)Md^0hi3+0s1ahGi@h73;hwtVLHh zzFUW_RZ}g+8<*571ijCJ+!=+PI4oCFKRXTmVAD<1%|LA&@1mGC@`VeB?Gz|I#=MrQzH>(2mB@1Cz#)sr z4)*;(hptRooHm2#NEe{O;t6o2L4|p?9E5J2K{45k=u*wp0CxyKw&cYJ9ahhFbvVGM zzJX*=9ffb2UlPdRnaK>`a|9Y&&gc7^4@>R}y{7bKN|mN4I+!1SIFc41++L2<{Wz^Z z8o&xqE*TI>csrj)!H$E|-qyBEe!O~8-HsW-qhJw$zh55D;sBS=Zj5sTo$o4Wn>H+` zr0&;25wsSGBQJiPo|&$%N(|>%wT$qdweU?%oZIAF*GA}#>^=Qps#KeN0Y2B6zi{u` 
z4SM<`RBIj^(xDKbuyM%xg0nbWnzWyo^E<<|wkmF3D1r5{&aD?$H)re5_@7?(s0C&< zWn8oqYU~OdZCeODDpdSWr>%wSLX`-`c@flQ@=GE@+>9Jypb{o$Mt0+48ud1s2xV}E zkIMcPJNP5#!al<%zLY;qi2vo>VwHhvWk9N>i_2(o&VkLN<{t_;t5fHb+$v@_ z1NYtDZ-CJY7<9}|tSH@Mtaa0>T_yqeaM}mhbv{__M+T{9v`=YKEOFA_8yGx3Zl z>r9;wmr|eau(`5q-QBW5pCIC#ISJ&Hz4XF#7~|X%||<)7~zpy~phapuB#s zvW219_w}m(6t^w;$JOlwM-PveNXu@ESHT$$v4MMa7_I)2sO>#=-3tMUP@*TOsbKKe zt~1{hcklwY3aH8EiO&tNip^NJE_>K0!RaTM?X3FS=Of-Rr3^LJGE1oXAUm_c8MvXN zo@MQ;jLdgLCOCCknrj1gH*;4`juDt*??mTS=wvgtulsk}$?Tzrb@Kue0qTyx{u0_4 zEt|3Xp1{4TqCmd5INg1&vC(H-NSv_w6L1-}e)7BW_+g`kyYkSz5GTVpAm&Mf#@>-# zu`=94sCPC(Kg6OeggKa$gA>}}@Yd~9(Lufy^3)2n)qHr8hBn}6wvppJrwW-m1RSR- z^zbLuaiCIE;u8rccEbHQ&5*R%_FU^f%Gnn$2Ltx;mC@03G)`W)B(`$V@S#}mRC!v# z!WlB}DRr&N}h!?;*Yh&0dP^|W8;JxV!q*XIPkMbLf`gC7>yPyKz881R-1xf@dEax>)$1k$(R z6PUn62cw0gbGVSdi7SfAhtqybaA+c~*>BY0BtBoLWHo;uZ$8C;$C5=>us%(JOU8`I zo(C`iX{OdeNj(&#FAr&nVW!OE7FNGUyX3ksQ(^XxQ65Jb7hCdF|7*lWJY0VZNqpAP zBN4djR6)!9pC@kaZyvd~TqcV2N}?^W?@v-b3#MR(nJ51C2^~Rqq?}$fS#58Lm>hcK zB@ zzE)A#&O3XJeu|n>e_X5Cu)#ls()!)8-2s|n4uN6dqz1A4Z%G@zcu3DJBb%3=(NqKH z^It}HM*eZuQ$rXw#|_S`rz75Q76ei*p~}$l(S*owB;KoTrv|7opLPt{9~2vM=}}8p z$^YGL5f$4x*LN#!dM|79UFj)084NNzPs}npHbI#@^W$9s>kX(GT=H z1*QC@nvKsnxp2z<{bO;87w%HIZ~MXpP~?K z1!dnR6B$<Nd!^0SC zML&|pP%ZpG{5Z`IVO)!_%vZ&iH`(7on)qlTVovZjkh67u(=;j;d_m7|M{}O9&DwbT zS%-ptW+|biJWeYN9hx*c2`k}E$jt!n6#VDX6D`Y<`y7pj0fI@g!i~eXAr>CFu9~@1zOxNf`svIfsw|+AC$*nYYXpz&LxQK(6=(71)DZx5}DX?0soti-uoI&A(@V{#ziWx`6MOsXZLPWR3j=>0cB43$moEOCOqDodP8(n&KtA_xb@Vi zLWc=8lvqB!lusHnG#jH{=(^08D2FaTGx~U`1v}(IYdAnZ)Yu@aFWvJ$5jN$UNGGRx z^P~U|wW2^*0QGqY>s&L))1>i~;_>R&I^j`2aJlH6p1JqDE3lVY8Ozp8{A>vk>kQ0C z$DbLI38C9IJ@*{Jd(47pjgkKBK*y$P2fA^nPv;<5>5J#_#l%-MC6T%_I9qa0G~1%c z5_+hcYYIds!Jc_ygK<2y9``P4_&6DjhAD_FL&j`?WN=~?gu#O-#dOWavk-rr6^Bsh z5aydUlf+qwGSGa)T2!y8#=X$O%INMA;3h{V9+`@ZRhrzH9zoO5+HMcR&h4bx>RVHV zvLEHpnw;5j?^#kWf1gI>Q1kAP9EBR>YvBPo!HMood}-XeEWc#Bc*S&vg$xwBQWsJW z0g_1jCB|QF%hjw5LxvV^2sDm51^jG7QiWLsKIJIZu3cuYJ!X|3Bh z%+#GrB`5!Q7^cSGtxrl}Iju{${`6XVrZ(1u6W`r%)scgoUPjJ9WI|ZLw!5*YdUjJv zGE`+iJPxPoG$ln)8{{n0vBco75Z^xg1D zBcQqJNf$KycYGYq4r8U5#j2!^W9Gft#vGilx*d5y>F&?J}6}3*3G{ zBQc>wy0ww6W!Fpl#P0$ho{=cF@Qw$8pQ@=Dh#b`Ad6XVWQE+@R%iv^zt9{^|DUSe0 z>wN<={>AboYg`Eo`SFlR&k%h6 z@v9%|uFtn5W1pnjz_QT|@ecLi@bAJygvBlEnpg?MahVmS4}OK`|L*&y@H5o!J=N^1 z2R2kJv@6MRwdZ>sm+*XntDy2_p*`;avvO~7df;E-r!V7Nv<4$+hvya-n@|-i|101i zkUYJ}#L!F+*mFLg`7CXC9GLt5`L;9xhCZ}!Apc@U?{L&T`sB6bY0abr-^roNzvHa= zm6aZl9;4IcwwxMbtV`dPEK6-EFczi4vir&LRCLV9g^>?deMN9w!h&Sya!M`xsMYmh z(_6+CO?cz;M@U3W=oyi;Z+G;2@>l`RzvO>dg%k^ymZsu=Z*xMtdyUy^WjRqP6=ZA@ z&p*yq$3PRglZeAnta-cxpq5m+_i zteU893*(2ZxiG$C^{Q*nJK=U$v6gerd{2DFoVK$|eXh#+S-V8u2hHUWmc%``TvFl(n4UGIk!G8tqU3;1vzf!vHreYUF|1Mj_@*h!T_@9h^A?l$Vw}K`Ur- z3oZ)=DKz6IzI~m{QoapOdsO~P)>h}nL>c)_W8d;QXowbsTqW|Vo8dI#f5rG>voU9& zy=5~kQsa!vgD_NuEYNn5KLS zBs+eRLW%qIcd*JfM1Rwrm8uwWdx`rH{Uxf4UCKs&wG!GHQaQq~GB;Q7>1cU*Khv#k zA`aY3-P648IhKGKNlc0##6QAMFtuZYT3>v=bnXjROYd$H&|ylfr*{O20B(0W2m6x!YHm>fl9a<#BvNZ3KnOxklChCf z9MB=aivMw<8vP`I|Dr5ijNDm6tMn%dsY~B_8cY$-h509geuE^WP+ApYMx~GXA5510 zXCzn35WO-inZD}jl<-woaA%C+JgM z{jr`YE>u^yh5sR}>>s$UZC|YaNn6YQ$3jqk$;0$tJpJc}{Yq72iPE2O{d0%cwlC6= z&>v@O*sts`tjAD39sA8Txb*S6tu*1!b*cS&w(Zw$7y2!&EVoFX&r`P@*Mi_BdwUd= z$DZ6F3WX0Mq)tS{zvNYLYdETS%&8=rvZ^?DlV-Qmu-Vi zIN_(4UUvBn*IqY&(SrW|ekiW{ign+*^G)o@~TW|L4^%wOnLK>7qPdj!L;zldN zOou1W9W5PVL{)*Aw0X5+>oV@-Vn{nU_6;#>KaqP|o) z$`7@63Gitlnm(Dphc6g(baX%?i>y+J@Dxir#Gd(Q)gJU&Qbqux0tvl;JEo5|vRPX* zZ>mz7|`D1pMo%spRWg!iWDA_?PLETlynB zC`uQugfm+oBt=5gbJ(Vi4&I35%!g3p-zcAb5aqF^Nt3#^+;Yo5{pnBl-FM%WS6=zs 
zlYe{ll~>Jq?6E7Zxa!(#u08Uoqqf>=%e~+Au03|&jeYdkdQ7=I9n;spxO7f%L|qT6Ygo8T@-qw{+%!BmX!{j*K_>3eSfMmAg8L& zbfhI1Qd1~Z{J(Nax>fee9C}L&^WkmFV!!>~Z4>X!aZ8;m`Xdd>HtSHq=Ocb|l_N}N{W&*I91wG?N?bv(iue9^q(vH4f~>44p1_T@(*x7k^NF%Dy1~$+)!lvRS&79ys7(d zW&1TI2Ckr=C>UoR<0`kNor6aNymgH zh%#DRq1Mb7a_Fg2Fhoa&Mw;71+GL^Z=R@L5A_Y@c0jaCm+{~Z=lB@*p?q*BYrh&!% zU0vFnl{|fY{p?2pX}4Kj@zzMt4}ALj2l=`^Ulmt<$P$ODF*bkx!f8EI@RC3g^(2iE zfv{lFkGhaazG79(nq#4%f8~SRc;*;GDHj479$~xU$VLbWs3c!Qrg)sUYH5&7%GdA_ z8oxyQ)#5+a$PWy|TzgkXVnob9SS-@bwO~bzZEIWkc8}%j=0oii4Hn-bqe$0Axsh@2{w_=H2qOR!4os zszEV~Dm~$^vIGC+v0ti6p%xHzVa&{<2WQz8dW> z+A3W&HU&O}JSX;x&k8eJWA!y=tg*URjj2(>@-U9XQ@RL7k)V$xPJ3Z0K&0zovyx9D zK2eZ*C8y<_7$?8@O?xSOFn#s%pZ~n?(#tOU%U}O;`tMHD+!@m$gF`QQ$-lh*4cqLx z%dYR-d++YBl#54Ci(CsuOBZD_%iCvH?j@fx+_x1cW^s`(#WJlHj?*p#o((2VJZQ9 zG`DxQ4Gj))IXpDPHeSNVhZ0!g5GeF#JpRNfnsfRS&79wHsa*+tA|}U+K7}l!{!k~i z{AK#CK%qZLzD-T*uKS8LXU^Pxw_O+Z^_}$Vlm7I_Ki+ZIoew?o(7Zp-{o^10&^>wb z-tXG`kPjTpMDV|?y*9h>O1|JZqR&QXhKC!`N&rQ>NJypVs6bI|#e{qViUfdzjX{n6 z2YtsGIF~pPeefBFe^9n!n1->3(0F_#n;QH{_ky+K5RA`6^dQoRQMPTy(h5Zf<4KppKnK2+W5gr~elgN`EprYC8>=N>P3!;rz1t~-0 zAk8a5ABP!_XoTumGjr$zj1|Y)U?qwc3+a$#%JStq%M3P{H)r&sMsRR|Ee=^3%@|5j zwW}W-#@#Y^#)y+a8q=EX@J7o>Enr23NN_`K{{D>i;AH;+2~e`?Y-2*7nFKeZV_E=9 zt4fxZrqG2~UjqNC zUXVztk$$S_n}6w7YQJjzbD{w){@Z?4Z(WK$%Br~n!C%2Y@vj1PJahYX*){qT)HD5w z>^D*gih}=TZhy%md6w`YK*E5UeiGFb!kwoNYbAQ{YaHcGXB9s z&=37lwkX*86HyiNnXP4Se__b-mCwqiXzv!@!2q~k#-~KTj1O65A3i4HUy(fc9O1cO zYyaQ?K9O&8GaW>*=YxP_bYP&LkLTbTv;C?glk5xnI3~^hkiG?`*@IP6RC?ejE;}cw zdEy`X*@o*IYg*X5sAuwIa`42WY`?rs!X6itt6f>}WMi7T((`zWV%oH+Tnr7+y%Z#^ zp2ty?biHD~DvD5xgP(kem*7XK72T|7x}M9EbvVQD*huSW8&hQPs>XuD_C@o-;DtRj zREH>D^l3HtM?21=ZW@IaUNz{#>$bYLANY_6^OTq7IXBHv%jhst)zp>!k9a|zvmLsv0+Rk1&V`IF>z_uQmHEkN><7R}k(|uR3L3ND&i4aIt zm;DY6(15nX04)pCpioc(O^UXFwm^5I7x>Vq7Ud>T!~Z-B_M7#`$cx>mq9l?2xKg1U zs-Lhgw=XFw5?G|lro_JCLid$a5d|os78d2`&>v?_Foph9a;k{LlJMaVX@-1FpOi(> zkq|WfUK0I;zqCB-|FYAE zbS@YEU_SA$(1d@Y?*K;=$A2e;uL7UI-|Q3YB%*HmDkF{w=`VqQ(Tv=Qa9#T5ztq0e z$~Vyn{%i0r^Izy^1s`W_w76RARllz?!Mal>6 zzyH%m9KmXu$DeotcTavi`9>RU^odV=Vz=FPTWz(~`WFw-k`V~0jy9HqF|not7FwBk z!H_+a3)(Oz4rvZG1|Uk&>h!BlQtseC)V$1pk-)H!RgGHI$7B-k6VuO78a4ZeDtv;z zm@yY(iXf@ESkw$s`94QIL54FTU&1zp? 
zFN+vQheij7`WX|>{1bImnWiV2{8v5VaOoscynI`H1wx_Z9~{(tE<^F|?w%t){prI#@{xxg zdg#nE&iMXu$5EYyix&Onq~Dx+@~J&jdJaDL;E#RmV>4&YWJXx>)yULUDLPpcYCs4L zo^;{*>A8>t6;Q}h?78N~-&qp=we}meQ=QfEqHZEr(APz_FDfd0WVK2EqfDh&j=*2| zKr;a-f;xhKhl|q}y#2b8)LSw!0IIbAgpY%W;9t@v`QmK9N~ZkM|B!Fc5UBoR0MOq* z(5)}|>PflRd`ZFETHBFtcn#^Kh@{ekz6Kr+(%yu9l;i#C|D8N};-tibGuvnQgWGDhmhR1h;Sf!S19bs-c)vA>3A2PWDTl zg$ozbaVOoJN9VlcC2K|1$SAH*Nq?dghZ*#lS%!+)gcfdz*`>k$k$%q2EnRFF=;d){ zRG3Qu5cJ24`OI;wN>u)?t`6{*3xa1x8yZO9^5l!)zqMJ<7(1rLw!5IIgA9c^Nsc9RxCY9f5hlJ+HNYtgt7&Y`6A;((nKX=g1x z4RhLfN-;P(9IND(-F{VuM5=m8!j=SQwe}iSDzYRX700o%wZ9UF_<}wE>N5RVVWL{9 zW<)QC{gQ(`DWm8MBLYhJ7%NLMm%ar1MXiFEsERKU5B{V7199D&KC;17DN$w8W_j|I z>!sQ+M4+OaqOT%2iHI(^BvStzqqIc0bfw~^OkcT3ne8`h%2|Sc(IY;HsxHdlLQP)v zh{L6e3odHfU-DW1EA6||iT}u7;XmlRLRI>y>JjSXp`WgbO4WtwkF(#*tJo!9#-G$x z^0h1=ApJ?DL-Sc`fA#PfqVO((xMNJ=_(|Oh0`c#@xsrfUZr>d#Lv{Ts{09PM{VWo| zKP!*O$QQO-+HsAkIbhyrtaaq!M;=}KU)IJ=MgNUt)K(8l#TvyTGxl%wbWF_Aw{1X{lvCZlC>m%Kp4t~PV%Bt$?yrRQ3qC+> z{YrTZ*DU8cVhJ`f4^J(sbJ{bzEB)5s|LwfwKR_E&Yz^5uHj`_BSHc*)=j{sp`$ zB4{9{1PV}Ee%<q{FSvMJd z<=4DX961@$czC?0XR&zCqYdTPjG3C8p$8v&@ZyUu{@mw2_r~qE+idgA&pP|;C!Tx) zcmLYgz4rUx|K3$sUwz3XmmGM&fm5eWWzuK*YSYMzZw-lOG0T+vmpFk9b48$MhYE&* z|8!g7KLVV`{0HX+|3O=2aQM8O^b34=e}^_{V2~D>(IqFI{c|keHUKz z*PV9SdF!pV`s61*K7YY{nmrnCNI}g1O*GflzsSE2v2pPzW!ZMPB_NRXG& z^1|MQix&1qN=L~?L5hVcaNE0RQM`YY^hYVIG{Q3ebdI9+oyU&$2OoUk!yo?8YhU}C zZMWTapZD$a_>+%M>6xlpXZ*8%)|0P@>9Bg_8`F1gdx#Y4no2)I#>(WJ3fEPY{wPm| zMyVpLf1NgkDV z75i0?#9onaN(3J)s~j1*^R7FOKI-WE{&^p!gmR!N^kO;Boi}g6!uj*&&&6PQkQ>ec zx5AT#;ZZ%N#z^rb9L#|)Sipd_|*|MssR z_}AGR%-(d9&2GH$#^9jPA8JO$%#N$#i9Rt9M|0CzXPtfEfd|f>HG7jyHu>eReuZKIPDjlX|Ahj?1B3q*lxS+-t*q~{`JBO=Py`b5E$4D z8G)t+3l{LnTJjnnT=obqkpoB0K;_9NpTu=))S^Ey3hb)n8~npEpfK9H{)X#!+jX}s zw%lUNEw?!B^wSlN;4}S^QDLRW81nx*Xg^x{sB>F~N?j~bKJnz^3m5i^tlIri+84DG zgF}6i~k54gKL70GNMVuyEo01@j<5`-1*-c69=gVu^>A3i?5{kZ<8XQ-*5$S~7jds!TX3=-1_6Z0gAT$DIR;ODBxXo1&K4elrmh&J@Pu6Q$Pw$K;#2>)`ycp6S}^=4{*C7VbLY=J z>?4Qoyz|Zn?0>-Rx8ELP5S7n^lSK=U`~Gq7dCz-x-(!!@eD<@CJ^nbsP@}Z#uDf>Q zjbHWZSHF7aopyfUfd_D{TAo&8zw)h`aH`abZalsp-|2@Uqco*HXr|ciLr+2ee$(I2 zKIiNo{NT8Q4>{!9-~RTzx%1Qk25}lbniMUU8@&DY+fF&<(EPehUVG3%@1N4s)5&~RWiG0DQg=67prjDBeOiKmtEWYP4qXciOG}CA zHfS{&^NDQX($+dOGW6sVPYHxUJplrrV*ElDCI>l?FU0B4A`!AfB*t-z7SYqNtf*N2 z<RQo3cdFH5qb|5Sqz%QrYF`8R>-muLO z;8Pd>0-uz#O1?x^@n6s$&%df#D*1(Fzgjis>KR%0yEQ^!8WThngJ=NC@CqSMxr%s=?@kSfdW|_~iTW-DON5}t&QP6`AJovS* z9|JjNBGvrxxK3XoVCSHL&el$-F-&^u$+`F5d+)q?^IrDymvwdV!XwE6MZ+^@9YKjH zH5O2t1&y*;V$j>y`|}fj{=kC|T=KU|ZolKs?|t{X%%;)oU4Gf+>?n_jDFRt5i?%fI z(hRyU=ATy}-?;6zyYI0(O)IM;NaGGj1s~jHZ*N~ucaK%7m3H;mV~=H|s%EvNm2J6L zSM|+re)FX-eJMk`_Acs-3?$chGb#9Yi!Q;&Gf&|gDySs+;Q8mB54Klbb@l!C-*@3f7hZMcRo^(~8;pn#`?te4-)yt>)?1Iv^hPve*g8g^ zgC%C7%5*oph$z>t6?Ko&m*0LxM3@9xs>P`7%Y?P)MF}_p!>D(1)n{)smXfq$x0(oZ6UzyFf{xIda?q{OsvMY97@X+lgA34Bb- zepOedLyExPQWbL2AsJQk{r~*uKJVFw$?J>%cF|YA^3^xL`OSC$rc3tPW3OwjzJ^&X zIMwu(i+8@`9o^k)Sru_EExcg~&v;HoUtHkL zc<*L#WDx&`t+8^KNiOg?_q_A2yY{-48*jMwnrp7Q>MA{M^4Dp9`qLAbt?TKTvUqWC z*Q80{W+a*!@vy23f>aykzxTcGxjI5P*{@W>Xxcm&9-#n>2NzH3>b~=iJNAC(-VZ(W zAj^hXqVVzm_>W0llXl(p?L5F|`U?zsa^hWL7>po!tr>({>P~408meDB@{1?^@}xdZ zf-Poxiy1Fv<>^RQ2W>QtcmMtNhaW~z_uhRkjNnYz5EOh^%Rno@do?rHTJzj<&RJ)j zbwVXn4;ikNk}56Le$|~*^2i~f=7RtxS60!p{);*5 zzp^IMB=g&oO2>b_cq;}Juf+PlME|d(iM|S2CE8zAjk>xwQszJKaeagk7Xea$5&9Fz z7$B<4oha#tbH)EhxabLIWTSrwA(EE+pMkPCD(PnkN4b}Zf22t@a(Z7YI3K04t+s?8%<9(kg}+ith*7r(IZi6@@mY4?Q} zU$praTTnI%JTf-2xUZj=LYR&YwqP@%R&uGy8MbU3Z>x>M4&r^a%6y6uaR@ z8z20E58(N`c&Ad#G|yOL4LV4VKmIu0_cb)q3~t>x;)J~vXx&^+zoiwM?QbFJ%cxQ<1{ 
zk3Kr*pa1;l#v5;}a!ESI!`fKsT-dK)0~s5k_l{fn$K7``!MAY1g0D47<67_HI$%BCuL>%{6y<`!2^E^L2)1 zRFD>mheQl#KwL|Ybzm#xD;@Rv&r&o~msOd6`!@?d>!(SCLkuvIeTjYm6jC;&NU;jo zFSRb|r*3oz0|71{^P=rP2N>3jkSv#ytL#NT9Kt7D7dKVyWAvY&-}cumR@txkmuM|t zWUZ!J@(ulu{&QLXr3mz|qPivj%w;GFv{ZB9<4e<5TtqwYIT-&2|Eo;NDd{)vn+Op zW)saH)ob-)i0h!gjQp!(!+zDu=gMfrYE)EtF1QOyram!!imp1wnm=>IXMX?tKk$Hs zr2?9b3i@{X)=^iC;wU24lRA`!nfWGK5!O^QLgU33_K~3tV^s6TH@=bW*eC_QKw*dN zchFC3>pl10ed{f^Y_;`Pv^p$ohTVlsrA?X4@OXfChMHO!p$(1xAT9&2swq=zAChW;2=GWJaFAw9hfme76nGoStW zFMmP9$uJBt&>=YY+;b@6xZ{qWHf1W~?fxO4(5MwQpd$^Hv`K5CYFn!h;C2E>=Npm~ z@^uz?Ar(o3wM3#GeH9RiLN6pv1#nQYRG5Q^5K}}J`XlcqqDmYYg_>@yVeA>yj?ky{ zOnJ|HdD!`$C!cuYnrp88^bw!F_ul)+_P4)Xe9bji)A8DBOBVfoa%%+5OHX5Ce5Qa& zWQMOQ(Zte8=p?Ou8unYof8bNHFY}z~n|~#Vx>WIy0>i#mMf;6P0W4xn{*9Z$|AhWX z$daU!<@#6S&nUtAo_X`=OkWWSxonsxviK2Q#4R$Rh8BPFhv#mgR9ZZ7yDr`tmXuH*Wg|=9_3@F2CZ6)6YEf?QeT4ubCha z3hm@uv3h%v{kB>bEL_mm*75RnUfwhq+uT5-iI@6$tCVG))R&jOH`-tW9%FC~S;{}- z%rpMqpZ_1-GVr0}4tRk-lajE6$HTL&wbq&pIkevmZoi~IqRyr>gL-1mgC(A+;B4=^ z=k5a!Jm}#^9%k(x?h*W1Qt^=wf9Qe>{)a`@Ak2e(xp;sAYf)6E5VEXpK)$S0MI7^< zn)lG753{HY`aEd>XV#1n-n0lU;EkKbhy&s`-FOo`RBdMl{?rBIHted*@ewqJ^%a%KHQf4~idT)Lct z;L=$~t^JPgUn!r(X)?}O>i+~Got2J)AP(gSCZvjfN>SmC01h#jSq2Z8Dg%k&uV^13 zDXE$Nl6;jvv^vtes=Ig8l^fh(JC1uB})Pz6$?I@+$nrwSi0N;NSjf zeE+c0{;G;0vH(#fK_Y=RA^&CiVybk==95%bhV~a{z_V9$NL(UsM8<_wCxj=RS(5## z{W7G+BGohF1HD@N#rs`&;a|`E{h9b5oD<95p^wL4TI!VFo^szk_f27gj^<|ilQULd z{jG1`h37iQ9CHjU0<`D=vu0{BYh9&a^eu66EhqfsCkGyQ05fYiZ5$g-537Btm;FB* zAnGGXO3pFbgxh@YKJPv1s3V((n(n{vJ}OC55~GPrF1v&!y`i?s%SRr5=o26R1XFvg z+oJz3{5?=-q4CMTJsEZ5VY6zX$1r+<6jy)wRaabf#TBss;SYWIrT_9$)^zJhM&DxS z^O#2vkBD&+A&Bt>J$!a>rfM^0&fxh6kIA2!-@E&6yEB^x5gHCX zOB)?gqpYW5YABEU!T0aH>y8H=d=Pv?HAqxvUxuwsZs7gh2;q!A2KH?jdK&BB+EbkRleW``gQ|l z`%)1FeO)_A<}NXy@WgW1SEK)&kO`s4QUzHZ{aIT3f-K0)X+rr1QPGze`H%5c@b|q# zl}^%2o<3<<75$W=qJ2>ddzutzdR5~K7+bpjpdX<#N(w14Q*~th3HlWdmWGdr$a{>Z zuL4YqbK>6t1an)VuheHO4>Xu5t z5tjy&)S{;+)ymbhXkp)f|Mwp`?zsK-r=FV2QxPy^AivXFb~^OXLs{K0g;yq7|00J< zRu^qg5JjNCQ1oP}v{}3;mG}`i%Hu}A_;|7Ko3?OinZgCG3h#LZ9S=@uke zj`x+Xeuc)kZ!u#WZ8X4!3zuASDQz$-wXeSR>Mwug%M8_!xdcBv+|$$ZsUtr1k~Lo< z4qZWpVRRtQIO7ajQ&QMi*0lEi7%*t5492DReO_o{^U>RgixpjT0)2BSMrou-2J>al_Ae{VCHo zU1<0>mm`_^2+gdfueAKD=nuL4TpoPTPcxy+pq7Z@IP~ThQfC{WR+b0u{MMasy6Gk! 
zNdEcHe?ITrbFaMWDke0~KKpEj#2c(Xdz;t4exLWgcl#aQ!mQ$G+XxTnT%Dj!VX4Bp zD&qrG%V58tq}quaU$`!X{>x0Geo7+zg#dw3t-^lI1E*EjepS8VFcMOMkh{+MEBgoO zfF+!4lGS0dw2QeHxzI(^D%r=CXl zogol!3*u?tx6k|Z+Jbt(U35~>G%mENYjiBk6P@Ym?Cu*`#M}qec#zApVm8HEuxJ7E zL?W);Ui4-nn#m+Phi0X;b5;v$pim@cUYNwegn1T%$Dqsp=%bIaN6ECQQ*bNYGE30}%-pA*+Hk`SSu4s64wYskzyc_lh(3fRP?$x6nkwm|K#Zux zy&5$!=IChe;`sqluwi^OU4wlwQTT>8ynzK}($A(Qo?6d&e2)4;>^I~0Oqj-^P_3A2 z8v9=#`xt5HxKk1Eq05zO-+#RNs;fxP z+bD;B^rMsM=r@m%1wY6#2lx%Y4K*eiB~!mNqpElkS zTKY$>tFO7{kb@7t=bn4unpX7_pZeqpKmO@M4?o0vvfI6BI~LXMzyJOW7u#AVNxMXl zm888q)@Q+*Kf=JEP=w%Ncz~B#T4%j-HtWooe5CG7vGL@CSU#bvvFpfKchBU7^A^x5 zy#5WZXJr|JhT;PcJb>`Y3;zK1blP9NSfuL6TEwKPr~jM;f#Byte+25*mQwbg#FExA zK`Lb?vC=zZB&hK}jwP?tkv5X+tiNYyzlpe5Rb5nw>8mgWxl~`P&bmnb6Q?p5!I*G$ zSk%JY1udVe>ASFqt>s@s<3ua3tZd{sf`Iqo1(>E@V@oz3C&>}Qyp}YnDPD4rK+TZj@ zm5AWEXnza-l}8)~HIoeXQ%ITrps%#HS0z@ZG%`{ilL($F_>lT}X@C9M5P%Yb>gk9( z^`9*Oq3=+!obgYvN?rkfm43Q%qQX(ve(Bwvd(OE(JpRXEtnIpZj5n#9=NR@cl32Zn4uB9XqaaU6tt7iLjelR z8%D_-8)Q-TS~J(0zWQogY`QtGWxVRuuVV6U`t<2)HCce>y@0`=j`Sx#`6<1hKIc(h zz}$4R&BA{1?E`)Ns3B{FX$ma}flK>K*V;gfz4;3l(EbjM4e{-A3TEPrMu!(H_j>0$ z`Qkb45!Io655ZX0Sgf`)V|4=ka^6(^$&XK9XInfx#5J2XI?4o@>e4*MGj&?iFCTgI zVTT_^&wz%IRers_i{gP64La#58kV3R&!^klcssM5#oD->4qo5yLJ(s^BZDlsgC;|a zUS13mtWC6ENE4ar!B0m!?IsMw@yzX4LYCcX0$0C*mo(T4>S97;#S8KFKS~RzvH?eu zR#7XZ{fbr9#SFr-Ze+i4s7~X@FOf#ZEBHB<&wfSG_>|ih7cVA&F@am6Z=T#tP+%%E z`19Fnpzw&&S$}2vR{P}t<%*QVg_(dVEuQ!*B7>IFC47`#sa0ub(1ov+M;D1cmmE=q zudDD;oG2;A=W6;cEMjZs; zwRp0yrl-d28Dw~pkJe#HdpMAolvU}2HwTg~(kiv}5dk(W>g{DDfA(2toqWnEIBtdq zP^v_qR1hSfiayyQXUbnh>RvsQ z980mztNg-HcM-rfhs>c&UqSJg;$=z7psbNG&L(49%lE|F6wCN94tf1HTeI^etpY2- zp_En$1i^fUK4ZcFS_RQ^x3w|xbk{%b;(^iRsZ&`34Q;-Iw$TO~(XNpG`s=P^Wq4;t z*W~U=bOLDM{_y)hZ2QJHvQ%K3H*8CD^}X+ZAK}sifDVs?77z5(O;$ykn;w1S(UVR( zi3XdM4qBqc7qSNjKmF-Xha?H42_z-2zFv97Rhr6bZu!uMKkU|&Eft67FIY&4G>{ZJ zbImpP+jn2aX6he}X&DG*Tzt_**Is)qwP1z|=dDJVBkPZ+kJUkRek{*7-Q`|6Ef#kvr_n4`Wz5=KeBTyk*ouf^y> znRBp?W)TWt{)C0jk*z9{=!jMRM9!p@HPd%?XH(kr7fx+FeKEvC2mOeG1?^kP3HzeH z0D}Iofi@ot;s9;lA*KAyHr-^?O*ch4cmLz=AO7fv*Ia$oUH`cImRoPV>+ZWxJn_Vh zHs1I{haS4kHrs5l(T1x{n;ul*pMIVN@-*!?ATqxR0WluHQbt{TvV2uRDk?;TU$uP^ zBmY382RitNdH&&l+p2@tyzsoh>)r;;1iw`Qe8jY949&+E>5I)>P-8_vaZi zR>OYjpEKLelCfRh{?>c$y$=Kb(wDyU!4H0r{dbuFlKO>MEw(9>d+1~nj;qnzg_KDv z-O|z1Jz4$o(IMvC8GDF9%QqHyGZJ7P=lJ8kzd%ck^rj>u8C7PCy<-+Ze`?Q^Lk>BF zCLrX?oIP8YutbZUW8jA#Gn4Aj!)dVEi#aAsQTkJBzjz)^y^W0URGszC%+xbghtkuD zrz_7efEGt@^E1$b5YA-oTwvpII-AAtYGm^eqXIk=pYI&)W)a-te*7dg!u9b1N6(iv z(Wgg4s5x(-LyaLAuDGLB-{pi3Tj#`xWy{Ss=f#->g<416Yt*7S*4*u~%dYFs-hfFk z1*;3#!pQ1--}yF};3sjpmNdZVDExyqzVXM1!TzL^e$D=aPd@b&%>f^)Wufi?2Ojv| zcfV(+9e14b*c{rFBfoSMGvgmQ>~M`es%q+pfFfSMmSH|L}NT)&@qy)xngaKiSsw(zVy-wQjg#oHlpv6Ytx1KR%Mf%kM1f zLobjA@)G@Q1*Ft%y06ktdn8p*szRcq&dv~lE3*B44(wOn(DE%qpqi=9oKuCDY`;!h z-9K@VY1W}HAScE@@#FH*^58`B!7O35j(-knp^2hhOTWawYMf~n;v|!^b1KNF{`KuQ z=qo;DGXrEKBKtQ3P@)fA#b)}Bj`XGzc{2&$W%^>M%ztEc(z@-VTqFckb=5=Fp9p`4*OjlRnIa@;e?_}KeWwfhfltML$KjvyOTOYMDC##6XC@uT z%Nyvt&nUfP1gWJ)L(x%Wp_5lpfxi|*F`=V<9r;d3TSw3I$y;u*#q`ytzv30^ZL{_3 zH{L;XDF6UK07*naRNQd(y02J|$Fwunn1O4JnMKM_F=`tq1brRjdt7+$_NMmhuD|ww zee`2b&7BK1Q=IV8k@RQ4n_z05 zj&;ZF?Th)sfc-Vk%9z6<9sSN>_E2E%l9XO{O=Gd~Mz319a2^C7dE^mRHh?AjjWl^-|pOS2Q6ZU!4>GCHqQQLqDtROX;O%5v6o56#I2u0-r|z*$NQ)EuGO{37>SE z<7@S20{;w=$ZBFP!&+F6J_#djoJp(;t6ha3^Rz7OFG{QIi55Si7I!JEy8R-h66r{_ zOPBn>L+~FdmRbK*Q0Bir{UzaJe##`wzxc}>E|dMLUbbH6j7rt*|5H?|pXlo-(VsxR zK#}Mxi@G=*;#q(BnhV)igc3mxnkNn|luhYWnESGvomQlL#hP0#@vw|M7*XJn>7Dz% zmUdb`9)|q*hd=)1uYSQoT#_+JpE_k4i<{o}zI{IY;g3w6I)(RBcpw2)Sislwv;?Xu 
z1h3i|9||_xE>~=734ss&0A1WlCukAPBI-wsazRFeJk{7p;%sCHjdq=TY9KkALzLhkx|&$uwR4{WM^-C{9m{ zuE$;l^HCzpn>`G1sCkU@4hs4t4*JN<{4-9Or6@_-#2yYG$_#W{q2%V zKlzDIbWQ4}WeqtoN@Q~`sDA6%Z|?Ax?HO6oAT>D)9Sc74$f^+NGulDiz)m(^0n#HN z)`Im+p>YKu20yTk9NEO`u)_}HJTW`IEby7eiMSYrXTXJ0E)FQRc9C50^c~=#hInD(na< zIx$y;)zW`vqZVe(2w`kM1cnS_k3IRg-m+vI($>Z-8y*JX>f2lX;1_&6^x%Uh{q|&D zxZt@v6ftwwBDs4Xm#1}56c(uRQuY;> zUp~pNaB>TnbliF0lUH3>7)yS({=N6!d#}6hItDx&ZL~4dWt59KFj>E_uMac8RZeH$ znLd`UHg`;7snjqX_sKmyF`^o6Z_;~(EI@06XT2r^W8ee!hz!K~7Wb~R&N?`7D#e_d z&7k@3fB*Ywr=7;;F<2JQ447l$t>+nQt_hzSv#>xw4~P2R|Ni&yw)^fO9XJ>BLqk9L z$xoQoV}gl+2O)ajRGHtNR_WA0W@3 zJ(~wkJMOd-IazA)xzGRSMHgRu#u;Z~znTivV-9(T$$VXdJ9`KDgvq!R_y}Wxv!6QW#5ct#h_ta7dx9Cn)H5`RzBMM^=}i+oF9D zv=I@(+z5Y%0j93P=}1PzC@g}MO6lZhFbu_TOKcF*O@1^T)qt2so9h24ah9)s$(8#* zCMnuMkkTEW$mZB+8&;zBS2dtcoMBOr2^;+dCaO)eUJ+~l)oqLfWa7WGC}-r&AtX^i z;34u8^`9eSBzNW@5AJcHpB2^qN>|Wg&$%EnDFb@p_DD6CDrW_$w%_IKKjYujXMZMWTE9@oleO-N3o`Lmz>?DR8E zTex7+>t6Rd-VA^T7MSt=T9ok z;)%z=2eQ{6cii@!|eYFMGh1$c{m>YQ~PM3mD}wX43nrr!jzV`|4jK6`l6+( zioUE?7Y^Yh6#hSZ=K)w%vGnmIlr%yDgx-6z(iHXCKt;t4iVX!(RIp-w(w<^}3fQID z0K0+>8%ndF6a`eIccdqj07*!`-+$(uo0|}@Jm2@8K2PA@bI;kGot>GT*`3{;)z3ec z{f(lJ#zH0%Oh-V`+Ly8J;9?Ml~j!0UZMt#ypx3W89mZ$R{&q z)SLqi`t|RhoR|pEPbPi(@=Gu7W(?W1$xS!i%y5WE0gpcV7`v-#)~=DBUV%OU1ws10 z2V}#nA_^R%vt-ERD)m2xr2kMklPXSwqz_um^xj4~M8MUIN)Tdw} zMh=X{ta4@#sCtzZ$O1xYs}!9T#UQ}O|MQ;@-FM%8?52dsDJe;kbxTOECH_SxqUpEWK)A_qH7)ncLw z=o8=Wp^`sGdUHD-t(4?o7v-BrcBbTnKKV-)0&Puz9bfx5ozm~ zV~=^_i6?RQfDQXWK#%oGIlFW5YlAC=YOztQ1~XKEW`r1*E1Nod_AK%eRG3{eM3!OU z0QKFlaotfz9)(K|U)Z=e-?;j!t1=W84GPf|T(ye;z~^{hp51r<{deAZC(fSxAAF#9 zuU@2s)Np(6p!&OdB12wgLg4 zKi1^(9Q<*2@tf%ifN$Ekf&C`nu_rs{u*2JQ?%a9D&a4inoJ3USwuiMn|NILWra@20 zj?Nd>QbWSq+pkWZVfPI&6m`@9??F=+yHFXep=@bE-+%x89e4C)%`6^9Zy*mnbl0Vq zUc$sX9xKL*j2@v(_wJXm8V-6+nD9QwwB2>rpu-P8i~>==SPedt5|eQBF$iG@gk<3a zBrh4KPyYh3BwX6ud2>jgn4FZgOD9)qtE`kv$<3QK_3zggl;9W77cW|j@gj@p9$ZMD zgwWBmYI?IT-u|NhRWWv?N}D0x`nF9`4o(k4$l((K<@%TG@4wi7y>OxHI^X&ahN`L| zQR(Vu(6hid*Xib|n|5Q>JNeeo1r5GUbe{1J|1!~XIl%6?h05Pwe- zazIsb4aA**_-j3L{o||;;d@%k;OsTQ_|w>5;B{MYkn`FBJ@`Ei`1DWfOo zA65;sn4Ou_GtWBfn_07nFI=N$&67?%QQA2_kA2+RGBSzF-lch}u_Vt;pT&q_!_rK=gTjg+gT zg$QIgfQM1Rh07!ld-`d%a}tCE`al2N3r8H$mJK>|444IB9Dx#k=k2#yzJP8&@`xiF zG_3CpoC73#_Pp|y7hk5Yn37UPLd=E|vv-M{MH3Ti(O0DZnJ-BzpXTuvpaLeCId$MC zB`T&qCiUbFCm|ugU$k%$9yEM*$PZgA@yf-Q#DL=8%&|xa+8nVz{mg6RFCef)-bEGE zet~`q+%%Hhf-71|BX5dnUjh98*7j=^a{PY@eSS;(b*c7(i~}M43-@nbITfoPoa+1I zZ@_`V^#29$g-U_`^d=Zk!(LHelN4bf`b6XJ_A7v(7r{%OsLd9mp9MeEKa@gxzg%fr z5l-M=C!cO6ikE*Y(%^Gv2&#Gp|1bL+kRQqgSIEM>>Hn{`U*~l!n>N^2E^WmF7@%?` z6q<#WL@1f0M{hN{%PUhx!Dv{Q^udQ8zVZ62Q>IL#o9_H0gyi9E4?FGjGuT&;ZKHc_ zj1}Yn4(biCaWTnBWn79)8EIx&CeBLtJ#$EqAz&YZ?%43HdaJh`(8!k70Msr9A(2~A zOcJjrSivIj^$-wNh;T5V`m|<3auNs!{?usgl2qHRRYG3eeZ$APfGATRm{_1;gYgf7 zJLp&;0?7CSx&o959tMGm&Fl-=6eK7yHZB`O!ofI<(+JfWM`}Cf?5LxT>DRBH;)!Yp zPt2-SYhHZuMHt7P9YP)uOPf9coe=1MN!PB#r$8~P6qNLOe{p3 z9?}Q#sV89+@;6F;OuWTm0pLAf(efkwO$G!Dg4%W}yO)rNM7$-RG8zQF4Sy?2S@m3i zMIcZxF#0rs(!khLD9v8*jf{93SuJk>@Ge4k*0Urfa1MHfQ!1Qs#_4oE#*7{P-aGFx z6T5Na=1(V0diTBe`u87j>Zzw*aKVMRGGR0k+|-q1ydFcqDWZ}IVGwfo(e!cpyL`I* zBf5@l1b^qXniTrM8iQyH{(rjm8_i$Uky@I3I(@`gE?yu%CIu!xMqNll)GiL#WzCkR zzJrB4h=$-uu}FIfz8VWlvP4}Udm1nj(fW$lKgJiS6qqPRDyZKsrka~IXhk5w&ODPK#9CNH4TySAm|k9~QPWA!&nN5) zF}705lw}OFa>WXgaqY_5nUa>eecM*nRc*=GnwD0UU{ywv{6=dV;g{DbVALuvzhR>W zSNG~o4OuFSU11a7&K>OGVB?;45#pp{6hOJvtJOfCG;G*F4yoj9eDb^Z0MwW<$3@BnQY-ybzCWvjYSn;kozK6&zF z*456PH!r13(xgd~&OZCBzIXIJ^XxOLRjH;~u2?M=!1!V@z#s0Ioa|=BnQF>K6-|B= z*U~HwU^yrN_b77y_~TD;LNPdlaMliv&57YQL~p(jz$~x#Z`Uw_mZsG8?ucq$zy9@goQJOFzs^A&#zpoIWa7MXF@K0%&?{ 
zqjf#Cm%^AoHF@D@&&U~Bat(jR?>XYxs!eNRmNE5` znYrzWCmu(`W@`~y9EL{Ja5hq84AUyv%)HXMVq>7mzk*a6Nn9@}Vn9%RDqlh?#TdG{ z0^r`;FCHan9DB+-tAvGiu~WTxKi}?|xY~ zBC!A;3--cyMz7$2!KlC4P-f2pY z`1&F-Jp^eAB12mN!&k1~=JSbvK4tJ(*2(zj?7O&|Xu zQbA8^t$r(tlJr-$UuaDxSZ&ZuzZtn5I()Mmz0~++-|(aPD^|d`DE%fNT)YVNCy?xc zKF|*l{U#aV5eWSm?XSy!G<{5)I>n{?L?4O2UAhbJxv?M0YeHfE4ZZy{jeq|m?U#1r z^3Sk>=4Ps2s!0+k{i8;qrlx;JeY(+@6F4z>%H&6e4#iJ_cZMhQs#!A7xN)O9{@$0} zd<=dtPRy|cTr`Uz5CQ@Z^}87Oda8CN5zsm#Zh{|r!e!X#)6F_hgdU$>j{gYYTLuA) zlB!<}KPcDWt1$=tNh|Ev!WUL-MZEXkdzm_7w56_;D=%=1iOt%*lg=oHkab(#x^-{U zrVTzURv^O|;tsIl8{5xvZhF8NW4^pP4I0+#+4BnGBI7AzC^&WMRN~fSU+dPX`?sTy zTDfwS^d>pj@sylQAXIYM8a=UqviQj&f*G@B6QNZ;DfC3e(bUP7B61R;F8%P2D+>eWSoXI^o3?wJ20FW`;xf!u!neI1c|DaR6PY& zgG`+duwmW0r=Nam)Tn2exx$c_DU-tJ{pzdx+f|4D=gVHo|E3vBw{; zRI$>Kp+l%Ts~%X|&5D@by{};z1&30@UkJqu$>-kkBOwcm1rq7m&>Q$ z?^}KXutqee99T<5E)RaicYhc9t1iof6kmtGEH!dM#q)B8y+n?@=0lhl+rT5kJ} z(uoOI_3YL4qOL?|8#89?(@#H5hzH`Aj~qF2^qA3{9^R!(mmB)@VY-k08I)xVFC-Mj z96znOt%agp1^6TMb~#B3+Ls>wkF{TFykGjjCG^8y@*~RO6Xz=~Zt1dR#BV|$vhx8s zXICE{3v0;6c*yIQNv44k>vbvn&*&e-q+kDjgC8E;ph1HVKm35`CltY^K>XwMn1Mk- zXk++J@0IgowaJlzJUgA)RZSe<6!ZgIKadIvL*Db*HOa*fTq?vOp`DVS?C78ETXnn! zqR1}5ty?qkUdcOVv9a(3vEbBU(5^Wnf+#KW$L8k^dwST0ybV-|j8a5+3vUg93(O zr{ZD~;}Z${!gvhutVe^kq%V__LeOHgEt-Tl{J=+3$faZ9pRd2+dO~3`RA47dixw@Y z9qu`5&)>$)8((|vb&iN>)~p%&jVLzRyK-@JK|PEeE7a(-(CI&^Lx=he>N5v%`)#+e z#*L~See5wuwrz_;9};Ef>{_{U1-soD>9J9hC*ZHmt=iA1`3-1HePy?dzwF#xx@>8$ zUcK3R!x)eBbUGV1tj}ad<3^2G)qsbQY@9oQx}%QRh$c-Mzxl=+r=M}g4?p|>$CU#K!uCP*9uA`BXi^A3O_w^C-}tCeAKiQgnJjo6(KQd6fL~ zKuzga$iKBCfwZhY(|>RcXN}6Tcv1UqztOk^@B@?!`X_Su`DLkpE&i&j$Z7T~0A%#h zLw*az2>fF3SCV4rudJk&2ZXP^;Rj#=Eiv><>7k8Y1&{o;fIfO$gujLGS1l~7_uqs> zP>@Q9&_@W_RTRD+7JJU3 zl}p7aPzrk`)jQ>)w%P`<6|Wq|g1reW^1bc$+tSLWfe-qSeiy0e-%?4l7Yx@mrJ+cd z5_}QLLD?7zvl)NVNhh(fv}x0(Y|KP|10O9)IDCr!Zr(h$ZV&^ZOi~$)!Y#MlykNmX zG&?gTawsvK1wFDkR+58jVla7FLq-rNf82Hl9@Lz&v*sLyaS7 zQ1o^JHFGLt!M~Bz`puUCc?A0%z zkLq58{-O{6WOQef3wD9QSdRqO0XAv&tuQYsDg#BEl&WN^6*4u>80QsOw0uEr9^mD7=K>5;m37_ zy(wF!?9N?T3^^znb4%nK&>{_sq8SmLcWxKJa#9SVFNmf^5cCU5vdUWL)Nn(2SZL8F zE{<`juwm`q_U%tT`6Lc9@FB!9(74AfroB|L`7CzO+>W+55oL_zF~qE)`*2z|ab;%NhnE+1~ZXi+EWM=U0x#zwgzWQ_XR)xIPsHDo_+S24I4MH&}HN^qn>&8Swh>kYu~ph zX}GEP>G%iqQ7u#jxFYCFn~xMumV{qLK4S5?hqN zBo}=wXPefo4>|bYi!Q$Sf(tJ)|Bf%wV$qhS5b!|Y!BHWizuYL%^w&Mb8hrH(On#s} zK4tp)IA4BT@Z+eFqws62Sh?cZ^_41DZr!StmaN9c;pL|-lZN0dXPEftEo6hAu(tV6h5V(X;>n` z(IPZh;1Jjtn%{Q&-`{xijrBjRV@zZ+#vXim(C)hHF4|Yp-(<0e_qTGicCVOtC+ZTR?B`Ups3Wd;4DKYS|q{brwQX21#`o;85#AT)WH`re!IcyDTzY-$H zDayXU-)Q_J^5gv!MTfJuOC0#E;zY#2z z9vx{hPnUMSBhlap*nb8ne9}&GyCgFX=FWA^p|eyK~=t_g(ZSh1RQAkB%$2 zvrY>5j8i~B5E2N8M(D1%4DHE?{q&PhVHw&MGhI?)D8*O)B=oAV7{#CZpL(Ol#+Q=G zRg`?_p@(W6TujX8pMO4P?3jf=E*w1M;kIp$0O4NOT(e~HA`A@{Cu?Wi$)}upT1V!x z!AJ`hzy~xb2kVLcrwZtK6@fmtPN_;}zo#u6d+oK?h?GxH7Fzpj6*TED{ny8i#Q?e{PHtA-ve zKLzq>a;28~JJ?^!2>K6!e9{VlFMR@h$?;$ISM(M+{@?5`EKpEMyenI`VK}rViB2=Z zfO()#EE}ow#kMV3vh=OD-(rrMP+}}J(CXQQggUkBbU3-g*=L{4LMM4c6h9}9*=$sK zoKEqg7U}`>RBFBiK~i+QS9po}Xgaqth#2-3F*8XZk=|lrJbjpnv@9U!6*pKerO}Oq zlU3J(r5}J&0^ZVL-5-1_L@)}&7aIIU;9DvD7A#L{Js&~JNaKKx|P90zXAFSXH;V_*pz%5rUjdF%P^)!pZ-hY1r zV`)&w0Y_(L`j^u!{#Oj!XTJURTZWEEf-e?e2hh1)&I1Uh6q8N;3F_%*x?u;6>Was& zP}vFbSVd$)yvfUB8tU6Q-*)fTjTP762HGTR*r4He-+#Ae%^E@@(G^fACP=DI3?*o^ zA?cJD$&Uz4_-lS(^B)&3BmfX|S&J4e?sLP9EnBo`Uba~@{lx*lLVnqAKIpRijQkYg zuPm`Hu!)vWhc5p5kSMx5NdW0hkR>054m zI&@qpnpP%ML}KkaU?dHxSuE4`$RiFs=)nH{`%nCM;%l$JHfhqQt5&W4dghFovt|w& zH0b!_j_=&LQ;U`@>eQ}FpA`UxKOcM3$KF^Ga6&)A2NC>1MraqJ4~YuNf0&7sKSO`Y z0G7j~u>X?wTNr<60D@}e;d8M-KTY2FK;n%iYMaoE*=)8FOqw!z#>^SdJ@?#=H{5vS 
zkw*|11)kEwq^F2N;GQuwGa9!GX!bL!n7A;z)l}%l~VmbDuM~@!Zms;$SjxYW7S6?#<(u_-sR!%_b z4;-m1grkH`4XaYQ3UmIVZhjogA@L6xLllRRXvt_2hE-%?Q~_=A{>+^-_p!$x`()xo zLMt+d56zo4Z9;=UQ+@W?XO~@e8HsV_GgP1*efsIAs2bwJu;a2qg>)67b*YMIGk^XM z_!-fq+<)@Pr_A}=dNC3C+Uu`l85d&#t;7`H_|RT=45gGC%dgOectR7{)|yR5;cub$ zF^-p9a><1EC*b!7TMlTcSSg*OSC%bX&J@%S-+zy~0~oBNP!}{w1O+JknSu{;lUAil z#mY%5SFB{A9HY@mpH6!C;eWC?7b7B6^7Gl4S-W;^JaJlJONB7V+NYn5FyJ&?v0}xf zJ-N5uc5B0ijo=*uT&HgB7stN1a^)&knSC>R_C*(5h&xU?I)<1yrTIcY35XTN+*Xi>1VBK-r~2bVdCR;1^?Gys1FD0BcnyL^Qx(FC)N!TL`{uT>&nl ze}sYPV{k)MJaaMpRq8O2P>B5IZg2Yn7{C_DkEf6HuYj-oO5`;zgnwcBD;ed9L!`Ha zR{vt)3#3)P2z=*nA^a_=BcUus?U&y0KO+D8!QUV`{>mW?3iWS!xHo+SU)L|Af5_ir z;7h)RKZG%Ai=R$TxNg~$M#Hg0o;!r=G$*7rB|k@aXdODQg6i$E?tUgF!;rE3-Fo0 z2QmxHu;UN|A`|I_j*$8ZR;Yxakp6V3_?Ox*qHXdbRV4|dec5FX4gMdt{aQWXz@NYW z>@0(O-p;(@uKy7vSovqQzyG@aclt!@W7nVFD2js*nblULtxVT@de;xYDPkW({U5OC zcW=MN;BQ6!XWFl;B2$kVqoKG^a8i!UduQ*0Qf$$>|Goz@w`|GD%gM~x$|@D+_D?_a z3Gz{!P318@rzV&R41>l>jQ=_9nC&^TwldKETzAY0sCea6L(qG( z%KVo2>z%!j4^?X}U8M>%ne@j0D>>%eA@Im2RrDkZzMLz-*KCPWlT}gyD1$8q*sq{` zs!{-7S^RwjOI47WRatJwUvNOny7Ow&ZaoEG>RGtq;278btM<)~toCj7~ikb%O?t@TswRiVb$G zN;vQQE?-Rl0xOu3R+dU~)EYel`Zt^wH0qgARjX8CPZM&&%68U*(M{3zPkIx1HsZ25 zNsfBMF9v;xK2BXA3&qx{Q@d)_>N&Y9fL5+T`A3F6(yV#&j;EZwa@A^MNHVESpw#%*Z>i8`g3FCQgb&Te2Vm9 zxPYi1Ao}Un-WUh%Jqg4%y@%XN1=>Quo+`h;5&Z>GnH(+PIegwA119sz37wUloO*8n zB2a=Cl25@`Z6!aBzu;S8d4z+tyimbC6;Pu@TqzO~+eTsA=uBGzhtIg0yg6A}h!x9% zfN}J(e`|L_yY1U{eDuKwk3R9}jIU=Umx-S|b;^`!Q`m~zw0X0KAAInTLtD^^1XOhU za{7w`0p=hGY*puft^HDG>K@TQFbYB#hY%IfpL;RZqyZ72i*2HVu;sJVhxt|9)A+av zc+Wy17D%4cxf5YW2J|1m{yUNz*YH;afB=PH6{3GA0|2BD={GTkhNUpXH0;F0o4uzZS)JHfuD<~rgZ$rA5R=Gupek9Bqg*vzTFXrA3-iAjR*AW z|Jk(9=zPzdF@vyl3`kf&iW`fG4_w~K$!svBek@bvoWmSKt!&=RIqs@EO_F{tO~j^E z7zHtZoI}`P-2H|lJE`&h$CXJ+W`v`)pKLkF*1A_T9t7kD4SVRJhYWdS@OkHT*^^sx z;g1VWJFVl#A5L7m?x*<+euNg)t5>g4yCzk}9R&UdG;dl>A;Z)xIl!K_MVuO%7y||- z8$bp%h^9$QO5&a+RNCChMo62d*}XgGrLixL8}}wSun3MFh^P*Z7Z~;Iv$%H{o3RbN zTpH*9?q*poeR*7FppCE}+4cz5f*JDJz~rh~yB5LMnARwhQg++6%$>WltnI|HMe>qM zx`Lfeq)RhsQ*pLvWESPvOI>hqz$Rm$5YSb8FBR>ZpbkG~G zzy8t36MOf*8q!%W*jP;5Wi%}QmyUH&u5X{C6{!=O0htIaL(xV z4VyMH?SapB^ytx_e>RO}8i)tyFg*R#Q_Y$m0HK7CRJ#P!^+O<^Xe&J_u)k&pDHHsy zp{N4%^Fhbj7C-b@f^YTvJ?-zWZNJiQW`C`9i`df7hGDe*1#$2caFblbsic7 z9$Gq57lzLR%!%s5oj%My8U*lHAQ2(G#}({8QOqkwN4V@1*}l7}c@} zeINvvCDex~!B0IAeME4VpMXYsD~gg%^3Nss|8Dz>cY@96(jVSw1qBeHT<2sF`P{4h zBC6Eca0MQh_jp=(-} z9paf83HXHK6EIcSNBm>BZz23e=W zJw0sfm@#$h*W0*Z1EfTX)6*-IPS7k4{S)Uc@x{>RD11bg9^vN9E%XnE41ENFWvMcV z;ptksWJ%|9Ix|^aqk8qplP3`wneiY+!wn4^jezQ##Fb?I1fq>Wkky zlO{vtxaEDIzJwDJ^RM-vZRd&_oB$*3YK7Lm{^;#D(*G3asGjL(YUvUvVj@zAo+@tR zM?ER9zrUgV8f?KATLfF&vNHS*ad%aM4pU+94qHPC{srn!8dLlQw+Q|u1c=ZEZgKr* za{RB_UnnS@8R2gXtf>DoTSDPI#N#mzwB=ypE9s6t|8eZawen5vB-y5|D65R zxhyb4;T^TNLsFX`vt;SwFTa>RZv0zQIs9(@dK4mBkHxy@cJ9FGgdC5>s$K2Qqp-Xf zf?<&veXU6YoH($F#sW$*`{1X`c<7G}Ws1}vejqiP;GfmTDN+c*VVmPV#eAHt;#Ne zCoXr+yg9`7V*N9-Ib^I=yVmU4vkBOVR$_$Cmdsf*X4I%rt8&#!Svz+2>)Ur5!_Zy3 zI(O=H$U%p)CW5gY!zzxZf!PNhc;MS_zs(8FAO&s=FA<~O*YESTE$D~B_&|v|XE95G#o^ubsEAfaKT=>F zZWMgR-K-_l8(}GXgvik;WKQzzPUrkM|Hn^0{e+kolRo`q)2{4kQ>PwxeEWkAJg`Np zmc;p1h+J|ReFR!16yP5|78#|#^;gioETLZMEuZ@DnFWEgf8_S7QSbinkJ1mOf6Tra zeW3rIoIK{YwU){xkl_KmnKff3QScizZaCNwWru`W?;LBsRm$uYVQJmlAh+zUm z!->Q)T1b_%i$MkY0a<0iDfp;an-@gAWawZMDTwx|CwF}O(Z})KutpXE_w%t?}boN5Jv~)CdJ(x>%&JZ27X%@r@`>-FRYe>0C?E z|Hz|`5>xOZ5$(kkQTJ|_aSUa>26d}esWy4a)S-h1qrKBBmLEBCWW|b=AUOIHI4EX@ zLodAWe5Z3dqxW}OBr=9ijDMOpZpMB=peFyZd zQmIOZQ#z0mI?bLn`|FuAG1b;0)h93y1}7266SM}-uuTRtf#3G4>C@pmTVOnqEUxTU z9IG?FnKPF{MW2}bY-BnzEHwH=sSQ)JP)|CbKtI4T@!eE{G=g0c^ZyU~`)k`T$VK5V 
z@mcd9_NBsq5UU&Nt!m`{9uk8(>Z*W2n=A@o&eejA{duD}FIMDSl*C0H8D1+INgT6@ zg?oe;HbQ%K@sT?@Ot&gGDYryq(?`SNw{0>Fw1@1wB5|jEzWTpf4^26g-O>5;z1GJw zn>*C!d1Y%tVd^yQ9C`vXUF`qZ~Cg>|F65F>76~Za*xw6ruX;X=6Vi-8rnIcfcWby-76N5g3*RVvn;l4f`f~2OZ z5Br&2{bK2;;bR?KEUkw!F^8xMP=3d2dw##$9;a%IluB5sb*O(+MN>v14PZsWUR zt?WyFnQXbcb^cKO+d(u_#WRo&^{(SnA=-pQ(&?)M)qAHGy-c?)aK+ltyZbd<2V`D4 zH>BMq7SjOh9t{~n`0#-Zdj@fIhz2hTI8okfsnOc2!|<%oaAG;CAv^X|euRU`5@#A9 zen_!BXQZ4cbZT5721L=l*TlI6=pFRK$Y)>Zew3C{^x#eV*6UN{zb24prwQSx+a1(?kWP!vlZ%13+vk+UwRuS&%Gdl*O~EBz1){K3_{GO1%s&@7z{>kBD#8h(9)5L{fm>Xe0lzmZa30W!6VDz)Qz`c17^Xfs%aknDoRVM!xODtLAbB=*>sSNmNFA`yBtYzlpzXuIZDG ztiDs{B~1$(h-_D#6Rs%NOI5fRr2mbtbB~iGlf_0G2rOH?=!^|l`HiK^l9x9hMbMRU ze$mw~^3Z^^n?zaT(NQZjkh$aUGII{csSqJohd?TWgAl}KYn}07umsh+XF_5Z*R&sV zrhR&e3)9@N76Mp{lU}epOTWa&M%VbgvuQ1JG639A=%txs>&($2ecU=vN#yr$mpC{& zgPjRcVEggq^Y7ax{<|AGtwp2bS2ALOUUTbgq6K9yEcI+WFRXC9-5hoiqo)nq_^+o3 zqi%S$sxfwq0@}$L%t1zo?QNk^jcN*?UYkH|u`h2Ly^d+)Y}4|ZB>oci9g^3v`&<}8vX7Q+ zr_vg7jSj;YvH0<<(oEVpXxbHJw%jLn)VCxHWw)a}L2uffzlp-=*a*~bqgz%3<;QQ5 zww!1O*Jn<6 zj;!|87MLq?)*KPR3AKnhrt5= z-n2dFNl406T7J>w4+)aC{WwhhzqlqSRbx~*KHff9FqVm zuH&p9b(g#Y$HeprI-mEvs4Ty7?S?s8NL}7C+72^4PIxMLSTj-grHr_2ecmqYI?uNy z&&b~=RigebzksagD#q$BuCvLv*FfO9 zAD<1FJDlg)8#*L}7WG4;Pdpil zff`1%igYM^DV;6^&>qUB#Ska=t*IXW>_QR}e+e0y1`vo7Wg@_w& zIly?BO}i*ANfEzL6%_mO7?*pz_^CY)Xb+8B7~#&8BF;=E?P)3=yvz{kg;vqBsT(iW z2G+6J`3_(Bc&>dh8^R%}c-gdiGOLdwZ8+_Bw1L2Te9}fr%N9S>V%Y=4;oXa6wWQqI zwm~ZnZF!(ah9n#)PPu1e!~WoOo~h}QXiRuy0;&oK`50)Hx1Sb=+X-xEr`JxYMXn^Z z$2~bPQmrkOW~T3>=my6G`zL--KS(zy5-qNd6LCs{i=;>h_S=TR!0IfV9Hn{}Q{~J* z6nU!7h<6P^c3-U=%{QEuAY~2p+$=F4)}ksrub~L09E{=?oH=h0ecPcY;3Grzp-4l# z#sn8U$-L%+z38$hsb66mv3fTWZav}i6g`3!L)FZllsQ)YoGykN-gtM5O-%!$#FrAr z4{w1AklESk_2#Kjgq;zdyHyN4+63!6CaIuY<@FPZdKT!N3VI2!gxxc-r>0#Y;44c@ zwr&%?4~B!3eP9UV4kK$oXyP{6X^OagiE=%~<0QF~Mzr1e*}1=lxMMU!;V-stl3YJl zybhicZ|=e)gI&7nu>K@_>Y_({o;Gs*TEnV%L}PC>i=JfWk8M?k#}?5NVPAD&-Vx8V z7+g0lzN(dM?#JYHnFWlEVMrn~sV6<0P`&LaTcKH%M_iQl{b8^KE<5<~%kiYpQ|6w7 z3sU)37d`A2Yv@crKYw|(Z?IUStn5A429IYZf4}@~xy?qboF4NLQMOFNi?_~J?z6N}MhrHab zOzG0?u6I&@vP~v`2O|DPy3UXqz9J3BmO>E6S}(q?KgEkuexJfcX;oNA(SObL?JL#1 z1%v#IFsO%?G{={1i{2jtTGxl}RTmdUWWa@p-B->X5I-!_5GC-NSR-(RliV=cjq;<$ zUYITtMz_H>!Uu8JpmOsQVKAb0H2=&u9J_^G-^>AiaS=?-^Fp-_-X&99Wlxy~2Nrnd zudULEV3W3OSLkl>t7b3tiD+JhydIRhj&3wkCbRbZl?=v}wrkxF?~*!CIRr<(u+cL7 zrD%S>Eb))=R7bejh++<;5aReTa*IQBXOv=x-Ikflq0iM1V@-1PiVx|jq{NSEP}~Ma0R7hD(q3$?Es?GE#9x9X@aL^L<0T*UDdQ()OINVDnO4W55a8 z19G*odL>AgJ-G1r|H?hB5PLA^qb~e%5D@dIj*#-!-T#e9l%Rld*GmCWTAk*Fg_I@>k)%_oa&M2NzBI;Aa2#d(fdjt=o-;X6skO6qR%IS7j9C zDr;X78bNqnD^31Z6}yqDWEE!@s;nN{w9=C^%QI9{;oG^GBQF;3wCm_=IKkcDbbla& z1>s+A@opXM9Q}j2|MWI@spitu`$y@wk0LoA6T9i*UsY=s4*pz3MBRP&Sj~D>eJXI! 
zh!T)|b)@S3%6+~V8+-1FKVAoep7&)nww3svjvMG6oD7E1S4UQzyM&f0wfQusIWF;BKDw-zot%B)wE*9kADa)9mj2$XnnB`wc1oOsq(^rHw%U=s3SmU!vlYZ>%^kJXhN za-*cLvpkH}t>TaG<#Svnww-yv>!0#l9eOc6tgp&^Ds8ZT$KSstESLfLQ~t%9%*%Hp z9{(GL`M-LGM2#31*XidBM^Xmoa$vQEd!7M4Ga;_)ift@?d$>gAf&YGHa{BkwuKf;7 zS((gLI9&*QxPh=vP(!CXS7b(=$412s`S+xFockk)lGZP1tOqVOb11-#1l95Fk4aew z855y9Hs(Qg$7QlQcrnNiRCaA2{K7=0!I06HEi!g`A!Xi8HJEZMPSTV2pPK23`nTNT z#dAO+K)Ny}{fa_IxPm!($+9+Kc&L^76)z*^k5y{a>d`ruPcs#+xFbZQ79kaN{nowa6>PNvl4* z8Hm2TIjhd&NA)K71^2@$8z0=ZFhMZ(MOjrG;m0=ix3g!?LvMA>XYo6;uWl=OyjXgo zw`Tp6Wo{$>n}^79>z!MFH*emj&|}$|ZQnEPETM0zfbI+nzWAqK?%rM-=3#!VTtOwJ ze61gffU|sBem-=C?S;i-Iw45p**MpMDVSzpRV(R1qh&SeX(@-0nwxEc^BXTRqdz>e zfBveG@_{lQPQirUpI_D|bKD_YB*lDw&uQv1Z)APmx(>C;Nw8EcY#~^W zTWiUf10g8T490SWl!UQIXX~H;huQ$Rfq(n+fcD3rDjcR$BCULvEJVx=qgz&e(g5`3 zt>Jx+p(fDJU-XzFwxizi(Sn8iqA;G-zjJptdy`y2BoUd4{0$E8l(pBD?>^VI zaS@m~k9s{OjVli65zEN{U(Vz?qM}0Loo4m+k1nao;YL}`OgFXS0VWPG(f6We_4Q8^ zl-+RzsUXz+y9D$5A4hcCjbf+gd9qM^?(b@!ZRB({q!E?@+ExgVDXl6og#ojg^Pa+} zk3Z*7fO|AZ5qVrh&hJvZv8u7+@j-D0Wub^G+@KeyzzE>8{ME6JF!{ppOVl#*C1^4} zheu4%1I>7Wdxz7D&DUID`5^;Jw)m`jGf$6nzKbWzCh-CEr^~Sn!k^hwt(31;vnQt? zpVPSSi{mI{$G#niz@PkaP^yfuw)_d=e%G4)vAcjOytho#dFpZ!lf(iR!P2QfZiY4WfRfJ};=i)v)W+ ztoDAqGU4*cUflouViMie?*-Io@O~(qW}$TxiR`*Tf3m$)F#qRKwL7jo5B~-abJ4A2TcPv~@a%D;4EO zK6D;N_X@&!NXN^;_taq)36D0?K^zeI_JoSMgcC$d>sj4SR%QW9D7TU%qm-%Tqw9&Z z&+&DL_-Z_Tzv20rhl$sp&wO>wEJd}HM@To5euxqec<&psP*gtUw`m*MF)9)u`Twj ztTpIXf@4tO+bDFG&SUu>iJ*5Wpons0qfH7j&{D5XL2UV6J(}u0lTdo{CrB}8@Xixb zS96tO2N%n)^R0uQg1;-_`{K^4Tppo9Jo~12;c~s0{ArzRB@Mz+6vZwBQK1(knM+sGXM;dXCf_ zjkpr9hDSODHO4?1WDA)0!%uepl-ol8-RUEVi9XU#;<_u@XAI3;X4%49eSAt68Akeg z!?%a@;-8mJ@{+gR-l|I3pFXIw?(GEqz|?1}387Y2N*wjl?ZFk`8e;t0DO#Fv&3`k5 zGQajFABK?17E<8I-^c)o1Lzk%pNM!DAa{}<4fdXFpiPen&&l-)y$+Y=+;3W*wwl8f z(s6^L#QYXdvcqUt=r3G<@tCRW2?2bU2T-UriN_RMsI@Ag2MmaTCsJT^rno`31;qRC zqnoEMy=D+b{uwa{q&ixZxPO*{!`6P|f(+V`cNw7Xq>wEpO0owClhEukj6DFB;*Qvb9rBd%(W29`6rr$ zfe)-1~t%~71 zLQmia>Fk5nxP<2)px+wf38`HleYVS&j=Krt0ABhbta^)x8{O2*jexO5{< zF3gSbevZa=9R5#w5N-NduJhdzH|{orcUn|S9YN0N4-WmvD+aw1hjRa)Lbr_O^OogL z?cW9qxXvqR*Ph%J(iitoaLxKQ7qUHMG$bcFI?6E?VRP$#%>;ryHr@6wQ5Gsneb4sz z(_!9`L~P)}2JIxj|1g(xiPB{|2lcl;|L&n)AcO(l+XFJL3H?m&T7Bkf{HM!?@PxV4nJ8L zIB~#-G4{VL&tOC*G@+8uhYd@289x%J{g6I%jaH^W3|VnKUF%sg@K|O!Mf4HcQ;PTP zuU^B?rLr&6vuociTuC+_w^&CqRNEgBRY1-MkV=8ENu(9byjg{w@9DBCx^LS-=3JM_ z;ZXho)eXel#v0;PEqaV$zN7V*3#RCrX+YSk0?^sEz#DSt+CXSCz2dqMPSr;=z z*jpzyVw@obqSZ|i_(=mU24942;DBBAlU{SI)9Y z8p-#f06`4yYlBk@CN0_2qKNkPLz+GK{4_hB1}PU$UH%#x77=+Ca&H_n@A;6t5i87| z^+Ja8BFpwoj4I^x@zWxlK=bhau*;>dvT^Z#R&5=5DZ`%>U8KtSt_IRNX?ssmIY{5)1lHR)-y=2(9 z!V(q@cOXvg82ozvo#Iaw0s3^Is0%3vS12eJuIB5xP0NGt1a!%Hzv|y_=|g)5lOPv~ zhGNR>#o)@$AO-)oZeyNm(TXG;kiEeO6v>l1k;Unz7V=T}#YGH`dMCH-h=WJVbq$_) zdLCYhhO8zneM;`<-H#TeqV@*}x`5u*@%2pT$wFr3+qq8Dyacs-vrDF_?=8qjfAFE> zQfsVa?70KCt$ladZnVzwjBgt7brY?O;v0$!F%3Aj^?ydLdR_FKM4_RS5}%)Kd#?G>mS01FWAU!z>>}$C+H0p!x@1LWU{)= zGVC~r)+5?-+vr2w^CQ2EZ=Hx$panftxbg|nJ=h6kR&7cV^aJSd4upKA&}8vYXUemp zm$sFXM*zRX8&Sm%Auz_~wzdg7G^g@~R|K(F8rDl_E zj8Gz8^?({YqQq+WUj}A06J2o}^yBY%H3<|p=QPHd(&pr<5!OAA(fDEr-3f#OhoRJR z1wTib=6Bq^={y6-{!l839_KfQH~~>TtGRz@b@lu?;5!-kK#W+eXU@nRPnA6O?W-_>JOqXjK~5KwIbYOiU4FtDDUG%BIB>N+&_RzN9)7QP z93dWCycMSMxycM9z837m99---6H)Wr?~Di!nIcV(EQh|C=Uy8S*{$`v@Ez{xr_CHM z-qSOI`DgmNK~}KPYi5%X%%%+}0^ao{GE8)fl6GCW32nFFw)K%0Qq~vKRsf&r%j*<}UG@KVDjcrkl6y90|VRF6W+0Cev zmzVz?&pb!OicP%c!iZn<9}=bfpW9r4dzQJzA`)hv|Aj(jJ6a!}ltm=8|N51Jsd?a@ zlsh>_1XOJFOdej_Ijs1P%q0A+$Q|{PWhr-Z@W%`J$q9Pt(Z3k;T@tj@bR}KPBo#yy zHVDn0<9+nslk$yUyju*$52*|i%)CsgsKzYqEP?NHgTK14;N$^c(I?&u#o2plH*Vgz zAM_!Ufr^>=J`cONT)$U32%y)MO)R@LJ7M) 
z*PoBjG-7wX?0w~xK0*2dQ^u;|wN$);GH7in6r!F?+z9n=zhNz^g;ytBM9x)^eCSHT z8NO&McE9*n#-E-SKcxV_JMzOet~(X@C(}rC=Tu@PUsD$R+%cvda7k=YV?BZtXe|=r zHvL>@zSy;1xn>afmd_x-|BK%=B(86kMiGCFT*P#ek$x$~Qoui9oZdbDfGVk{>~hFl z>tsb{HC3~hj#$FubQ^8~jwY6XHo5AQte(HuQ@aLxy=nZ_%yPci9wB50(^7J;Hs3A4 zjQKWs{+BSso|+q%NY>QqjptGK*E0ExlzG$rbNmT@z<7>COkrU_SQ2+TksUzm>UdZ^ z%2CBS73LHq5j~y857a>k=oKzvnYXYaJF)Cf?J|-K7UOpcM4_v&c%Vn1Jby|y0qW9M zmCn8#^25hvQFgP9AV(;ZvV2;h`}JaRUVDg675KK8&pjo~fH$H(|Pc zy#7>Gel;)QUBy%o!@=r@zhlwH1IT|SHE!4NL%!#cVZw}GtZAm>aXs}8crC$zlLf`_ z*hl!}B0fxni13UUhK}h>1huqV zUlfWB%q0RhKpwI<6*i%vDgmk38aonc5i0TIlzGYTfUlhZK8r)^#SeVLd7@M>LD_>1 zgZiPI6Vg+dqVB7((!2fseV%`c|M&!7_o+htPr9*lATzG} zsl$V+I|*OQwyw(q-b)S#Vma3Xyq4+7bty1KiRYAW3LJ(A${rE8N5XSBg?4>AKKYgP zlm(0ihCs;hvb)O$a#9byaFO+7zIs<_8vZWvM!as_!7`FBt` z4)Y11P<&n$A&!xs*lxhy*a$(t@9}&SXO-r|1JglhP*dDp;sdDg@apLpMq`W=7n?^kF{eI5c-kMSbVm^gR3#QE%FajzD#|7 zV=^By_9Ziw%*glH$l%fD6!g4JKmgMsog(o_5wB!L=bvZybJ~|=%|hA`{&zkNmq=vZ zILGhGfVfcgoIz>PZz4@?vAAh!wtb*9H}@)RlT}vdyWX`sYK*c1Dj%W%K4SYEQ#Nq_ zAxido@Pjux%DS}9mVpyg>qPIXbfA`ro$!ap^~)JRg87qI1Jjk)#I=f+7{cj6W}~Dp zue)kVERs4bh=gFE>(l<7?5N+8F8?_aOrDzX&NbaY6w>x(rTz6lQU6Q4zv5~j*c*Co z>vO|W*Ep9O*O?YM>H!xio6N!M?yPk;63^_1<(y*4tW$A>|0ZxB1Y%CK@Kn?U}^C|>Kjf6$FqC5L?Qhw1s3Om-1dh> zOJ$88o?AKqv`;uK8UC|}-UcNs?}$=sZluGKQlqhwq1V+?S5&_1TBoVX!Ihu~KVN7! zro{+_IA-<9`>YPLN&kyNvs+Wh_I|G-N`_r<53g^Irv*OE^MK!5efr-mgiYRxHI=04 zW|9swTE$T7(m_vhnEYn9&*H_R`?gcD0|Kc}3)z2-W}5wrg*!fd+>Y5HF*pf{*0RkM z6@8s>y5+5wqd%ShP~MOiYi8NZ*z6Id?^&&fV|%EsmFMVRfb;xZWjS4W@`pXas4R(V zx7_G6Hut~Vx4#0+n@buOJS$Bk1j^qm=AV)xN!(&18neP`m5c-s}1w=hnC97|yoB z$I6zR${k1-1J;1g8hg>THLeD$$2WORj}S5M3>Ip&FkSx?6iZ~NG&h~ z|LEC{)brb?G%``dk8{Ir_>^zh4wJN1i9;A^D&|8^F7lVivl#$GT8WcC#_~eyWP4B` zrvgdpB$WAy4{Gb#^+jFmhE40sk9Xm)D?ab3zs+J@U-_Ss_#d9I-qp?KI^ zpz7pdkahcg#PvG{c0>{@Kd1n|V-fRpxp&l9^!3mB;HzOyZM(l4Dm#pyoNE12b3zCt z@~axEo3eU%=~3~k3(AFwZ#|B(`fY$=Z0@}#FvBDsg~pA9VdIh6mzr5u?QanEPT&3N zxAIT1cm_Vgt4}wf``j+5pnxs4?gO=9qq?N>vvF5h^wf`K058c#IVm1;G zL+tKepPP9BJD7LW*VQR;X!EAsVHuqc(H<_(pIBn;{*6iiQ_uzcAp=n>2YhqdIe+;; zIDSURKM!Af=}bR=_Ys~3=UGOodx6eEdP`M7L;KmJpE`4-f(ahN87{9d3u-(6(c$0p zAP`IbBK>OgbS?tX3Sh`?VkFwX`On)JR~Z79d$2a>3l22?z`K+tCWZ7qyjoZWPPGf; zVWOyDeQtG`F(kN65VXATgBO1Em+?VNTkkRQhX6IhNY0>A-`cdD#;r>u#_}Hui+)=# z1zYNduysauHgaVDHgnWKJpGyZt3zCd3nVEda1ua)FyTNzlqUjXFZSD>6T1`T3AEJm#v+$e*>XzG|~xEeR%gX&5h?X`PD74 zpVBXCCfbJ2js?i@FODZ8SXZzgE;ZB8NVViLtqi{8kNwlv>ES=bD7;~cxSqN==CTqe zeT+poL%RQ#@hmt-j!JSS-qzq9=`-1uZVFn!4kl9&eLr)=SjW1^DpWt54WVH+GUxw$KfjqrNG5L3 zM7R_#@@R^fa~bl#Al>Xr%#?SVA7hIL641Aj8T2~n57~?_`2Ik)H!k6)V`DDB2u-&{ zUVhR99ZBr?H+TMH{7p!i%j7xH<-J_DuvP!u{2Rxe|nCtSn!-q_pS zzGf&{)BGcFGfkveaqrP@2^2qOTO&R&$-0D=CFp1AgXTaelw4e`cJuu(7>wp77YU!G zOd__3BIZXq$6`4v#@Y2u<=$$Pqj<$see(@OB)&{N=g%E|8P&(zI*p&wK0v)DVu=2r zZ!?Gx253zq%1>jSZ}IpgDyqTyvPSN;xFOB~&f6RE{HWf+kXcmAH z$YN|1z1rvaqbk~CB;T)3Ph#JSr@Wk$F&3`U_gDKTF{8)5%6TpPr%Q6NJjBOpEP-E_ zOi*L!)AvFd-0<>@nM+6T|M*MubH_-p^g)O%^{-sDr|*Km;)nn93`cFN#jtWgZ@qc zDUN9_PA%1V&tR#WWO0;m=hrwN##W!5i;y(bhr#98f~m)m$x#@7f>}NGm5h*Jm*^^W zbUB;F`i-Ii=X2Q#Rs0MkUI#jPhIW-tk`^3FYGo4qGf4?uP3F{y;9bUZ9a~fRF@C$F z)_g4={<&p8z^kJq{hL({GeXpIb~H|s+W^CleOIEcx29#6h!f(M_9*>?DJAPYH$|=D zWR=W~iXiS%fFfKj2zCE|j`4!a7##EobUxdh%C|tap!lDOW2kwpP5lQ5n@N~pTqVqS|DeM_9XjT@#R07ZFQUF`k4F#}|u~pSYaTeFgU!zd0hYsU^g9 zW~?xNEVq`)n|0TLi7<#8wh5K^oenG)n^!Lla+7}d-@V;)#Oe=nwG67@V0#b#x$(-y zKufNbVKHP|ViNS8^muIuI3u)P`$g)*^;0u@3f|>_Y{9&-kmIjsDaV}Dn_Z?Re5_0P z99#de-2RR!x=SzM9{W!)2?%7DM8)L=Y{tsqhI!2V}uoR-<%ns6bpw86E70#9f-T_$a~-{ znnLBiPG)Ua4gNS4vUC){fW7>8-Ec=kT)-{C!qI{Ujy=QI%YzRIT!h_?CLB_GV-X0( zGyb_nNp{ckprrrSn|SRwccy`+Siwf(VAcXA5B;N7UlbN 
z{YVKS9g>T5BOsvEihy(rNUn5)bnhx4-673NcY{bSNOyO4OT#WKyYGH~c>jXuy6)$` zXU?2CGe_O{giBP=p*Sh^_ZQuPvSbk@@tVKIS(*hnqgu4kX0VxcO~%&n5d1_j7S&uC zLSv;nMi=(CAo=L1Pn&3#>;l3Sw_3`EMHn1pdbxAcXZ{n%nC96Jj6k!lSYx<-cM=pr6pK^v3~Q<4o}~$N6}n zWLo%79y4x?ixfCw)dKiYs~%O^5)7c^gOIL2>=Xu_C=t0lU*cd=tg2d)p6#vS>DobJ z{-Dq=5qMkmqyD`-8C~PG1B=RSX*3uu?-dfuH}BE~o@kg^t^FAS*efrhaCL;nj`KTW zZFb+5j>E|lS$8ZBU~=C#I8GFLu#3-iHXuHz#@@GWWWA2zkdA%OO$z@S2g>1inle8? zib^R2O(xRN#{BMT0zyf;xeY#qMc#BA5IdQxh8QOg3hWXpyqzM3I_1a|0q$=kiu%Vb zd&lSQb&XB7J>vtCRB#npxkBs1zV|UYe73|VLRucD`VcCVhXjagsl`AVLX?F#_sW02 zZtVwx+nc{O9xk(K2kl@5qwSDbk=S3PG`~XxCUc#p{`yTL|r*9bX_L*>N z!03O>zNqW8MNs_PW2?4ho{ljD`zi>cdf;IqATm=nR>&lTbmg*JYaclbCewRiq&}-4 zI&Bn=rcdHjZ&Om-ZsjVB0<fh&Apz?Uf8?{#_eu*E`Lm??^qZ(zw$WPYWJW0Sh zp$Ae$Fp3OqFGG#RfUroV{%8&?JK?h=1Ka0*iwifOBr7|#1ePc_CWLSw1yG~EpRG~K zkYm=dJ^-d^sVQWg?$v+K1&51keZBRtvD-i_?-8{RtITB7xVY z4uWbRAq@8}eUQ(&G<5gXs$!xBl`)1Rw%xpfUIsKcHGqoe4%1JSrgZ&~YY)}&uJbE- z-+zL3FBCcoezV3_v<^c~Pk-_g6jn{(r+jM9LlcA$eka9Vc>7qmb{y4zUVMM@$KYMj zm$#qv0yJmLWJ(F%S%zSz(|CRX$B=(4hdZ5%W~Ifi4F0M#0`=B z;gZlqF(TjY3UCAYimx1=)nX&q{`>UVT7D*eokEe9ZGz>da3C&B*1kfoynQpqrR`?V zNI6AF$6_Ra&!_`AAxj)M-@4WqfVf<2Zl3CaFTk-YM_yrCTVPqDr@qNJx42*wan`N}5iflHDdv59 zAHeEBT5|5WNcej|DoLm!-01(Xd&U(+S&o};Y7^gnQ zg-Mz<#I9UNK47?dFEQMC@gZsu&~RuTGxuhf^wI|TnP`|eNf7++JYSqUetVh~|DrV7 z5gAd^M-8M{X9CXEA-$PUTgdATmFOU_|B1p@8piKk^h}-~q6LfE)@kY{UI;#ykM0oy zp2uM0pezCsFEpxRO#g`DF-D9Bo4-w_*3`mbDgdFF!CENB0I@F*O_0*;^Yhnro!$Zi zTM)&6*u&Kgy(b`ZiW6W`R!cK@7bY(B24HgU`6!P}-HW=%DNRK`;oZ<-@iY4%hS_ie z%Y?=Q=g)#JqdNJ)p{V7;b!jlwT{YvwAmXeE!o?p2lz$*bj==sNIih>SOm#Pp9iD3V z%)Li~9}HF?R!^Ro1MA2hsAJ0m>9J~J(d_jjr#d2o``e7;pa|S&#U>~_`{zs>4?)JL6+9T115NqqO=uM|kUH znO^zP=fdDOTfe>yosW~1g%ZsO?Y8XV4bsVfb6t(rn3~mGvJFxbdOnP10f{BI0)Fa~ z9NBDKI3;B70*nt>7lX z+Upr?=?^wHf0~jNa5;#_;4wiCJ%^VLDiPfbQ#Ovi^sAV&FI4`aA+A+9m&%u!UOq>A z)91tWV9`&%utASeh^f*enZh^Di^Y>S_zMD z3241*h*o>ilRDRmz(?505B80F;ba=x#F=dCC~AiVe$+DT4w>yRjZYeeew|^QUb>ar zrpDpCB)=qnoTp6~Hh2a3h~i< zPe+Tli*;!6HjUe|&{UJ&U$8XULS@l0aLrGe2~Z5C-)<}=dZZRxIY3FQ?X?>tz17})H)RxiU&1dY^v8>kouwD$KFWKNJcs*%@A*#yfH$+Wf%*RuoCD+ti3r8 zqJM=0v=ps;ct83z=j^Re+{su+Q2bGSUDDR?5sRi3sgoN_5)%SA4T9fZx(3BJe$$}` z3jZEFr`Z%{zp@=U8yM~bHY3X_?vYC1K_wHa$@9YR$@9sjA${7DRekEou4kW~1;jyb zh@?SxnqoT&qGch-Cv>342_TeEN!a=?F|!F<_SYojn^VcglZ(tT*Z^IRj?sy$6C0Y$ zXsahC-U6&B4{pM2g&hhPH2Lr5JfDzue%gQ5?s+#}>y`o+J>@wYKK_s6)(t#*q0XVc zCO@RXSezaiRsVP*2%ac>E(NBtxz{PbHaeN2uT(gbA_RmJqiaJ38(&O?K|@<`z%%3- ziLWEj9#XcCQ#1wEETD@Rz=0M2D>~gmVvPA4LTLcQ%8_6C7pAnZS%o}C_5DHV0uo>r zXfpfT5jPkbPzY~z#@#CGw z5M)y7Oy)K$kf6|f(NtupszM%g(8Ly&z`&KaE{tZFKVy${d3lX3Q70%f4qjH zLJCe(c;va^z|pTyf4t#0PGGLF$^Q^(GOqzY!OEj}OyK;SVxs~lh%nH$w1lo+mWR3< z|4{)KdN9?s_wMRbZn7a_FzXFjcY}1}fo{(%oy}bl5Aa;Q?`Uwqq1aa4(#e506EIDs zP-u{F9=p%Mf~7{(A*Q*xS;S!;`2a(=QboCfndUF2-!99Qj$G4ByaP8HmM63Kv2V2{ z`{e{uBE-~izICqS%|u#YVlrS!hbFSpbD>oo-Yc(=ieg>7mI5jF8KbabKpFHGLU8F=~xZlG-5^fy@mlaSYrYPNm`xI zK;#7C#~7?^;`^U!q!|D36~FCCdGJQ@FHoxDz18~tZam+$#5c@qzwI4IBKdp0lU>}V z6?|;1p{uvky|5vNYbs>1eJ?yNjxCKIV)oGR8Xx+Ib8Qq4jL~S3sGU6DM%Iz;o^2{{ z0G9Xr5`pJ8JjK6y*Uh(^W&{sLwz`>6IMb&fDP*}{mC@;n6bcIj{nSu;z3tt#t`<;m z1visHQdo;Stw(nmn1h~lQ2o^%lQGM z!!su5n4M)mzPJH(it{mj7i95Un)?O1rLwqje3y>7aIBtMt5qw#0ePU*21x{!Gfo!(N^Pt2V4eTzu0^>n!WHE z7+$`&IPz)Y^)-lxP}}mqIRWTzf%FL7Xf8Vyzv-&uk~Xh@0;tSIn*;r!pWit}DYi3v znn+t54i#59;i*v`CrPsZ9p=x23G)COgCi$NUafFa53vrZ-e0{}!xNC+mA@BiAvYLZ z&WndPq{xWH$k8wI*g<756%k!Ko-q+FTgR#46KM}yg!3ZAW}UccF2m?zZR+m z!YIufC{E$OhFIvW;LhU4Y|ePP^%Ph_3v!j~r~ zqUfCp3hV6%hh~0z#Z*i7wt1kekb@rp8C+X!sOAk}J!$jrPnt{9V4$Q_H~r z-3j==7d^H}`pbSh7zV%rNr9AhyHtr8qT8x8+jw!%6dT&UzWNLIG2&j*?v?PcEWiijZ*5un#^u2qfQ3%)g_CG#0-!yf4&D_< 
zk;%`6cTRpwjtqIk24hODq29a^jg9GWM^gMB9cIW4yklIJROkhyM=at5l>|x#p4hws z&(O?bOI-Z#5D5_`o!K>k*9T%dcsNd&59yi#yEOK=y{941ZvpVHqSqkziCXBy*#yKX z6hezS{#JH;cP`)?&+xV_byX6TuXmU#@FU_c@*CM#W6?m)4`hAxp*wS>Kg;a&U#gFc zfxZj4?7*b8%T`&q6vOcda+4bV_RN5hRA$4HxTlzhNViYvR8wq1 zD4K0^|B{XyKVOtwI_>dN!634vccCY7*G8|mF-6rY)`bG{Z6N`|1O@<)GM9($^)@&h zKozN%CZ%9@P}w5$nOBYt+PPH zfRlsg&8q)ia}T7XEfL%bUxTg$w5fLcxw+j^AN2M3-lQyY7*lVLf4o{(g~45%4PMEl zA`_FHz6AWMhjM1}@_Bau-XdP}#pLmn5AYH!nH z1IGv*lmscy>)!`-eyQJ>jUcY#?IjE+j_PY-yM$WOzJ#&}G(q2{+FS^Ck8fvM{!4`s z19=jc``*6uVH49+H@*~fJcJ85F}wo)87n~d=K*=0y)G8?xPpZ^qe9<30XCsq9e^r3 z`_7AKjO3K-R;OVc6a4RyK~(8u7t9R{r4w#)O4G3a+Y#Sr=Jh z=WG^PwQo480j$_t5S)OUQ5(x|*AF2^WYUoz!B-oiwgpfC=I89dzl0bs;e!b-%byOC zjJk-}zh;<kr$VLl7npU*-=-R?!_6Zu%nfojWzkT&rtYy zxOP1g7tbdr3lxq3lRG#O`~IRQ>0(8s6p!gW#iJs^NtNp=k{9kVo z88?e+VF+?7j3Sncj+3G`giPk9R37J&6$}5V>+sm~-GqYPRDM}JlbK7rj)#44F#Xd~ z4@^~>NL!aB9nNTo%WdRClo(i+)|qS$4ylHLAL#F04)j-uw)QnvAPi7JynYR%-%gzO zyvw>UtIV{>XeEUu9(ZbEH&T3Y=Bv`vXa043W+m3v-J1a1C`lS#&rh}ou$qE}SE3uv z2e9s|YDtZbTs1wdXmr5}#-MB(SNm0ddrmb~FhQ{!crx;3oEgc|zv@J@77&#dfHROQvuZhY|Rw(#$3`t5M3p^(S}}54G+4{^vUF*J5D=n^DnvJm@r%^Nu)o#v5kZ`A8k)R)0}n z#S;XzWGPd6nkH1(16;XYh>I>#BhgVB{Km{yQD7;{$}hE`KV-hWPY>J@P9aDC#a_Og zPCl8wykdYGrhSOKs1@kfQ2KCJV9BLY_X9l37MY2zfiic{QdYH88mCaDK0i#&(H^69 zU)SY8RDTI!?Q%o33v>-c9Y=K%gM}V((acikt^y?}#T5F&NE zyZs#0X(uaM;r_y!1Ko(l*^QFtxdd4bohDt@L6$kd5aMb`H-ts{3BW7vePqi~<4$7m z)hP>G5l}mYiionbG@iUsL+O?HJI`=A-Kamm;1gZ?vKVguJXfiQwf2%hgdIJ$bJo=) zNA-lJW!IZ;>Vs;jOoDw4k|E$${L-Cd<%zRB;cpKxYZKEav5R#!n6NI>qx^nXKT+O1 z1A+@1!->8J5HM|Wpx&JfxS&kFeUJy^ZtNo`UN6`WjF2Aq27IoZdtXsMF8xpseI$II zNq3VGpyW`C!`A#9&da7A^D*ob)5TdHdw~j=Ex1@efgP^cKenw1bDp;(?s`v za3q$0FrI)UMJy)&t+D)C*U)vlND0;1-2tTsCJte;{d8$uF53DcrUsWzC{}*UCe5r4 zpiE2){8nola2x|A%0@l&;F15h9&<+a$}|)WgP_`pA5;I+7$YVKzli%x20iq!xVaF7 zBW*v*_9qXHgXB^9KYDLe(!1c@R{{mf468^V+pe%2nl7C;*FNS$=a-D5G>5lB^8`Ow zQ9nMEItF~W77ZVI37LRTZ+EjT*~Du2W16lzdRT8oeu~C>rH#7scBE z=`<^kr^eN$pWo-o7Ui&(2^huePem$T2!OQTmFbZA_Et(TMjX-#q^`3ng55=)KqDB3 zdg}cjE=(K*0|Z$j$WV!h{k%_0g5@UgUN{L74=f7qJep`RwNo=XYV)egm39oLE#|j} zQY2_CgR!@|B~ekfCAdppA1`FsSy8hNWuYT~;VjFr^UY|W=lk1IV3bSM{fYbFE#OwE zxaE|5HUzv%uONQ1U;~2~up4+z!U_(4)s4dvlga=iSoVEV)(?HQa*v6|dR#ICLfV?y z3i1D4aO^|Ld;>H$-;RvI^1f<4Y6oz^Rhj^P0@EA7L_6;GC_vk+B#-o;WjR)fnZ(oj za1C<#fS2q`X|2BCn5XG0_~y?^*`t8=CjS-BgDq`WFZ%H*^4qZVr-q(9;(~hsT&f{= zOL5bsM&T~W1x$8cJ~T}snlgv4uLNQjO~lccv^X9mL(R4_O|lJa9jl zH;wd%VW+n|1iOe%7oTO1pUgM?0gPm~Bj{fmQsmW%4A5l`) zj67~*iT1t5gZfu8uGLEg6Wx661n5D&;D@I-!2m4I(nVF__g0NnLuB&5D`jJ6#MetcLS>HAQ2kG7TDc7=EFac%Peg zvZ3&hs2(<1<~M-Ui}L5$_pM8->sR3L`Ma>N(7JZ>uT`&d%7q;j5ln!rP9BytQe`;b zrg<{We&cbKbf^!v^wXIiO=&47Ad76cE)JvgcJC?*bifI03`C*~9SyWK5$eu!YUQDfUb+3Bt8{dO?bTiZA+hE<~j# z$ub9&FE~zgR+MNkY1xr;Yadgc{TO3XIR^#FMiNWYDFpDqtJR&?uBjN_wW})xWH>e^ zqGq-xARBilV$(5eE3yY)ubRK!l45L} ztXL1&eqmMhdwG;JM^Zvid~l=Y-Mq61V(zP3vZvX~%EU`Mk)^BAsfB}{g9U7esCZ~b z8{})jY=PumTSKNAKl}3N{Y!72nKHsR980G{`BPpFd8>}I<@uF;w)x^yWif9S?Iy_1 zqPSCEmLG{|pCUe5nOBS~P%ioGw&Z+&?h_^IV`YPe_Wi}L4vK?&fR-*k}p0t6g5ak!qbX4?*DpJDEdG8XgmbehaNn2 znsc}jckm)1`p&)0l9VDSHQQtW*vxj~y_#}g(G)x$>$~nWQb)%2t*T&F<39bcO(W8U+7(`CBH%Jy&4llf9UBC7We9ypED#c@loB;U*9^!4xMlB?C^i})IozI}6XtJtE+ z-AztS#6mvOLeV#Fd7&}=evQJ{alsC|NUS2U_?5(LI6|^Qy>IX0U3;(wGZpizC*p}_ zLrq)*zZLRSLRwf;nH&@Kmkoman@81>Nn&z4kxe&&GB64UdH` z9nD^lI{I`7iWz<2Yh3z-FYJ;VW$i?Def|dTyaMXZWPXqMeHgeOrO4`sQ9`cA4>BFy)&yS%hgnV->EjE zFG^@?9X&nNJG;Ij-3>13g|`{F)GQIgoSQUxzW))QDj5~Cw~%Z0%VrYQ&Xk=$2+W9| zbPjdNQ^=8j({0g$FO9T*k7xM8n52|egmF_%GBx2FF>spFvvsXxQ@TMXA}wNDEOQ~f zK`!~5mUN~kp{|Teft0RH`wMkua&n89kL_;+muy83*lBap83pwFHt@ohXxX9PuVxB&&AJsYsDS}W<~oD`nN2`Tn0I| 
zkAGjZKX+hO6a37ur@=4}O3wd7mNG6zDp(gkf+H02DZ*O2ogT^_1?PU{+HlCzy-y;? z%Bq&Y+wa|7py)HJcHkr-Zx)OYFPo>8p46TMyV;?{Z%y^DcWwDPY1?2ypx9rB5U%rQ}6(E8J1DInV460)Jl8zQ_dVyFF)R6 zW=diTEPz5eGPhUMTQIMD3VphTIP&kwz#W-DC@$Rn16)~OzzE#-{Ge>?p;fn?kT<8dTkbpU)%R|bjx9UH-F+AiR zJcX-9MXUW(Hnw#eyc)ggsC`KC@`tkkZsFNpZMQbS^4It?_LU%5Y*Yi8fOIN}YQk*5 z?p#_%MpoLF4|a}T&bJv3Rj6~kIN(;J!isNywlR_W&3KZdC;l+DMmzX`hsbx+9eTC0 zFSWmy>72#u>b1w+Za_=J*QlTlp9lk?>R{`~r5b_j5G15xxa75aCuVcVPNa%J;KJ|a ze|5)I0s$!Z>WA5C!x&okXMAffXG@+AHDISU`;E1aX&aZuF!vd^?=4nQk$Y;I(Aw0+ z^1MG(lnJ)h%H#h=vn8}XQKG7sX7{b0-8){iIm606=&+Jqvn*n6%dlm!Of}k_&p0S3 z;-XYJVyp3p$bfY!AyZcY`(o7wu?lxq*t(qopT zMbG=v8n{!HQ|>y@?aNM0k8A!?kLUUwb;kU{L^e5eR?i9ItsNR>JK%XyIW^m{4`;1s z$EZz_@_&CQqs;lEBcAqbn;Yram1v~)dc^CG1xtx)%?%%DAu?tiULu4IgoaevSijbe zu6h}=bvAmJj~4z{y=DIU*I=(qFg~uUW_i@Y?Ldp?Y+m3}r}#y)E%fW3_`pP#7lV>k zP*sxG`<<5L`VF0LE3N8OD7KO zd4u;)tmR!`yG8AH;CmLpMU7L_!lgw&ho^|NOi7^!B`Y|0Y!a|G#h z%uOq2KeyY@>USV}#gJFW0*{--wIo&5>^v!kl|RhRkLstTZwB=l!3!9+V~ac%{k2jn zlQV`7nqz%WsEb5qK-*B^^NfGc;0I|3zzIRShW=2YmbHnB0HPJful~ zn`+ZeAW13|+6=b)-1CbE{^WMe8l1`HGrHKQl2Ja59bcq7DH$L*+}6 z_!}ExLQcLtt)uE=LFJK{F20w$!}wmNqs;I(j;{?K+Og_gk+ z>6T}*+EcF|JhhTjB%;7>zH}yAqK)r<9IKeEn7@!v)0%fMj!spnP}kv5Nx`SUU%ieT zh$xt;*0Laz9Qt<7lu&O*`0FtqicBpHfsR^U%q_5O833 z>aiK0Td#9KV+C5>TA&^f+YL_MR_4yddLMw&HlIgK`qFe@7@#9>>Y#$XCxarPY_&I& zzQ%Hc%G1Re)4Vpq~ihRp31h=ha!ix_Q}@7#pU zXd1JOKpoO&U$JytKTpL9+V$*Taj1At(Q1`DC3i62nxm|Tw<_zkr|=tHu22QdZawVF z3~!dZys58;qM_r!vv=dGfpJpoLu@qPqsp&jz9(~K+pXkMVA)(AD+vCqIKIQbiHx&^ zu6e^2>XAb{OK}653>RdSNl0~Fg$BXZbthm6^FDBqxO1RzW23C>=Wy|F_kdka8I#Dz z5SPay^FeT;qLwQB(_w+=Uhj+xo(uTIA%~AWzD6 zrhi}D6#mDeA6`~gh>ODd5pzMKA9G_JWk+iYLR}Y2r5xWb-QTDV1zZ{JOPy>ITOr3A z7c)d0M14DpZv3{ZX3OWROopo~9Q3M9nw(Ef%FpiqA*chc+OC|D-7#;vct>I-{&}3g z5-|Ghn8BuPJX~91;a2Cd*3@Cp?l5cXdl|ZpMwt-@mW=7~7Jd7fkYJd9G3pNM`XTif zIc&6ER%Mqkb%3+p{_D5jDc}z*cXrLMymEL~X|66^fZdn&`lEwxLx|e^{Y)rfm3{v( z8yJ7@KK^*eu$`dD^!2Q>u+o1o81r0Xp5fGtrqoe+3z~?ug1xM*Bab!w#FhSHJIL+X z?9Yw!W}4wfMCrpXBs1r2pLeM^G-_Vy083)r9bE7&(828Vpg-x+2*2e*5>0ayOm-zz%(MF=1oGHI`q$i@s3B7zqZTUg+=!`Z^<0|x5S zxj2TOq}zRMcMQmhgKB+<){u;@mwL|Eq+x%bIKCY>6WKArSa%e4o1ie1klPOrG1ZjY zw%O$|x`N&(X4N^gDf?z0gHp#@@`7GUGI$488tGV+@u*BfrT?>!6XN*~NNXk-^G5W3 z!wJx`>a}d*w<-VTvf~c2aR$1d;%i!o^jnynDcE~YH#(Q?z2dqZ(=_8S+ju;$b#(+a z_WUQ-;X24h-BhMaiyS&EM#KA4og-nN92&x37Ao&@=%cS6_M1QC_x=AafWYT~fNLU% zh}65m`^J04dcAV>nyTtm)Rv8UaO9*m;(^-q&x4Majp=h=ANQr*RFl4)y-KmQwn6sH z+49+sh0({gUsTJ5nv=$KdiDs*KJ)gNm)>IQ)GsU;2j&^)aK_XOO{kQtY$62NP2_~l zeX|sEN+{=Rc!XUm;)7XHB9(cc3%OOx`fda~d(z&>oEpOl3gxs%Vem$?>mGoAo=p9P zQx>Gq((WVtDrXcwphf*J>vYnH@LXA81X z?C53st@X!i-%xm#4==Vw_|#BpIhF~%6Vq=z*rqmx2fJild0J0Ye{Q@VY&~;b+|_@L z80B4AIazrdC9lB2csb#e{!bjylQg&PyS&@9Qs-!mVDv|&mFhZ%;Olh7rN%qdkBTi@ zXF45xYM*5N{pollTFmq^k6L$re2UsyalR@$J1bqrub{Vur!bQ0-`vcbE41QfF>_}-goiVu&z@>~geQOsH_{e)b zLv`9*DvEnX#rok5|9?+cc2|=}2Wjp}C?_MIGy>cve$~C#tD0oB*eN&uN)wJR2jb?o ztB-l3*_piB`hMs?fXB8DSESab+07{-E4GwO62#FL9*9gpT}b9|oaR8>CX0#|xY3{| z^~t}G!aUmnGP@sht-Gn5G{9Fd*JR!R^8H|<4+3D1iiuWdjLfyd6g(r7^o9Q|S&%YU zx7}is@Cusb1Nryp5RYG%8yXtp^u9Y<K1sSis-5(c_SIw zORdJIF&f#LEjK*)6(Idfh15q*w%=LMAWnJ2D$Bl>X||%#-zNG*zG;`H$B_6RemL0N z`1)MlSilnw{9Lc+xB1=bME**z2VMgktk7;rI1l>SBN&c4?D6wQ`z0zJ9tH#p%ccsU zmFEeJ8j#CIx0_c`{!l^<3bMM?VRD+`9uO$$x>fS~4k+7yh8r1)b$-a6=ed&_h@yF@ zj?s)>_#jTN6d^w4{WJ5H9N3j~`Lk044=A|Yg(v-AaG}!yjQKk(=n*3uJ%9y|3#1RD z&K?AA2h*Uh{-}zOV~K3S?q^eaCepWL7X)hG2o#Gakp4S+{VT2n)11V|Zfm&dM~9CG zBL?kpRKXs^=XhCw*?!046Rus2Zb)y?ZkWeu5A>S)vK(NtjYo<9m{{HkSKaI&5|#PK*=E6eM6ll&j3}IK zzOSPcUa@&+$_<0}3N+szm9IZmc{inJC5Ah#I*6HC$I)e^{h+Pe%wDyKlZuo2_ealNN+2R5JA2hKNv-*r 
zal7*=8?bFbCyM3aw8^E}*t{9LvbOI-;I^^}PxTDBwOd17*`UaI4@0?L<>y*sjoC`E zp$z{mvJ?c1vujc?HgIM$GP$MViD8r1=*Z>nY%{mYY+1cK@79)mmVfr%e#VHnR@mWl z(;1h>fz%;~xyn0a^2cw)3y!`qH@dazRyhy!=>`_1q<2Fi27*0~ zFOzhROZ6+&>i!h=h`b&v7?$7D{Njd<)Vo5u!%#C<9l1D;TV*pTnA))ZDF&r)BU9)AYD;Ek3*79{x#n@yZ}x zn<5tb@K|CwWLTcZmHy*mt%k)RGD?X~WV3vQDWKHl3F_B{`{%~D*N9c8+hJ(G#v-u1 z*0~wc#}X&$=2bNm&FvX%R$95}Gg^+`>{@mX9-Pg*`aBl7%VYY`GJo1OJf`|z&4P{G z@Mzk_Oo+&P$q-0^wqfA1)B63K7$ym&3T6Jcej|P-R%XNdXXp3(xuQ+AA`fi8bgsos zt!3cxB3>4z*zD@vSAQ7Mxm!r`{T#jD0z$iuX0-05*Jh`$9bQNXIYrJ+*p?F!J?2wy zk!V3d{v70{z$@eK#O;zovcHtJChZ{A(}d>x;1}1U&-P3 zS_Wo4dZeaTWtsQ9Z7`CmwC-1fknX0aVptLB9*zOOlBI1^Uo-5F+!?;B#R|%pCAllu zxXVq2!hk7hJ|umwZdcfS_GnT8FnF)T&hOeuE`8k<2j21F;|p=}FsXKKea!EGO#Ud& z379%LlJLl3pld$}x*~pyZbb%Znbx+oIA{A|#;Q{=kA?*V9vChv=DwP>ZZrhd23+1Q zm!%>%m>Z3&ORJN&!PY_htiqF5m5c9u#ha?S3DR5q4N(%J?=aZA_4L-nyXxm`T{<9s z-`e5*SDiokY9rpQityCOrf%TL0aWCddK9_~Wff5V{cpgr>`mR3zB8G%#+Kp+Q40}Y zsk#wSr@B_zuUs1g!>uARU_lZz%(`ZzOan9_L*;&T`kUx*4kd7lf)J z#%bW^dgN|D_zb7yc+?b3e+sC4x`UGw@VqAfj0dHa1ZyX44(%(x$o$DE5M!^CGt^7E z$>Npu&!h~0or=kBv2$dnX+S=};fGLZNl}wX>py_C_SF|7+TrR%8t{8M&HUfJT&_yQWGeFB^ymyPJ9D(#G8 z2|2W}pZl?D}*qj%?5MU$&pPS1pvYF8N$JTt9&0W%r*^rAETvI&$ z4YFWq!WL%#S*w)LgbCwlog- zuH~zeZLBUmF%G?h_VU9o8J@CAS}T6EhSvS;LX{IP=UxdvP=DCs_(Q^2MExJL0h2Wq zFrr*!a~Sq1np+l4`TQNGhuru`I1Y;(2}#~U+@Ub3PqV4_P$=# z_Q?#l+<>N*SJ?jg+gA=DZU88x8F$x|kw1@SSWys}Q*}XkKh4bVQFIYE_uDpWliBYh zC9uY!qSK=ECCtwEm%aODeru2=)=QZlPZRZ^IMOeK8+7q03@h>BCOu|&n6&FCH)V-& z;0QzqRR`R@v83TsbF*_&*~%%rEX#C}e0&bl_J3+Ykc;Ty_&3r+GJhZKphOEbSWC-h zaKNa3&3hviB#{ZNQip@B|LTOFFM7Sq^AOG_J93Jj_*pa0W={u&VXn z&_Y`7+j8L|_F|uzi55?Hx=kFwhN^H2KG?tY?~*My{-lA`njyEtgT5+ePdsQ|Jn^>(dw^&BsIO9%R!w9^7g7Z&FC>$ zOKHNsrbq@9wPJ&T2Eu8FC9@71%U7i?e#4oQLqlpsX*uvoHlW`++8?dx=^R4o^l58T z-c!%`YfxF-OnPT|w5XB$arv45Iz+i1Sw0@Cu6l?sM?UgLkt?IpPUQ2u?j0g8Epz)g zwpSzfDb1VZ?+nr_)_jlUtY~`$7iQIrEroWA;qP64_US70efHT(d)123d2)}tr>%c2sv1gT23)WSm)+dDNtN&69%xMW=TxIvE_c+c9NuW2*4!3*Vn7=FQBfcF zE>3)8Pyjo>F!m8;)BwT%xd>yY)#>J2#fi!ngeJy7-ekF*DQWRFGO6^JBorERp>Mrm z=#_ox{!zO$&NE)R?6MLHE2ArFD0(zusNv|qPV?Fs1w|pCe~MFl4fQ04MA+Fs6TclI z%SlM!Yg)8U&Gudl9GCJ6SW)*oqan|4vkA(mu(Tw#vZ`C1*dc1#?&8DsMR||e;IkSA zz`Gf{u5tu_MS;IgF4{nQD|nhgNMx!(t786uA7WR(MkxG;yvEY060h7-wWMxI-}0`} zVY1sf3BBy|gt#p9>hr2cM5DuerP(4y6Jb5KdFv@7B8>s?5Lb-~JM~(~717SAocsFm zo{zDiljJ9p#-p%851<}PHXY1@>ba?AuhvZMPa*iCS1w&rtr;rXsHgZ!$#>;^VMh4G zmXEAtC^pLCi!nM6aIfrR~8C7Rg~E|3HvD$_2Tap2Iv?13?pk@QvbD?FqCuy#v9VPxn@ z;=|axTA%5`2)<~;Po#w5Uqw|?KT?P|FeLuc$pV=&@mS^m>S>BU!d3Vemt^vxx$M+@ zgNH=Uq>e@K{Ka-dg&>h5nz^4jf=_o!CU&x`xGjpG8OGzaDG^!59`2!&cqEs+*Ec}Y zp*kh+2iN*7Hktc-@!JLckD(|52FpuoxT^u{3)HG$Vh*7Zx`NWdvonewYuVxp^)aA*P$U5oKUBM5Id+V#ombJS` z_qU2L?vx9_<#159bU=ma&87ml4~8LbrI2C9q2$qriJ|7h2*FimC8Wm*M|YF?z~~9+ zallds)_RfjI1%z>^)P<@br-}(D-`xEu&mA(XPub^BaXEil*)vssxN3NY(clb-&2;_ z4a1wv6_ap~lSr|1(ur+#e65-4LboL*G~ZI(gNyrD0jdjueU68dCMYv-a4ZyMR0B91 z=1y#0APz&dY9e`;G+u#+p141R`~@CfRo|QA{g0QMt{MBciS`YUef=1zT4wwiH`C=E zy1??XST#~%k{8_K-(sVK&kZ$CzHJ#lOMv!{V@Ny6qmxY%(uQ;yLA-QZc?xrM^yn9% zKR2({6p}R)$Mg{h6I=TF1j_qaGg_ED&Y@D{^KHBcCXsTzH)Uk@O0NUqg!?*n@z_ay zKwc_3KD!u7ee;$98y*eUz(q!jiw_Rw)){n39uXqaU4|6xSKsl|2Ga-AOF2#&bV`lE za2Cdk5RQLs%T=vTt*5ef9;+>$!ox+j;b=)fbXctGv1axm$BxTreSJyfNJ(*M7!_QXO@>`Rl$X{0t(EJr;Kg`)4h?zh|4)ZeyF;Pa8BK_i8TStNPnO zsw$=!hT8PgDaaH?~5SY{1y@`FAxnk9}MxA_e+G50)_Rhd2%XP%HzwL6O zd+mJO#i!Fc)Ajo(EYqb0k>>(Yh{L9(G3Y>05mMrZRJ=9&9oB#9e#h9PKc;(=boB!E zI4Zx6`2EK=&auUf&9K6Q!tIh(0_X;8+3Fz9@ z1Y>!g30OQLjaz`AA3fJiN4|#GA>2qD4XAeCj-1%!UR3_;ni0{d>n+*8)u9zmAY(^i zA0%q=FBVsUsQ$(sug5E<@ae<#8%pcR$)!-A@dI5gAwK3bz;4?-uj|=r@XgX}xyXFb(^-NmWZA)HtL@oj-{H{sKi0D}9N%K>5s 
zlZ*Z<;%5F}H1aTE)4(NepzB|tX?L-Y8BS)@(v7ULRYO`_0f-r~zn$@kysv;y zhZG++?7xVnEsRjnE8I1gUw)%4(~+FZR9~e>HYLTcOaZ1$a0Osa$aNA9P5~9y zpOO$Xm31Nssf%ixKjy(z(yh8*I!qFE;fEB_N-8b@q2vvQ5VAS`6KgD_a-*7hBL4ul zUw*f1r1)?bgrAqi>4DmIQgq;q5)#3m7pZ}=`^#MLMwrbU#Z~fP|{?F(BXF_kwIIvEF3J2Dc z4|Hmg#)jwLq=Wn0`Bzsbb|`eF0IurZeW=%_J6ULdaw(fr335 zTxHnKxxEiO;G~a#oV9E^)RW*V zT#n7v^gJfbs*TO_V0s>inVw?)kEanyJSDB;oo=R= z$q1g5&q@*`9^ob??r-8DJ|F+kM|k|m*T4RCmSRy}tF(A*c7sJjdK9-Rdf^K)Uv}MfzhLfgq(1+Fj~xG!7r%rj zpS|FD&;R~+zQa0}M?dCK#~gh$cf~#A!CN!@#CD!@-~mS;d(8CAvX7tm2^PCy8n@nh z>oG?iU74KZo=-iwjHfTLf{e#^u@q;1goQzS?Y(=W$|ZJVdPXwKe)zQc(6v}BtQT%p z>o0ooK|j9aMhLSY^FwSO29ar)fvW7BUVrPXfOGY352!kB_F>}19qRgGcECj=&nN1$AFmL52wU5VR zqS{P;a9P7*DBhpIy?iVtbJ5xAdgjHLFVg}vNv^wGDT8!lQ&U`{VbIOBt63+gL~ekc ztjLNyC%}9X3zqJ@{mze`c*2K{IhHkzbWZ*_$_y#YU`KS0rKj9iSZh^B{_i{A@#QnW zdd_+0F>NOnY0re4cB*Qf9ce}VT1YEZigbk_*J)t`lo6*f6BD2+2%h*?6V{bgCmT?C zWhz5iymMf(%$z*0pkM|bb1aoBta)Ui>D(++K&|Pi(tp19$Yb97{&r(lXiO*ZI52r1 zF2}kJpCfss;N0Bt#~gFsH_to!th1S(CQxAcE#}&ohh~a%o=K<%-@4J1`7bTF_U}u` zfK2p6>owcFaQBwmZaMhiLzp2&z{>Q@_b&Y2CYwCq#+z??%9Ec`C1Ta9d+oi~;jenl zH^2Ff!_PTeGPFnD{HC}3+uPnT%X>~sr7!*Wf1|aZU3=|kKleG_9Lb~2cnK}|{78WN zF?n69ws_dtlb`(LMs=1)r$GhJAS*Lpk4G}8-{f_`1dZxsS#hsft?#+Vp1=G3?^(!+ zOYgYTPN$#wH6B68vN~*))wpZ9`;v&qWB6y8RpUu?JmyTC2g@kIsxa$vRjqd^>h~V_ zufc^6YeAiNY!G(eF8z`Mt@@|y7n_6ta{t8!!Zkw=Dhafb_DPN%h#XnP#~*msCXduR>k;910`-sSh-_ zIDQUCu>$^J`2*igM(bHqYEca(1Z9R`bi}-Jl;^srv-xmO%{e8+4_>M29enNbKf~9~ zLi{LXE1xYCmhqVC_wa9_%wPduf)((S{Ko9nZ|1+2PhjAKl>bpl!2D(Z!@9JXQpI(| z4`q{w9CFB)zx1Umuey@t(&QwMQ{`Ew-~ZnCSMp23C<{Kz{30d+z|R2PMv_0k-4Oh1 zE6VSL_~_EFk|-S&B&PAZ?6V6$9k^f-A4#a~T%R&sKnal=_>nkRzlj{RFZ1o} z%}>-6iuTgq(0@a_i4l0|;UD#jvQED2M~4OeyX>dKe;C>z`{|26NxyETA3q6TQBp1? zlOZpRZ^2#3DwoF9nGmZ>e&@u;VEm}*&%yiv9#SS*2k{f~7vpEDLq@(0{nyrIK8F4q zG<3zcqWySql{s9p@4@y7P|E*P@ZTh@Bo6K$L{iY+pO-`@C7J!SA0C{c=(0{>n1nZZ#(<3p(nlT9#ARH^}m$W?>x?}^ResRi(< z=AS(@QN~6Ba2@*>J}GKYSwai^G`_XOa3Fkg*eGIwbOb&_k)0m@xX11IH~Z}U^eZp@ zF|$tFZ}-UEc6$yx@I343YvT^PtE6=W}>MeR&l}5w!ES;d^jBKB|@3Wt|-+nyc zjG|l)z2OaSVByvQ&w1Wc_Sj?nx^)kH;1(2n)9a7WHQjuhXa93S$edAmif7@MUi}ZR z`R%W-*Zq~%S+0|}d+cNW@wKnr>OosgPfg!+^G&CJ`Sfpp>sxJZiO_?$+i!o{+a9#l zmb>n}>tva!A{K?zS#&YJY}4ktRik62J@$SE&%77!n)zzgsamOV;x|pd7F0JY6 z)mL75+%d--am4FO6+Jhdc|%>kak1H&U%PrO%R`vLYI14Yp68)Iyy}r>oXv6DCac)^ z*|}AR1-;Oahg0!}y;f_Bt+wQzF8JSRmtD5ra*InY`W{#EPkGX=7k%%${D!^5qqe)~ z;tMJLj(5ECgU24r!lFI*dR{j#E@V`ESD4 zF*7sDHJYGe6KZDtUHHDfP;06q`;T4P&}F%%2(*C@BKOcdaLX-T_<|R`=l{Ov)1Um* zy7lXR_=Ag1{p6=v#RCVw`^|6exb^lkzw||(vVZQmU*CG02j8UEAYk@9+XfXfQB&g< zGg4@Z=`bGn#1z)Lbv4a}^CAXKZF2XUCkRQ>^LplayCBBS3d+e+WwzYfZ1V@c;~j6u z-KM6dYmMdy-uM4_>%v1Gvh87qAI3x&O+0YREkE_?(=Pko_c1m$b1G+9U!DEl_b&L* z2S3OI`L)JvY;@VmmCru#+50_n-xV{MQ2 zDf}p#{}B%3mMNCUG1EP}PD@sqHKcc}`NU#aS~uDn|MY)-_V^Een0Ya7P;9qnr#E@v ziw}Cq9=ktjYGxU$%d21Y#sC=54UsGu)IwFVc%K83aN8!`)vHBVz3a0D(UP}e ze)TI~dFMOcdGjA`BFdB}EBhaCzz2>xig*lKm(4DZ!rYKX&AKRvm}A_9=i?ozmyOnSX^`sih`Eh-!4OC&jrh3 z1mq7@5wHA9<&_y|!d8yT1bkM@_<5)HBK@1#=pbx`QFNI2MMNLx7LWKPG zDM^#pbyK9MupG`CUuqkUQjdZkp*e?26g7;X^tUiSE=R}IQK}??)F}rq4KP#2>6}=spl=%zf_ule@l-|IAbygaC^ri8We0NU zkTz(w?u+s#eu95Bn&X>P69}xfZu@k(K75o7K}m?Hh1lk`M}8WXNFaclN=j`1b8~rO z$|on;kE`bBskU38?dYflH;nPEu;526kUR8W(-85&E^6X}Oqm*oe~SJaHnJ4?Z3pyG z_8I<*UC18go{n0?G``IDU}J zB`ZqyW_wEZ`ImG4g#+=kg!ohBXCwQs{as3u;zAvc0=ncPk56*+FO7d94n(5shJIb0 z4iOttd~n}}=U>s$Gyd#~pXN|QFEJNX!<@&)d?>~2XZ-6}cB7IJy*XeYrSTg}65N*R zs|^3qp>VAeuGDqqoL`7)#>Mbz$7TG08l4n09R-xn4x!)Ne?v^#?;n2#e(t~6bpV4P zc~~P+t37C|t*#oMm~Yg+e(pDR-(yeG$14;6{ontC{Eb?5`|Y>a1Cqwarl)uwS!1Na zEB1KWeQkVF2+XALu$u{%fQ`&ATRzDEl7~lgt@f*5Ud!Y5S=+^BzaQ09y7rg9VtRmE 
zkhsEU?F`GW9=XGI*ZlMv9>{gsxpd+220#Z(sIzf49?4J5lkdqmE*g zM0ui&Y%Gc3`gvM6Of`->?$`^?`=%H~8Rf|r9Q2|O9e+H_H0T_&6WnWc(2EW_^J`}v zal~t@tJjQ8O|H82RxUNa^5xUpTu#uX`WPt zU9^~(**O`TzfNRqjaq}B(u${5_$9(PfDt-n?32{Cmna(2WfGhhZeQlPkYwB zC!FxH&9>N*M`|(hU{(!od10m*<{me>iws8b2m4X~FiQ%t7l)Fb>@=F8o=g$uT$*1LN+S@3v+G~w1~$Icxp_?WkVq2~TEZqc zcwD#X`zmI+VBv8&e=iEDPz2yN9+=2()o%4-z^_Zve@ds_a_0|u5 z2r7UH^%v)6oetBHdjBbFlyGt$qZY2kZL)f611;Y0`&+L6?XR)(J8r$T%~Sct%e>w4 z!V53NhPU2&8>Wxj^J{qA&1Rc!cI>go{`F)2=D6dIJMn}QZu$NE^6AM}9eU`${L8<5 z;fr5-!rwk&+if0vxt{vpm}WJZ8sgU7#?8OK;q|XOLO1A+mvF+Zx8LEI4;}ZUop+s? zny$}Pd1qq1(K_IH&p+(2L$A8@N1DN!n)>`FPkr+4yT9OtFVxyhcY{kmJdxXkxtkQ- zvgt(}+|tPXn6tC2h9iZgj&P3E@Dc%Tb>dlFIrK;8{@y1%_k-6A3$armz zTRTUYL~l21_&fM8&K=D>ET2g$riyBF+;%#~R1qF6Gxtg^-e{m@m{^Wp=Q{rD>#lqI z+uv4QbC>7vTMv5Vqd$E7hw;-DD`o&*x4wG7fd?M;s>5D%(2K7B)vxEdnRImQ#1DSx zxzBm_F1zg3s5Vgm%O-WEoTxBokH+aUQ%b}!Jc3Y)XIh)G9c+y|ptsm!%P*gC`q4)o z$-MlTXP)^#r+o%ZvwD=9VdtB(-#z!-^S}M=u@c?ocFrn~di%}w*FXQc&l#JTl$mNV zVEejjf5HD>UH9uxo%UHCBZng~3kKVIi>BOA(c8v?BQn80&l^8j63Db&X}W^*5;p&E z!|&enu6KRyt7pm&+Km;~2OrG!D?873eFa=^C?=_4dRBa$Wy$pOx zxEcvd#0Pg_d`>M3AK}v<G z)P+xTaxByC47Z_@z5HkV6lG5PiJv9oyIL1MDN#FN^c<-|%3q3qQ9Af3+UFqoQ)@;2 zE(Kq@w0?{BxioxrB(oop-W4AT_Sr9gH$P$|-wOOMYX1fM3H=uN>BjFmD&W)R!t!VM zp;|!{;#-CYnEGwuG2nRw&tj@Wnay`TBaXU^5DT;h$Dm@z7! z{-x9JxaF45e&(}Waqy$jH}Gw{qA=s z9RCrd<-!~5JnS{EKIXV%xh&_Y{jyco>69nd&#m8Y|9#)`#y2x7*XBZfeEetETtnA* zsirPLTdchB1uakXo1EdjmE7*68zgyY!gzUVrb4C1{>@{LJ@!LPC}=Whw0-lt7bgK`?8$n1G=eZ({IzIMhHOad3es}S0 zx8B;Xe_!6R?pHxy+9fAzmCtVd^XD(0o%{c8@Xg#{3iMg@ONyjlnrE#N{_5Xk87Yq) zH*VZ!n+FP+O1Ax}=bptvS}f)te(+&!HLa8n|IT+1cUXWHI7132tzr?QJbU7xGH|f1 zn88=(4_Lq&>o@0|GkMaa1NPewzq3FjL8iO@apyjJ@4fR*JI;9E{&ah*gfS3YylCO8 zue@^GEjO)Lz6@JBOyjf)T&Ui8dp?F~WFE&rVU7g}4wfqI?d_=BU#^^*Zg0m(hu9D< z@E>>Iwe2?B)ipKbOT|QeUA|lnM?>ojUH9QfAKPI!c7uikxhyg;<&S@m=^DEgx)(#v zIm@CA*Clp_QYYyc3YW8Acj=ORHjCi~yA3LX*{5&cBo;<|o4}zx-G+#w7sXtmWlalt z+FM#$SFaR$sH#q|rE8I`mZHTx2LdEW0S-b-r_t8Zu8q>=zy}|E(5krg@?OC+gV1hB zDn41X6;s{SS5IwUxe{(kh{KVYGp6sj<5yYCwzhG8E0~0nsnj~dhCcG>qmBLgP+KsG zLul}(8?a|#$je0WZ>3M;DiV<|LnWzL911{3G)1xC%0)cjU<%v=2c!f3vL&3jW*1&` zF?K5{BKE6H24->yMlc=&XAS`^YCkx3XL-44BQ56d0$ML?G?5cAq~hn5D`Z)}%Wf@I z0uF7GeTZ5hBU&3z;`-JQk0&@hR7y?P5GddCkGr;>xb=P0rm=~Vo&%HmRaaeo@rA!_ zXl$Tc%cVK!7O-G};Pd|l+1D^)A|9YAIcD*%`uAJ0#GGdDi%IO|-4^BMkMt@jHB^5k)|D%$$x(p-=fAtU=9j!uvc9#G;(C%5#KtQ5y z@F=TVyTTXqDaT7?7j(&`uo4*il+hmvWJHs=TaEO^jG8j)Tb?fXBN}Q9se&z}L81db zuiRT#OfU4O@rQ3+o>fUG2m{xA6*3F5Jpx!*@uPAcBu5{;=!CzcKebpoU{px5Y4qXA z6=yiBF`YsP8|(~!LG*G73q9$}{}z8yQ;)UO4SD#d6JFiqrccJplr)U^=D;h6sZ_S%W4ryK_vTVh2wO=h&KVl`oNVHI4-T)uP z7ArhF1y>oYHHN=0pJb3&DWA^-J$k%WLn&INPj~pJJ4MBJ%7YFMb~y+nn(Rn^gt6+d z>I^(SD!$L@_R(?A8Yhc$3U-Ic{t-^N=^7(5ms={}`3_jT;X3A~!@xegJUi~%s z|1#*Wgwm5AlXky<^W_r}@%jfR|F!z-MBbAhM9D_6E=$oz zcm4A4;zh6iIxaa~cl|@J{`&Ic<_}-a@%M|MzpQXgRT(TbqVTElSF$R-=8l;$s}5rq z3Dxn11RD_59e6KQpm+TJUmt%N1dIX3G#JDjQ1fT?$>xNCkwTB~Q|oG`kv3zu@zFYS zT#dYWrph7&6qI{LkMn1NrKmUKW*&sm2>73%zY-IJd&J>Ktz5a3buuD@f5iLq<{x;_ zK@3BzjXhZl297xLNbo!O;6vDlNHeepJpaP8Km5Uo%&F8UCZ$Z`DWkoFrFDj^bJ0Z? 
zv3QnEfs>qKafvYD$8wwniIY~0EoZDAv;~ahK;Y2B54~c_lsD(h;kZfS?679WLN;yM zJ;*fI7URcj)u+iSFjx))Bl6IL4>qq^McjSb8Hp#4Iri9whDK~soCjwf6~($_qD;i6 zO65C;A3o(zm$j`~4cL4p|MWA@?78PRgiML3-z;Ly@KnYHB%ev=@a#wsw<{QAH)1r- ziE_oUn{SS$iSF#HXk*)Adn#FnK7orTP}mGYp{71fOwQS5M|3Kz3GBhlXIoN<6r1M6 zRVZsyQ-)lrrD6l+2~JfPNn%qI)#H)9_T6XF-g^`3or(;-lKj_*#;ry}F2(pKxEZ9I zJ8$lJzr83(m@p!RD{42#G_ufVeJ>$lzZgUl-*J*+VM2X|D9^I5aIR<|eC6erx2#yf z3moy*U4OmbUpfU#4Qp%!k?o?|H$tV}DoR2kslw&Vn?I{H)(q#urAn3JdKhwd%un2D@bMURS%4Bo7n{K-C{`>ET+KUz~#DR~~LeD?<{Ohl~fwynG{)W7P%H#Vcnt%KuHxqQ@7M_|rH{v$zRy|INALl<^VEVI#*PdR0au@gWD8JF#t z24I$rMzE@sDFQmC{y1!6quF-5?U3wTI`hDc2S5|6PmJr-I3PI+_LVzW2r(Z%mxH73BW#!v#kiemIt)NHBcnFV0xArsc1Hy=M08*}1lM zdZ}otZus!wdwz3o&Y-{u#q^aO)kOZs7c$+3Hskgcx%JlDZocidm;U*pPWFtUJCzML zTz{8ccm0pEei`M2TJ~B8!0!4LJ>eFJ0lYe~5*v&RWVbKiYdEsKx*4B!GsHcGXQFd-tGFZa$ z`9~go>>sz>3cAR&JHZwel)#!kdh!WYdW%4?yVFhs06~s+C}-$=uD|};QZ@@)(VMNe z-g?R(|G4oc8zC#wNbuoEhHN34)G3xYoS`!Gq1vV{^#m@$A_ahe@Cg@K6*T=d-KE9C zxGMh=9#b5pd^Y{9>KFPezcr_glvh2}NJ169UfTuQAg8LVKXM&{p7d6Y(ieY*p3(%O zNBb2OSZ4e1PpG77QHwG_=1UL_4+mKQ^~%*jNwZPbBzk2m#TD0Z!yUi69DH}LZp={7 zqrtcKajC^h&d)3O)#}!QzgpY{e+S>gU)6TO zUm_q>#VlU?_*1_!^Vb7?YVoJ+C*UtI!1dt^y1+V8*qxtZi9O&?Ch?QkUe3<^?Vyiw zotFY%OMgggNnWL@+VB@h6H)NDVy-00sW<$iZ0PWzKRofoQ%*ivkb}Vt|CC@qwuOE) z_>N|(%y1KYAAbYE8=+iFAJ+`8Ju2xKTBQ$3?&@Ho*J>sA2%q#%ppVHsh5Y;P(Ht>6pPp@4y^jCg? z@O$-FbmnrT_DEA@kP)p5{d@J7?$KQx{j)FA5Ns{<@k?m@s_mcs{-;Aed+6V~%m3Qu z4+2xufByfdzcx)Y7I^)YBRZdc|6gKx?mgI7@A&)a@+tCp^sJE|65UnPKlC7<)jr*c z->1%>odQu0OA-~~m+@C7PfZ*<=@)&!11db!6`@}oe6J@y^i7&~2maRo3mwvc&!>ZC z;|s-k(FuQv2Bo{wM-#h1<&no8&aPR_8WA`+%7Elk4?cjA1fcRx3Po7AJ@dqqcu9;J zJ$l{Y!*Q0uoUm-^a$FlQ+1O0p)?k3gh>*{Z`^s0^+S^%_kKSSeOE|Vry*pjT<$Z%83OiOXaN-FkTzk!5@l*MD>0+!mg`6DFIEoS9ssj%^u(7E@1dt^{ zi$8wbT3U#=jp;8KXLDKkYhT-WKhw~lD25^!fYl- zW5h>czvu6<6V~RSH@A+tj5j``U8<*Wyxf$oEMXs=urjkNpH@cy^1&| ztjw`Hv}2M9=1m^)%(!USNF z2bLsBMCwy@nQVrAoir2oHvp_Na>SkY+_nDt>*G2UWpAKNj|%v1fQ6hc!8iP^e^x56 zI`#j`Wg=)rsL$k6*{kxYEP&E#*~3RLU7{zHvq;pv;C}laIBxUt!-o%Bf5Y`L zyDVM$F?JjTe&9Mo2M*|uX{Jx#CLlvBly0L9HySo<*n$r~m@$1Cf+Qq^p+EiUvUIwg zjhtJI--06=aXvIn7gaE|yzuNSv`n6Wq3}8*)<5c)qflSx2ubKHgb)eO7K)f)60!IW zJM1ui>xqP`6WxmWTW-3E0D}l`E}NP6%FEKOQsIiruXy%}C&W9<9vq7i2kiirC>VJoo%7r%o;5bQVuw>82&{ZT?$ty*+!*y?5Vp z^UXJp8@ENikc-6EIr*sxrXYLD^Ar-iK1qL_ zlByEa8(@Mjh3c-qSmV{X*{{8}`jnqEN(+FEUxH$nz2FWcYxP$NUKz`#;^YYGUn|^U zd=;(r<1U$@)ww(Js>K4}z2shoM|W8hjAky2u;Puz0>0@Z9DZJQLtFOcPQR3k2WlF)d_#8x+r0<_06!3 zsh`P!{?80Fyf+U!?68||zUk$cUZyu7M4`dQ0~Q67yS0_{0J!P|1r2uex7G?1`v;He zG5kIGG~^8{L)eLggQzAbw?o>|$Kmhbt1hyTWUcHqW#^f5e@XaX)G zb&^k>MS0#+gJEOkU0F-f3us*{KbAlV0kfi=;2Q=W+9IEWW&h+CY=hsczn=U+9G^ZS zv~fqA5S^<073*s0ua2n8^##^n!_;&Sp|HM=wyUXXFJEem35C1uB%ihYk0@28kE4K~ zs!sq+XZm<`deY{@chym*SAUK4B4@Au_Kv^S->h}~^>oQbsxO58s?V^PI-0dI_ts2+ z85fdl;|qxT^Ao4|USFZ^R2>Dytw}C7MS1KUfB(nl&p6*O6=Uik@~i)6CI{_}X_`ON ziNhM$A+IQJ{z-o3V`c}^yh>$lj)Iqg+@*LXQAL@yvuG!!+x%HD4XkPfhK+GkNqhhR zKmbWZK~y@x#H^(5PoiV;pfN-!#G*0&I~rssl%W`ir6g*l0s*;V_4O<-vD~m22y0&< zHCwS*3>Pc{+(eT}7RhX5B*2#7W9kZp(wTND#xdi82WIL&vgOL?(WBZ|t-uI%%{5n# z89Sb}+FV zn&O&)Sid-H76yz;LE98BzVIRfKw$-wQN!pH(K&rA_nA7gODnSL?t8Eai{X;DsbpM7 z!o?yOi7-zv8`dh3Rc0a2s+eOriF-~?JV)5#MCudR1UctUONmH*svg@IJ4uOjhC7PM zPno(fj6fAm9>jM8=QEDjgH9N>R6EOUYL#c5X)6?Bs~yf?oJ^O?v%Re?Cpd%XPhD6MxthQk4A;e>Ub0a3+#S$zzB$HBN_QAnX&SJ1*l0OMQ-4lvH88GhSmN z#Ai$A)(HpEqrI)2c5!50F$WES7};=x4Ti5fJe|n`3qFP-v3yB;&15<&v&XXp{%|mx zCSVg%2SgUT!bZf@Wp-YP6_UiggiB9kusl$J{D!|V*7!ruV{)l&q=G?iTZ)@tdcG)N zXX^`tX7Ibrp9R9!zYab;R1y1dHAz%eo!~2{yx>$bc(W8EvosEg35Kv@U=|?)6?@+* zInFALaXP96f+~fwZ8|;`n-p-cPC}YkR6S=@dv6hWsdJ7ky&@Un1TzpwK@Q`5mqYh{ 
ze&!imI0v2nv!7ji)l^;)Q1AD@yGVK=Q|w9V5<=nn`uhJk>p$Lp=j{Xb-~Z5WA4(F5 zr~dS(Y%N4KCEF4q2xWAqh`DXj2pYuBKz4#6oVZrya+rQ}03{8lf51w^UQ7fYOEd=P z6OK8`C=|N;j@!>W`&XDswVRh892K*Lryrb&+m}q(k|UTSec_NtkePAUT^}u2Fk|Kn ze5fEJdV$6Hm}9;Q|MQifJ?%)Zdtj+oFqfW#Up6 zeb+u4p9IzWmzCa&ergam>d0JnSFgQQnvJ8@-x)mK8hm@-1wJoaZkNudMrHLr9x4)M zAAbYT34bG|58pueAF$Fq64hjfzp64!I^nO^2H)NZ(a%61@(MqL=i{$5$N~Bu{(68E zK786##owX9a}|Hzvg9)H#^Fyb0DzsYjRpbu6jLLtLW9Kz@DVTS2Y)BWdhb=rnB>vZ zA<;Abnl7mU4!(+77l78|9Y&YY)YNq24L9t%>u$?eEN2eDfwqD#Ryp{(5d`<~N%Zf) zze=AfVotaKQ$^mvM}i%EuM(p_ZFCtWi5>j?6!-?ZD|}(yK_uGJnZJ5z4HJD-o6B#> z%B=c%=&WDX1WWEjf0x%&leOXR;MdB(qt$1SPvPP4H~RGIucL=}|%f@t!e(29{Z4#noqj65FXfYljg5&l7pD;i1;Zw|f7BqiWS>XBePZFx*sis4N;qVJA*M7|#b9OotuXP$Fd>lwBy2%! z)q=|D_O^e%{37NFY(gp>s0gt?hCezyL)MC2Unp>cfzV=twN#*$G!EMasC9 z5vM6cXni(U;?saJ%eW^t26afKV#=_v2?umir(A~uTW_=V&O7adm5T6BkcGJ1s5fQ^ z7=gcAQ$r&}sBfreP5l0X_h(GIA4$-jFoBVU2X`1e{P2;v`|kVFX~HTYK2{g!*2x=2 z5=308RH`nU$-eaBi$bCjSh{q{%{SbLJ%uwpsR_FambG-6Eq($FZgq9Zz4qRR)i`T} z1U`Bl@}mlP?B4&_`i0^q|0ZXAAO?bn)G`0z+0zHT){mRrQo<8eWs%HSF&=m$qZTP9 z|5m08e9sOBd`i{8rDtXKzNPRRCC!Q2Rsq)0g$9PI)8%&L6;|zVU;QcMP+zRUu`wj29^B*64@FCs8 zoHyUxXP?PnjIhmk@L_`YLvh02XjdQ@=5mDSLVmG*k$Kqgb+P0U8BG+#iHJ)a7d5c} zVI`veL4ya;?s5*hS2@$#Qp)D)`VYX+i#nGItR=L=bU}scH4agOWJ$yzKb#N z<>#J7C|508@zbCE?25~$a-OVh_=8{;5-Zxc+wQw@!oVh@N4K@M-g)O8zxwrmVqt{0 zoDlK$+__s!n1HEwB~50tEb;yeg$>rIW+8!c%#^l!dm1^& zp{=#;?|;7o+Zf>KX;l|0tRpci;aD64U|9dUC@xs=KJv-&1h2jJ`YpHKOmA7BE6K6N zl8}IPSp}M-Q=?Z2Zm{7dJMFw}5o30s(%i=3CG64G8MW^6mB5pKQ#470-l96zK}bL( zCr-bB>*<$uPF?h?Mt{vwrvtv=38=pK;ohDF&5c@?RWoRBJ`7Ba{(8>_ zScQzDUT*pa9Xu?pQ11X!u}6|qB0yFlWX)ASC7gtnao#L>^d=5FBZ!7ruZYl8c6$Ux z-Ko5R5E%aStSr_h?s(Q~^;XKQm#U8xfpSCuJ0$|qE(c%vj1X3ZGI;O}wqqA52HxPS zeu1R1s?B2x3Fcm5Z@S}8h036=mmkqbcv9B>n*k1#6DE2q_{#59q*rQ$OR&b-laiIF zVrnDw7Za-ZlU{|dj3OhWl(|<~Ss#CL3BJ)^wL6%GfeKN~{wH8B#9w6q5LF4HzBMdR z0XU;^`p9;2CEP}Av?9ZKcl}oXQ`uDVI_FB_Y~$%Z8P~% z3)Bop8|5Lv7^Nf;R`~_8D}QAi7xDtfRi%F|eA-ooZxB?M)n{cWC4yN^VyN;0s8)U) zeS|Ia@pAA*W~LX%(K@GbKPil{lBUdb(xshB1j;;Jq!#l>+Zf3(mC=jpF@V+aiJ zxszprM-RRmMo8Ch&G@Uu50|Zhw%vZaT)Le(QZiLf(!1}y%QizT9V+GRx8Js@v9H{y z*vJ!;U0`}}CW~QU4O@5kcaQiE5pUj_KmU(YrUXl2+(QmO{JRv3#pBe^Op|Op60(EE zD%%x^55zxWam>QdmM3CPFjA=G+S}6e-AhxY2+a~M!^dsUpEyrd=t7)Rm`AvTr2IYRoG?=}RlqHCEyb$I9 zfgpZfi3Fj*IA1f!;g#h=`IZ}Rdi>$X<(5(^uqzFpve#aD<+Ybz5|s#p&k1+sz~sr3 z*@c;^OAx&re-9Cqy;It?#dZwYTi9(EOT`tA4ci~PuG(6dT?>HwB@a~h=I1WDvL^Y$ zxm4=ghXXFzXqjYJW;TakTY|9tn7Y8Sxvdr3N}pIi+8$0}>8lUNI2chJvLEVas~PkN ztysSN=ck?4wsIBGvXB(GhU*swKLmv415i_ZrtuU7bg(ji&b-r3{xLQ~0>F8+!&rhz z03u>Nj7PWPAce1gBT4Wj?IaaLI?bQ0*Wp7{7B(IX5l#Ik2{c&qbAGKKQ?ijkBmth+ zgFzFar2dT;{5kowMGR@s$87+jsI7ldj*j`cOj0ZjoUk&u0ud+sotP$6}3 z?1U+bfo2D8X!NL2n~oaA2H$KpT~}X+HR-CWu0n({rw$o3m_T51>XEZlaX|m3dGqFD zXsHAdo{BuYblhR(zPs;1Mp>qh88e21&WOivjl(L6Yb73;*r>~FCrzqbjo(yZHo&hmZVFkT!yC@{7v=g%K;jr zMs0#U5$)b=+}OY0ar=%t?)>qGAK@PP_!CbNp|7#2NiD^3O#6Zpjs5yf`O}{o8yX3C zk2&&)BagU$+I=rR{UnxMkXh5biamqa6IU%?!DAVVRxmVTz4cnt_+!Ro?ovMobtT7I zUGm4b`2^yANjuC?F7ZNWF;)}ejRQbJIJzQ5()|MGW~2za#!;<>Ofu0J;q(qH(WOd# zeI0n_IF~a{)UN~w_XbP(0PbOl_<;WXpi;cZ*|; z8`h_rmuQikC@xVIrCCU{v?zMa4^R5aHWQH#kcUp#&FOheqa^#%bCf`8g?kzd*Jk6- zM}Lj}s)0S*)W>DY24Z52VJOiPrZ9tdpz(RFf2o@~q|(5rUo)+jq)Uw$`BcCA^)L~7 z_MV&~i?DStxU*hML#8r$f4qih;)*^==7kEXTI)yLEh)UHzVq`Tl*;YB@!_jNb$h(j zWnexDzGYNquYi@Zdwq1r--GY6uND6e`Z)N&S_^#D>}pbFJ<+FHR?=ua?AqvKxI2KN z0tMBfJNQ0*RG0t9tr5@?y7QMJVugsw9pCyD_jM0^Nt~+e34co?6EO54OB$2_e=&q% z0aZq|*u?(hpZxgtJ8qvldoC~q%Vt~-zEw{XoMB8=cBPNftd#fJ1AV&iSM>weFXZqS z7&TU}{5S4S@U2>xOrpxy%70J%rSTs9KLx(YPffLJ#oy&|bsPP43r-Hcch{XhweY+1 z*8x>w|Igai8U82AkHH6vUj3~Kicd;?lc+(#(N~{6@Yg9;HIGUu$&;V|mi}6WweX$% 
zDA{V^YFT@mWyVKvj&^!N+ zJ8?(y;}!bk{=Zj$C9o1~5$u!uPZ9J>sJ~2WHD-GBaa~8v{EGgKClPQh^y$@Kh+_iQ zt^af{^v?g|?(^1Px(%cj{xMs_Pe|}>b*>oaRvX=gxNGx6*;cr(@>j^{owzGCVPV6) zjZMt|)yFU_xK|EJ6@SXO`KQ(i%HqSo<{QJ3HhM(wui)2C_AZ%oSP_%|wRdAv8GoO+&aW0B5Q#7Lo(!*YQk2+%(h;9x{ z8csc&`iO~*_-6o*MB?l>D`#|C-@U|5!yP7>QlufaI8hdp2b=O(YDbg!lWaVG%!xld z33ym~O6=>x?vlsPMRB{aFF;0qrLfu?m4aM>XegPglYbc|JXY=@%vW-6A+!=`@r!*y z+>lUk>|ODp2OoI!(Z`CMY6qL}ZAqj`Z05kMBHtCDvYQX%6kGk`3b{=zGnz;kILgz` zN`Rf1nM{sJOIu4Tn9EY2S2V;vkRR~2SZSqP8#RJHA)CZ#S+#H-LRhE;YL&`)R`0?Y z6bYJyeE~*d8#C#1;sj_Qbp} zJI+BES=g<^W>{j7u{RTu(aDC1y3|*{y3+^mFCZG1LR@jGBVt@GavmiH9q2;F zA8)>CpLAPh)^pFObBaX1ckHnk=BOdTrfePiSd29^opshPe|XZ5@FZr^TcjZ>N`urpE}NWx3CtZEcR z&A}K{2@f=FP7f7B729DN<`l>zTd$L;_BAU5+1&mAxQkO7ph%!XL^<}H+WzrUPClW~ zS01}pW1*X19f$GvQ-HT0~)-OD}-)bQegi;lKfvobE(jI*9 z7cdXLEVxLjfa%A}uiTVXM*a8&1RkUfC(8@N1j@h}L^nujxFD<2rQ!CP1c2+$vK#4K zL5Gbn(xp071+3E@MGQR$49e39yDD9}ho!*!@MZAzE)NTbic2$mP+tdMC<#lz8_FH< zl@6&q{JnN+0%1fp_13*LNNAE(xELt0OYoIyCAsA-B?0M^OqmV7a_NN%sAINgvh(QZ zuUgSaql2Mql0bH~)UbE3NOANBT$TPl6enCJQNqK1$?tFmw2Z#Y2gt^21iS`+W03I2 zUY(57BXb-tnJGe_OGm&zo#@*bn;9a}XS)8TL?hibp&TafGZ_;(eV>3@z(O)Ju@SRq zt7OqAFA5k^YLMt*>?Fl$2nk*3WBHA8eE1kD5wt`ILG$RNRJ%IUM;U$ox(d~FuYN}# z2j8W-N<8{I_`;Nze(l`iWG4A^IH*qAY4sTXLX0PaY6yAo?cVbG`epdgXpbDi#^lFz zSfvbA2VT=Nhqi-n(8#W~c;#J{r1b{r}_f_Y0xFHvUS*Ssg)o z_2kx1dEJGLFZAWTtpByw~usoq~Z3~1}uU<)eS=n+AwXr-D z6^l%>d*k z={P(RkE+QBA9VW9&lotcZ(BPM60nZ|=KRx{g?^D^H(3jZgW#zz*4)8LT`G~oHwP1h z;*{mWu_TKcX5rWnS-)WUQ`q}3Es({bEdQ<0CZjeTG;k0bGEsLZ0F~@?EYN4OSQHsx z?oL=GHq@RsnRxMdV6tljAhmIRNjE!!M-qhvrPfQk^w~VPmwG#do}_~ zn7E8ls?E)xZT)4Bdg=9AWR>i>E0~uS@C%)wqdGA4VK$EuD=;tUrR3iO@5Wzo6fl|r zi(jhP-K!dPkp`YO+C;2O5=?1I^+z**D#qp@Mk!8_66V#)Wvk7i zC%3%{PE<&vBvKm#Arsz(llGhh#vEsniYEzJkJe*A>esL7;BOsx)AiSxq8FDfUxI_! zebes2G=qhS@L_Z2&bjo`%Me1M`{R6u$rS~ei@#eQ@23Pbdit5ifAE9v;RdCX3heOB zz4!Vi6_$$Gwrq)$WRcI!H{VRmibc%{3pxXq`7_UZntXa`*#3Bq5V6;h;KXouRwz{mph<1u=u>vv{RBS%p(FZ}X(M?5SnJlE> zf}wVj0O?%n?j;aX7wGxl(_i5%Q7|)wxAc(yaY?tR0zI(=r@yKdK~#^RWYfuNcG23U zT7TtG1hf>z%WL|pvUbtCn9c)FnvI#+*rhfaD3?q1Xf#x%)uQf(6#5W-cv-7oR-vT= z$}lyEYNu-$K=sClTCK1aQU_?&QH8A=!FT;yC-?%>8NRAe{%TDs?OomB3z)KYg0Hbw z;~&`#f9{}0kNDSMuUnyQNq)M6&l5ZsXI*syRs-Ml<~8uuPgm(r^9-;9Z&*=4khnVG zzc%=5!C$xvE05BKQCIw3E$C02h<*IGg02|}fH>j_X;Ax0W6Opt1Ewei?kgD;E<58{gfd_P9uXwCZPx%dv%^$FhxRS+ZDAqEL#~;qb+duxNTs zE8)S|v(wtvqT~4jVgCP0Wif15gwv*Swo37ZNHS|=l`oqVt5g<<+NPjwP2^@*W|UQE zJd@3{lZuTPkan zgDA-nfu%w^%judc1`ZfZwc1<6$&keT(fpPeXY6gnGF5o|v8UdgGY8`j2=2J!j+<<< z2~qH&q}C0z)Z%B49=#c%d?N7#fx0l95GsiIaZ}&MAwvcoa>%!~+;U4M)op1uV=5*Q zRf*yO%qkpRm}QmEp#>Ocn06QPIRbRaoFw#qAv-j63(z#1f<&w&q>T48YvIsk$7_CNpm(wV>bMLCCMP}~M~4vUF`8U_0f z8hHKnH-b9^WYZ{>NJh*f2B`UT7FQVdivIGKzr699YY@$Cw%hKe8*U&(8moQ?$c8mg zQXp^+|HL|gHG@Tg&K$sDN-Kg|`4O`L-lhNfH2I_@T*w|M!T~K)B)JlI8AOple|(XA zI`|S|?({+w69DK`m4BYZ4NI?irwn@JvhtQo`Xl=QP-Msz`|$ySRANyun+vCsGI~a1 zgyh#Dq$bXZltQ^Hz!Q!Mp-^-4YT5B{FBmX@HI$BQjMx@;2+E2HP&%Iir%QIG3UDeF zM`(p0HVjCe&!^LA_U`5I`zaN_vB&OMDv5-JDFuyVXEEzFFy^#03~$f}`;{y`l}cmZ zJ}9VDmJ4GxU zk|K#d{rX{SCZZO3O86=UVuJ?s$67~3InHUoeuVbPwJ;Rga?34lz9}NTD&?0iTh3Z? 
z$&#gF6mk-dK00&yqYphOn^9e&n5LId7%a}U&^}Y#yF#HQA1{^*m2rdi%l2vOS>gg< zj%9tyHdg)r0Qk8)7CTOXw0)a%12MwH8Y#dsBT1zYtEv#f<3a zhWgPnl8d!4y+?*zamRsZ=r~?2BsU#M5gvf-{{692hG=G(4NF+@XnQI})dkr-C9`>syCn?cik4Pf>kxTd4>8C|C!FXz(o1(f zAN}=o58&POtK0ZX^T-Y&Qd|4;%TU10|>$0=p`_Te^Mmizh41C!lo8kIFcNTz`Tf>MdP9eVy4v9C!h@E<+`yZ(Ksy z-g@vgy84%T9D{6JP^u4KMP+ibyaEAaU~w5Pfb{WKDZd80=fkhF7QQviff3GLDb-^% zwKtAFNSlU0Rd2`{JRTf|%VOcFDR_q&X@NTdoQ&z6tb8 zZrD)70FwtRJ14VXd%kk&pWof%>$B(1VRtoo=#lH|8|ePjmEeoZGzXwoIUiI+Vh8>X z2cXO0N&?|-3bmfP#4+h za`1iH=oM1HqmwyD2iGn~NhhCPx)n>hJ%b`di!o=pQGJ=Lm%9V0Zm>0#e&QsDv*+D2(KvM+p?b=yn83^2zkx;5(Sg z(UX1|{9gSveeKm>lOl;IvSgjI-;H@P=A=lUyfCTzvW>t0cKwxlYe2G3-G~v^KWI9` zE&d{wW-PM)p?CcK0*$|NWs(bt9FVI0zxXMA(gaKXVm1>r{bJo^^Akh`fPg)Uf*uud#{SZ9D6!r@*S5&fIe2%|H6l$=D|p^F;e3*v=`M6?4Q7DzJ;{ zkAJx2>8GE>@KP?6{xEgwh;>KSr&8<#)ghH!?0_l-e*K%@d~NsLAvSg*mg!j8$WG&I z*CRX2pbgdmd^pM&JXki15iFV52URSztZD8$cmOfXGyLyarjjt5n=oLo#$LXBIi@~B z5wVsgCo_56T-;tHmK$9RD)A%+zr zLKw1qj`a(64X}sMoGQRKW~>x0MXVVroC9DxL=>TmsA6)HgN_`%$VL+Wb3cqLvTEH)Xts?cIvwrm}Y!>LFj2(r-4cA?_;Jx>dfIarybE^qkV)Nn^lsxRP z!=8Kk8Bl_s1BVQfL!XpM0xuWDN>r#KfdCd-P9gCef;8&8gOMc0FSbLrw6!Bcr#@Bs^4jS5I?Foa9vBM(0!Qqq!mZ2RqZ;OIukinmlQpTj8> zm!t#>zlJ810{JH+zDyBJP2mI*NHBleW5;f4D-ulew0y9LuvGDV&71ZUu4%~bCOF8tM{k~5~i`0HQ&1`4CIn@!yEo0BFX&v=K4 z+jz#I6M2f1G34W^Nn#$aO7hV;^J1`m)QDoP?7BnCqhn6 z!1khZ1Cn+1bie9i%GgG+*DBy3ts_Hy9f1oA$f?_e&Gt}iZuMAfGr2@6*4~;A$08mL z;f>S*nUtDX<=p6U`t{$|U#Sj!hEpIe6*Oh4!q;#o*&~%lM}T=!O2e#__^ zebma26{P7M{X?z%_v&vgeH>Ah@_Fm8MvR{2({#}U)x(4@Up_lzq1r!mlOIn!df>0% ztyTVOy0kR zEOqpl)QP{QrM2)KYrZJ@>+q+40DsMqnSiMoOdKRbnm?HQ=!;}~)IEu6ur=Bmq?eTN z=5H>GSCaPtwel44Zh)kF`!Xr6!nZ8F`v}|5&az(JBf~!}O47=k?_nVI$yXmH* zZoBnX7Rh5bAJc!pfGaM)yqLy!COm1cNl!ib z$D_mxFJizEOj&|hF5?X(uM5y&-OS2?wYO@MZwH2rd`oM~TXW}Kd)>8re0`5^eEl1W zz9b@)_x$GGef##skA{pgJ0X>}r|iRp3m<#rF=FnpXk<0U1Qk~stQ45@W@KDJ zB~zvx_MO8A4_+sm&9g&F+x~E(3RdFSiHgCM%a_fW^CtGCQd?`dso%JYhc;klzk(ay3lP4#l*B>zzcJ=U>U{jf)&l&jaQ zN;D8IKS>7cmss(jH1lZI#_SlyX2&v;r8D~_nPY25s__GZ73eo;J)Dmw8uFQTl5PDA zRSH^ESNMOnJi2f%(^9hn8Q519;Vlk9c%4CZh)w{hL?S4!qfjWDWm^^&4XL^evcz$d z>k84Pu)$?98}w<6q%%zUwPz5s8`dsX*Vr5}#E_cBea@D7A6;%`66?i$`;>{x;wn)}BEhdGpSAE+Sl|U)rA@y~ z{;Kk?q6XODgF;omocw#38GON#eS|*nhbNu1aM8m1r%l^@)6GV0yy0`tJpIu}AC)rc zabw1eTyNy_&p$tU^yuThe*&&UAF~%#Jc+C`cpYp{_-DxmrWVV3rpwS*pUduCZNkG+ ziHym6h+jCea>dFUZ@F>hs+GU_&2KQGV4bAggb7>W&q&S;ev4`)43#yuy*)GYp@%cg zE5kwdCn6g<9~L#)W}B_mPWTiIzCHilmtJ~l*WGr5eOO-F^LavLp^JzB_SzBQ4BHvWHSpFE}{iEQXsG3!(tJeR4&{8?t=GuuW=$6 zYiephc-ZQwDDwPcCKx#7i>*m@Ieg> z>O@EmzG!S}EU<8uFovq z2j4odav+l{KKA&NCNHdWLj(H{gb2XLp_N^Z$TS;S2{ec&CT^sNHyS}-0$+#%CzerbAG=mSpH!PG?#fm!;B>@5=U2$dfEiKZwyR10RHSq0)uU~A5 zGa%|=RHZq`xT?K=K>{ZwR2Kvw<#Ls(7U`)K75yC^Ud1ZpDuKBag+6}(7-kNC5CyV=52}geqz9l&PtE5t% zD*gsr^tTSi0GZU_$b!YjJjm@>rHgp(QRDCUd9R8XQxSS$A z(&b7D87?P3x~szHPLHZm=^({7l+^>5(cjTWnP{|HW{gug!$F@$j4trS3@86iekkwg zW7QZ8gJ6I_-(!sM5I!D!G7FGlr)0a3PXtZ;waNvV9Q^qBE7>&?j17OoTpz+;5RLv) zB?DpAIrs)lS$g$XLgevC>~QoE2O#tpO@D=*{%-@qUjKYPU}tNLa1>($?0{Z%)l{!^6b)c<($BdxM@-Fz9x-!F## zqP)5oSC~@kf9hhFIszi1Xh@I=f>%!G$*6HdJ%V^v?SDM}_Kv?$?0SmRu~DCvLSB zXHC8}Zyr`Ed`8w^e?6Lj-v|8z3(XKt80IO1&jX%N4Gr}bMem{&6--XFi8XmB+Hlrc zXU&^C2NX8lY*YDAvG%KA;t5}N>1Av<0w264@C_p!cQH_X=Go^Sf9z43M~$?I{z)-? z!jXf%^{uIYx*XRE+L6y^r%#`L#1Tgm(i`gKawS%wL;spIx^u!bI75GQBc+?3gMT7t4lhYFXvx=F z$S=-1tF>iK1(&Ws<;Ik>5C{$P? 
z=r1swfjoYBl2rh3QYH2TNQ7-b%X=&Y9r9znjobUk>`T(B3-PsYXduCIqHbB$b19c6 zmE2RK1f9!uW+vEV@RKQE{hMg+m!x?%f2Mt!3<-cd>s(d|Hm!o#hK0TK;6o4+BdX6~ zXq9>TWqS`%%yRkazg%_oHP;a4{p)+~NfQWnMJt+HTTeOV6t*Jv>(_6;0}dF_Z{Y0N zv$3s~a_t*zv>}ES(8GR#M-QDeODfQy;fNn5OCAa61j{YySv zFmb0HF-mf56N@RJ%8(oTZt(Sb;yaisZv?GY{|@rAEv7(B#sUQKm|e(&b!!j z^q8+tp8U{^8PFPo-A{jd>O&7bJb0ZUxHrTSDQv-*elVoMV$eXpIKmarqEaqDY{Lz| zIcYEU80p|k8DdluB!1+PhgYsz-Pq6oyEEBbeMBL+(&@H)?zsz$!toT&n?r{U$Lcg; ztBK)MJ+2rWQi)A#yY0S$k5j%_h$S%`XWuKJrNafrIeDL%x83 z=~t%PSY+k$RWlx(fv6jiD|_v|H=Itza52Lqg~ri9?Y(QPYfwnIV6ab9gS<6O=2;-G zX>KOuK6+u4*M?#l`YMHsF1~Qhnr5~XFJ8Rl{0n{?ELIZ9*jeZP2Jbo=W9n7}m2571 z{<#+{UHD-rmOTE16UL7l&#v(yg9kZJF^OfM!v1m0(O5MRU7o30#yeXDLjbmZA~o3R zbo4Fnq#Ot=s;3sQ%UMpM9PJs45Z8<=Lqx=2fc?&Fi6sM7-~q?JXALQ)V~|>!t0PlU z73B~(sf#jO{b<6!sJ{Z^=_%hJFRe9nRXONO&G-?^O?jnr#u_1Gg=rXx%ISIpjbAjx zo&|&o?4^A=t~&fxT9 zsi+uhSn{BrQV&)9JLscK4u8L4!e8ihfiGA>q-Xd>HV;!O?tpLjTR{h(&nkS??Jj^C z{%Vr&Gz3(YHA3Q~YJ0-pgAZQHp{5GHK%g(iZc7%VD*csD`(owXi)u5wpnpe0#531= zAQ=5k3Yl!kNgl1?kzR{0vT7!$!Q5e}N`!UK^79Gw_wX0W0>yU+{ROn<>H^;?wj$Pt zkdX{lz8n3O>aI@kJLJb+eiD4IG9Nx*Jo!|~4)8wyM*kk40xzg99(=sDW=H zCskA`@6@TghqcLP2You=_td}j>aS=jYIh#LI@7;1e|z=UdO(xUFYWmI@6}&a+4=%R z(RE>byZ%Al!Y9su)OEN*)`=J#f$_#tEAM@L()dUTbw+;w(`EkSQ^yUJ-uPA9|5y#5 z1pia|wh@z>F&BkFZ zVYlN`Pd~$Xc5l4?dK+#OIBO^Z(N^t*qfVr>vq>vnypx0lzexgGvF8AzOeDs>1iYy5 zv|^`NDNvgG`s?!GAoM=?GCPe%6?eW|8b4tR_NGA^qF)m&i2;lCGd3x_JTMR-fGjLA zrD^XYf$Ia6v17)4ZI3-3fAAqpS{yNX<{4+UtZF{-M<=m44tj}7rDCSN^!D7j*IsjV zFp)GM0esB{4(w0;*xg`>LLd=YjhJQS5>`LzFq^Yt+dI-SF=p+-ehO%W6%MaD9Cq;L z7%*^PQ=h&q&8?wO5`U8O&bwgJ-upE6@23LG*DY z>_Me0R-2ofum1C2@(S=I<77TpNF@~7iCt0HvzWjW%Ly+w+|cCz1~Q>Ak&HMJR6MNY z-My*4u8y6A6la|NGy+Y|4~S6tih%=sH8U*=Cz39(!!r(#3b)dFQdm zeea_WKH^A6oVwOscQ`97L`0xmn`KgwB?4m*4p;TqkMenJWNq!4=H?c>eFEw1!uQ^j zg#d$WD6(>8v%;zeWEt6e-~H-jy$lyP3j%`4rHG>ua1X)^gdHa;HzzD-!~u(N*rLp) z-#hMmk3IZIK7&7F=>7NJKjhGF-+S-0#-;{2A;C<19dMQ`mwtNcsW)ExSB#(~&U_38 z_M5yv2w*`&XYmI#`oopbyL0B-q#HHfHShU)=%Vl>+Tk->GwI z4ZbAZBc>@P$IGsdq9J#kp^(9{d)%j`^Aze^Q(h8q~O z1@Epj92Wi@HkFstW zREMXS>cAopy49uk{5bqQ=P_4vH~bMYt58%>)A)0xMGe>0 zw1YlX{0%(7t(b~ZjR{8(^!Ka$<#A?a(m%rF$SGJDFp=!>)!tK5i<@9~W)V7$mI}ZM{_|Ry*#B`@VcSH5H?QWBTR8mr!=LNMNe?hsC444qeD%Dkg zH9;{8NyqdBX%&1oz6cv2P>g?|vKm8(3Z_VLOWXTAL98*k_YxI}c*%{Lo3a8O$t?ngNmbFy&dGSnL(G!ICz*(@BM zKkw}eetW^}*>8Njc&QHlW1Amgj}&4kwD;tFcG_iU?J(jLOHc?0Ik~b@%0=ty^6a1T zX59(}FJ!fU5c`o_cuXmXSro%ecF84E_MN=%!jC>=pC)VEi!S`_qK`g0^5`SSj2YXq zX61YDzI)%aX;)r(MXtTw5v*?D!1K>OC&Lkwtn-PTgfJ4|hJYzb zvp{Gr2Fi0sC*a9^$3dgGv=kGGNZ(kWU!8Z(@yC6SbMnZubkT>q?6Ug-2OltbpM4W` z^|@@0(7iWZfBg$jJ{^oDIjWDz^#O+-zV5o~sX2kt{I}n^`1hBT+S>@*8;WYrT_C_B zJb~YcHunWX_uO|c`7mt=1CCTI74fj)M86P+093-W=gs37I}8Cq4!L7rTR1Xo24jpa&Y;!S9RRmT= zt)Z$!@cAN}>=^*Io>o%s=5NRVN!7m5;PXsT{X?I8;4h4oB4}RsZTN$;+GBqxmfo6v zX|G=}@cx4L*?AAR`I?;QT!x8He36;vu) zY%zh=76L=udeHzBy~yCwSYp&5Ef-((+eaUIZ2sGCrQ6bMh$YUKw&n%OBS&p^_;(JA zV>^k(iCiOdZCfiRi;5g)7#yf1k!y(`P)=k&m zi0yO~7=LQ-{+-(7TZQ)AQq2OJ1m#a!{hnKOTM(utUDa8wH>QpX*8+{lq55KDaG zzB%ceH(hrfXkZmO^sqyxUO9Eddh2oq1?D>}j2Hjz;xhhpmC8nA#*Erz6b9?8?0sCC zobm~A@fyozGUcLT?O~KwdvwZY9#mr3(MqLkF7wr$cR2c(W3HtlSQCrPe*Lw5_SxrW zKRa#P?YCv$_rgUV&3gX%%dea|@3mJct?*~@_%D9>i~jxl;c`l#Hkg7Pu^hyXDS18j zobwtR*(N2cWs04TSl8%V=&Udx7R$p&4nN`ePVcnTZxw8K){dM>wPGCbt8p6~FDsu2ujSvS1Pk#+%p7q}Fv0FV-N(uV0M(MQx zb=F_iK!f;of9fBU3)2|o41i!cs8!ftCA_?3eOnR0%JtXb5Y8(hGg*g-RQI<#qTg6RCorENg>ruMCD@QdDsDRf~U5 z@Eut9*#$m{x(wWBg8yms0odB`7rLs-DB^NSj2(SMdeOfF{~G%H^Z`SDf#bMq1B20D zLyyvpzXIr8Rr>h!ui>w7F?>|k6>;!u@fRE`3x9d*<*cQ@hrh?)8u*4kwmYHK!C!-C z_^ZlJ^3ws|$ESn8KK?>MBm-a%^bxi$hmq0)(-nWi*x-2bFATfOXAS;7K0>)Od{MtU 
[GIT binary patch literal data omitted]
zmQ!XfE9AF=tj6imyIP8nWPl}~P9MkL;Zs&%-ToVVE9T(qHgH{nEWT#6<&Z1wY&H5A~Mb4L;+$s;Vf+w=km3<#e#TMtrjWELQOOg{%)d zid027yBvlx@FIUICHOpGX#$S`$$lZ-Q@T-IhPhRu1jhd|>qZ~Z$oeWCFg_QQu>S>Y z5x(~?;ZuXDjB1f+LgBXlSNkhameTZ(Wbtm2EYP1sqp%d>EW z06P8-pJ&DX%kgh+Yz84P11vP;Kik6C8*Af%fn+Zk_7jG-Ob3{AFuLbwZAyb!;FtEl zrT9C1#~+ly9~uh7u>UR6N77j?pQZFEr9YS!@pt+-<-h~@+ISkmYGmiKr|~n7M3D0F zv%|OKcJd#LA$U}ZKR=83JA7*gO>q{9>TsbQ{himw-%jKQh-7g5oj#7g(?<}wOurCj zr;h+K2D4r0NK7U*4R!f(`E>etgPlIeKa%VC(`tBDl%FDeZkV=IB_g$^7e)PLQk+ag za$2+1T9afJ)`IY(zfcvuOC>C-w6ZcD*BB2S1b>wgmDzs29~NsepJ(ENq?aLo&t{ki?FDuoc=V(<%ed-n?RU| z8oVj`@T@5RrSb`F#a5Gl=dZ}XUx|D|Gg2HrQY1p@`j_=r^ww1>{|?_9^J(aB8UCUe zq{WOOo^ARY=)N>aB$e2h>E&Mk9XX?q-?z!9C5kczG_-HhMI&HPSoyKmD%@txC8cdw z2r^96Z_A+`&qbOPJ(Uf%Ck{`_qRhpDFiMYQFZdrMD*(qX`>n_5U?W?ARTNnLck8Wx z=7>Q`?6|`YI5Mgd+I}nBlHK_WUx>3lv7+$QQ!^GVeEs&@ZwD+PKf8WuH@Ph;3T$6R z@DYBE=BBi^1B=HcSn`SFSc!Sqq{CS$x&3zA6K!UcRA?Yxt-}sGbnKWh_(@m{2wjL! zBQj{|TT9-3_uZvSmMmSmbjjOGUthRz>5{kJd-pvXAz-hA&Cx(?v(5T_~br{^FO!Tcw-S$@S^mXXnrF+)F!c71;J3l>>kF%GYLLkw`=1 z2Aau{i}-r*CmM$8jm@W-wrtS*IH;D3n#O=mb%(nv>49YMrP@3 zriFAIk$wC0>DRZPgr-ojp|Rnfd+s5a3$si^=(49wo_zDoH^bjll9;VHe!A6bR>N>; z!lr6`uz?y)Wk)kA$AM;t4^St9MA$UyDY3>!*{fI2?%jH@VHt_^^jL7h2640Y>e&+` z4t&sa{3Z9+wubr*_uhN&lDFOhKJ;I?V#Q87?eN#>fBnvPzD>{6t*$%x^MtUsZr!?r zJ=RbXZ2zEJzkZuN^w9sDddexB^T1D|jp#$qNYh9u)h`N?s63(kTNV=iH%|a;dT#JL z)vo}56>1M(X|A1iXTO3w)vQZJ91U(eh3}{o@h=vH2rBq-`s?_sqK@!C34J~memney zpW#?;f7DiMY$^RcF>I0L=wN@^<6jEjYicXsp8iGtT18%4aBGG5D5F2mObG2`Blw54 zg#(~+{+8QM;FZab;0N{dL{;mjB^CKwN}nQs+X|J@hvYK+B@t~9%j~~F=!`zb-y(d2 zr(YqS9pGD5JuS!Il1t!27DL75)1K<1oIZf*Kp(@uBm5$L^hkip=-;KkMf!9qKSla< zLVsv!Wg@h{h}a``m)CHPbPKT3Z;jy`7K zi>fFZcBD_2{z64JH67^VLdEm8OC;14CT`NcmC(PaU&VqS+yA0;>U1d?j!%mEB_Irv z^c1a=tt`TCFaI0we@&@7fuBl7*$+3H&cx!;nre;FSW?LzCalDy+D*e}04eGiecIuV zi-2iAFC-YgGHT@&Y_2uW1|H1Y@xw(q9g2p4EiZ#xD{89sw$l5r`ucQ@c>#Evo6Qs$ z8soTY$np>}bb`OLQ{*uK;_W|0ABq`;H%kA19sVZpH_nOxrTEv@Rr6w;;WtZ2Yin>W zY9nC$DMkvB9s8WgYULCMVfE~hYHDT$MkLdc-_V?cRHgFsuhXX;fB)Cv({!VcEVwbE zbNv5l`48cj^H)^tWPbF?^((X`VhR(wmTW72J7(^jMn+tLJ=I5%{{JZcHVNXSwR~3R z$+gKpr>vo)c=ZyAI5NP>s;UJ00&y~#qHvlsOe8B~vC5W=x>yikHiZ0uhwO(E` zxZ&dWruo~cd{S`J_MvqAV3oGv|7pn2KdQfSURG9i>t2o5qG*4L^r3a&4;e8n>|%{$ zp_KmZ@c-B8)46<#QDyR@AD@DKE6V4;0>2abfW9@X9epgh{rJQa)#BpyiTdAwJ~4^5 z0(P+D_7MoW%n7#J`Wj6GyaFi!rqm=K$GvjMG8lE8(J)`RbqOE)5EQl2as;XV7$Ly{ zNf~?@Sd{P(9STS)@p0fA#BVKRX3d`c!t6O%cHkd=#H6OiCghJ0A*$GR{P?f!yWgF6 z-Nov^-~RqLPe1c?b#*OANuG4liRc=T*hC-y6q%b@fSFQk7vVnircF8;GV-@4pTt*1 zqi|!fE!W_ogAP9A;B(GChi%0W-@5MIa7*E7>d~{;=RUU;ZYKhlQpsvHCQ=|w3t!Ok z<;&lGdnrzxXccd=biA*{IMsV`ukJl@7BGWx~Oam ziw!ve04E=gq3yQc{?bb?Nu<=Gop;{l3!mTlx!KQERoAXryB6o&*s(;Cjhj8^g{5!4 zsa+Z>D|+_qe$j;&_3qOf{}=!5Tt!X}!ULGa1yxr=WL0-?WH^C;km|_APdI@~D3fWS z0~|DH;IYRZcZ;^G&wjY#qjS$a@A?~W+;;5PbSC@20}s^JRP)x4igy3fmnVK>qL#0E zpcdkyO(KkK##(?U0a}%ST!b^ib=EWD%5?eR-61A5oxzt!AY5e)OB@s0@Bgd&?tS@{ zmp3#vz})-)dOseeJ@?oHN7#!my|nzp<(cL*jijn*JUa8)XU{q7?29hGWQ#3^S|7t8 zNj4NyoJ>}Om_$`=!zD(j9n6*D7=FOk%PWZ}(gofM^AZtEs05!h4nVNNhlbOJ8xWG0+1(u15H~ZrvJbLxrNoa#H*Bx9QQB?gWFKfD_uODzNF~4Y*Yk@<{&t&+1QQCA zQ#9a%wJ;w$>z(-mIUsphJ;FDXw_}>Zs2F43`3Ed06;0Ey3 zJpK!=o_PCJP-F_=8$JP=2FMBtJ5^3L{smASib|7Ft#kVDIHZp^&@c({H!uPIEN`UE8!%N2t}Z_UT#mofU)mMwmtmj}0n~>T5RyVn z`9&zBKkpYjQ&@-P78%7~mGH6#J%rCAqmR%Orh;a$mHuz;evjAzi<@d zh@-GnWcvR?eM^a8Rtc&GK0T;F z0*BF<%y~guS7lmS7`ExHHen|gv!6;bNaIj%w&QYqI5ST5z}(^1KIT{|bg>3M*4kOrfbMR~3tc8=GyUsuL?#HPm&h zCKf8oY3JD-J~B26*31-a^v}PnX9E`_j7TTFkV%()miCLJ{KQcq`bb--P56U@X^WD5 z{9yFqz;_XsK2u-PFGw#P6JXJgKZroO!!M&hxC=9buif9Qh+v6u{NTSfm1C8qn8<&F zuWUv9^_2QF7G?fIO*qA9JBMVYn_C;!H`Vs2p^ZYy=r8mH6$}X!imh4GP*Yp26fq~7 
z6Yf-FHUhu~nrCDHep~-5oJq2!L%Nszbi^MTB2OYjIen;2rS)OW2Y^Th`cMr;{EY|JIf@SAuN<@j z!T*5%~K*l>g4?uf2OzUIj&_K)=`?hsYLSfxk129Fk9?zr$B<5xEWir>(z4mEf;Owv)+N zHtqIP_Mue%;VOGLr877m3rw&7A^nxeiKmiJn9aS*kGgO*Q;oGFg?Ru^%H-4NKx%vW zQToT}ugQbVrR3j89Oy41XZp(*YtujgiYtdc_-dp7C)8g`#8sSvG>EW7NhrWgP}M*x zv0sRa8QLO${eg(1G9ePsha^f#e1yLM1@hT$exX6V#0Mg(85qo%3aSjr!b!ln47lN^ z+v261vH=iSGN?x;|3N1G)*n|94ENSBS@4a)r;OKY7M2*&DE<@*@^%PaCZdY)&0B8y zGy7N(BgQJf5yMBkwPXo8naSkp>beoxwBP>wO}pb`ZMQGs^TMei zXAIl0^BRkAqn1`89%+1QAzE8kH*&-%mP{6E#N^SR8~wu{{1EtLuVXEwuqtpSCMzW| zq~phrzx^+hH{WbCI(XuZd8)SZLM+{U^)=UAd(AcMUN3^i6VRr*rn)Je898#~O*h

i5By%N67Bn?K zrC2a{hb1Pgx;qh1W<-AR^Is5{Hg(#x=B7q$<)YVLU$k%$&8n`gSy#V-psL=z`rLZU zt;TZ>n=~mkK){Ee|N7BSGR+w{OXrkLx6FF}`D2bc>d&{_!ubG5FOmX(fnQTwv)ShT zAsz77uUn6=Q(TBeQ7n9ubZ&Sm8NBs_1CWr{(9R$%`om@AHdNNRo`uFQ?t|zC3BV72W>AaC?)(28f+O$=p*q} z@sJS87XEdFZ~36V**8PiC=XTTh|c&$Jpmj0KZ7gppBb=ziIlE%fAgs#c&bB|BQxC$aefy)$QcJgZz9F z`QI4*#l|-HPJ3&v(MR|>TKaYvZSvWUJ{|BU!AiBkm-dGs1+GhfyY#mx5GX{cefX61 zS5YvE;Q#0nVD|B6%QAi|B|Xq+(DAQ`H)06^KCF{%nG;*`zA=LPE=l<>R-oe{Z9Yb>4=d zuoeBCKJvR5{`S)Vf*Fv@;hTOLhQatv1A7$-#y@5B58(sZXk|5uSs{JkuLQsnl;|`z z*zz4sY4 zYGjPV=Xv3*RtUPGsxNDn;EURNWccgbM2j000Ox={_ESXj+K`u^|1Hu#V48}N%i-$| z==#@2|5EuZg>P#tHNg$)bonu&7=3i7YV}b{{}TApiZ=R_T@BRpjpSJ*jJftK6G zWy{_ImdQ^C_^LwqLYV?#V$9!|M6PK_KRx5=haP%(&I_~q_V0J!)M?Cyjl+&Ah3wo7 zxx;CR{EQ{?6Igdp6E+54QdCjMX7e-t_V>o7CZ^f@e{DaUV4_u|q9vV4b+4{p*R){% z{O6y4Zr}a)-|BOtSY>5vbaE>{`skEXzs0JiErt#|=%9llF%}_l#u{X@CL8=HTXPFL{p!?1VSVq-m-UBe&e76 zclqM3M<0Fk_7k?J8BTu?;;A=z^@_D;o_WSL1vo)#xI9tY)>`0PB~-cYC0A z7WvB)>6h?w_(~9b6><8s;#aan;A_dLyX-|c<*|Rttt+6vCz^h3g#N$-rjs2odE#6s zZU$3|swaYGF!`YrC3J)jm=gXPOvhj3GWbZ9#wTs|K~N38(?>t}-w{5nRmIAsB2FSp zu@9?DVpG;X2AP8*RvP-U4Zj#@5BZ=vi93}BHHcMOu`Fk;RV0MSrgXklPC5Ngx5$uE z_1F^CLj4ZN7674yYEdo&B4>x?G6q2syg57~)QNkZEPU=X!qUsJoeaQcid@*%P#%ty7g<>(0z|Rc4u=j@_1j2zpSybfl7%T zWA~?cybAd2K?eLwF1@5%_wIzARVPz}h7C?uC54-jfe0-8S@g983N2i?@WKm!oJ^!x zl37<*tFUC-?SK%c_m?l{Cym%pzrhxVt5rY)vE9D3E75L~r%bu+)>{`ZT8yJz{yuro z*y${pX>K0B-S!i|@r|R8IR-xzZ$)_J1yn2!>l0Z=`P0jOI^~Wjjm^!Re8~#Irluw& zee9TRjz0RB?|t`sAl962#{crsM=NNuB3!E?z`;wkS--w033O&9Be-$;Bn?khCoy(V ztXH-M4%19*y-4 zEa^n`aGnx2jErT8{B8kT{5kfkvrFOgAUwZ&lDhXA?h(a#V*_uT5T6sSWf zb$l^L_zJi+0EU7iE4gaMF@#HvY-TE`#32+%F3 zzwq+fL-^vW!xv^D;%XfhLUy!>zugD$jgtX=ND?i$fCN4ve4A7R^j8gJH~cKWG8gG% zCDneOi&l`^bxtN5hkCq?Me}b)*8dQO#X$6a_|rQsjysr zLTd8N&@APz-BVVw#P+SHA|`_0>n z7cu@n@9Z;n-DUP>J!`T#S#V$iPg}>z05f31=|aRxbnU9u_dodHGfzLeVBX8Dv^Ze@ z18%tKM!koQM!NM#u4~9Lvbyo68*cmKpBW|}f9lB>vBrWZ{b(hd?6P6*-=2Kph=~VP z#5lDpzQgw8M~oN&nvBZ&^y&5RLk}%|ebEE?X+3-PIN{`zaEj5IxVG6LZjUsy_wuR0ZH+mb*1>Ax4fGXJWVU%X?)h#61K zfCp8Hq#DE4QBpaOEv^asCJ{M9t({voxr>x@JQ|Ju@MA$NTX9 z{ra;ur77E7ok$@U#Qeb;GPCusB+&YIz1SC;w9_c6d*hDDzxTf^hoQMfG3$|`GkGv^ znsE@@A{T>Qf}ryDL(^~()8xMdK9uDO@eee>K>zC!GVq>ENG5o!4abEQ2vxRJ|XKl^u70d6vdkO&{%ideM;)7*F2M=kj$g$BE zG-EGm>V}?)R85*$p;}kSq?_icwdc;s z#~+h!Xkywkd;ZHqhi}OyMXa7oCZgHK^nnL__0_pAGc6>x;esFjC{~#VW=$$})lE0d zeEiW_GiR{g<*R$|&H-+Cv279m!!P&g>HTViqj0&;PB*3Sh(A(jlV&bK zlCqFdPz*o`d~yh5o)`hz;8PFdHa)evrA>lT`0MoP2oMeGUe_HT#|g(B|Ln}?2sTm> zbyOR)Z@&5F*Z=7H*hJ7n28js*AmxFCApjIV@wOm3@Rf>0 z2~}7zbm-7C&p6|Z?|yd)t1g!;;n>Hwmn`kwce7!`hH&oTxN&32ujP<3&sZ6ir2QOY zS=Cy&_S$Q%{Mi-v-a8e?4MA8CVf=R64jeF$kpw;%Qx8?qyH~Gi)26Lkzn(h50aEL2 z01OeIS)(3j=l&}%zXEE2H2p4!v1gajzk+3zM^2hVY}A4U3+B(82M|uH95Q(D*m2`< z9pdxCr$*C&Oq@JRIPunD{V`#QWR+42Sf6Sdx>?`N@L-bD!NPj_W2ZmV{PuUgbMh&t zFj8SntF|r6W}!b^yY$jak2~(TB}z^y$@`^{4CW*X{o0-Cuj{ z)ul_{9y50AE??MX*}Kb5IN=1~V=piquqBmaXn!ic)hx32tXrAA^_tzuwF9=FMNQK!RY~-m!cXDZ(!)>Wg8Oyy;;9FoPQISzgNF11Fw%Qd2rDC$H&W ziTs;TDk3zHe+S~9ZK?%r?UE3gXiA|O4LCRxFX@*(3ldeB%V9{rJb^m{Q1T3rQ35}}Kjd!-{$vsZ+tFvE z@Ill`qnpx}b)XMWH!eS_)Cet#mdZ~D{JoBL^b!80@XPS01f&#w+R+~%R-L|eW0x9H z-oI>&zv53&NQDFZfn@lTQiRXVCgjKHrYxJFkJG;${;Hw~T9xT?{Ha^=8S>X~`FQw( z7UJ*l0@>|=KTo^#SF|#M1s^62PJgG*M(JNBpKbP`6Z&riemnkt#`V`_NlkJ2G`f}A zw=Vs4NebvAQ}S8UU;G3z-})cgXV@CZZi#)M3F?+uDYX$>+^+w% z@d-v+bSSr7Z%{Se&@}m{YKl*UVTT4Iq=p&|l`LxbHs*F{!*x`Zc(_i}5sXOkp zlg4%R8DSEsXjos*mLcpGo*@sxA;cqWq+?r0`KW*9>XP?cm zjRDz)b!*>w>#aPCmkRkW?6f0qqj`HuXg|(?LYDOn&FnwRoC7CUBE^yS6)n7$)*PL| zcttv=rGoM+X{N5ZH)H3Tnj}-~=5&r%<25yLw$!a=BwVOSH*3)$bAF?b^)E6#f&B;D zQvK4RH0vLkjWep`%qw2WGt#zc1w@kjKqS^|0+I+0(*%b7BD?6{249aX1MnHyG7yDC 
z>=K-+V&MWqebzB(nooa>_pQ0H@xJ?}v%fl|<8-g@pLdSUV>sT5t$2C4n>+HDBlp<- z%ad=r71H6FBK@>)e_IP6D%o%ppnzV*I) z?&dy{Ot4D{)PS%WGaGtpgQ%fyP>u((v1UP_8w}}TIj9(15gZ3Q8T~9$x zr6?@{H%at{(oT^>4>0JWMNk8`K>?C%5~d2dsGJ+ z`7f`%{E91>BO~)nD0zzO4Yw0-MPO)x$xmg0?U!-EY-m`=cGdgr@s(F*&*JH04?c3i z1sBK%R4A-ny?QEaMFrC7Z=Z2y_qsZq;H)uXRb!kjeDIMdo&%S0j%id?0-tlFTWt-i zc8CywSQrE**X+jA8o_6(-~h@*qgBkSGc7r^59c`Rc2G6^({Qa}Lz=)5?USSNVLSTE zH^m$czg!Ftmi$hLZmt&1C5$DPt(I@AsWw%;uD$`U5EJ!Q{06MN&SXIn`_s(2p{y^< zjk3o#)Wx@6$Q3eqj1tpQlp5KDLHN_On-H!@=8vr{g%mSB@I(cxV=Ui>pZN-WdE~~S zEQ6P6O8@@Xzb-U2Mv|%K#-h)hCtv$NNeS)Vl#1iJoLbW1lTgGl`P>X&1RpUHS5y<`U#bjK{|nWOa7sjbU-pL zVlkzTWE&b3_`_j?a_U1sEbW#P4>_2&G&D6rZLP(^m{mkMXd$Yl<+d)>a}a{y60|+ zqHlP|#*g2YBMyaUq++kV_8L57$np<97&m_WK6~xOi!7YjK$jU7KP6ICF~kNRu7W?Q z0=e(mvnMy`Cl!%JR+Pqt#LQ%L*bNA=MiN!W4TP5ppA`6E^eKhhv-%RJ6*;-csYYP; zBb9V`CIIbl!TO7N2y) z5dbyfNf_v-g$dzo$XPG3x=uut47-|4tadWM$gs@4L=!Ud!eM?+}1}e$( zh7CbsXM2=;?AUP=CQK0Vy)KMqN-J1JoE#f8u+bLQReMH&8*XzhK!L3~`PO{*o@%Kj z^KI0Dq;Yn9%*u*Eea@PsU=-t(uC1-p?qm4^^yK{>-b2({T*pk22VZmro2lsOo}&3UmzcmlUw1L9lzm_B&WvOg9Z>S-04lNLAw_6ES(&Ct8Z2$UVp+ZaBms9Wam| zURnXFwIx^Ejlj1kEj;(!bN2koS3tN&w{DL<_V~pYU3}YLZU<*Tm=x*2Q^nRfx~z_t z8ILzMHEQI^K@1VzIyMg&#OlvMEX5U(3H7pfUn&oBp=`O3dABX1+vWJHeaI)dtrnix zt$sp(&&duNuOf!99)wYM;a`F}RHaQykbUs%_HE?0w_&mnz(+WW z&Vd;I`dCdt?Z~I< z1RF8YP_Pel1_Ye`!2<{6(5yr$eQY2P0NGj3Qu@^CV-yIWkjdJr4{ILjlK4{jFA)>E zLCXdF02?O{O*guOkFm$=2A1QGDOck{`WNAQ|K+FzTrm2m`cQnlcBhYO5mkYuORYEj z_1lKsCG~n9k;3zp!uK{CjG!5Q-=)7!Qb^mSzb0wYhEfeGrH_QqHQl<|e?ouNNt(zO zt3bcZ0Cef^e}w+}lv`6A&BZOS2F;PhHoIa*GO$0gLJZ+FMK@!~;8w;-sTvxQi!Pu* zK!kM9F8gLE1vKkl!u}TvYcTa?_Kn=3;Ug+x=Cd=8KkiuK?inXyHYJ&0^{>Ye#zTQChRnUSFp?p z@odHLh_Pr3G;vy`n`j4-5ljPH7?8)usQgVla&+reCJN(8s7M>Wf(F zU+m7coA{b$NK#8|i*~Wnq?|D-wTK%*tdqJnBeMZF4v0f$AKUDm5fAi4z31TR(vx z(kB={7=;veq6?tvQEX!>(OvdU#fpOt2WaDG(s^Qo1shNaKCTIJ_?V^$AFP?UX|Uk% z4O}opP`r@}!b&w-qY2kGJVF8fLRm=gA7@aAzsgh;PCNbdm*&nzM+i7#K7)f&;d2!g zf4=RuTW|jht}JFa2p;-1Yu0nO{P~uLr~j2X6Dv|U9-JLt;8&rszJircc$wp|7hilS z-K^=VFw|MDib7IbwWp0WqN0MC3mKRLQqhrzAO4GLuSH51zV^y{Z@)8g)X4h!4J$wV zaQS=7nA}9F;wPVcBC}qmb?9WXcFe*p6w9<^2~41+H8s^zs)~viUz&I0O*cOC^wXT7 zM|c`@v3>X5@32D;+h?D>aJMQfB~n3gf*RTG22%cg#?!e%E~XH|ILkJzGZKHre9_0) zVf2|Zdlp*Fwn)hL-h1uM)=4#~IIE@>E`A*mWse+4%HBzoAtVaQvvdsC`Jh2VHXpD# z-Yx}{Fyo6LmWa1z{U**3L^=j#D2wl&*sB-jzM!Mw*vc?gwfW}#MvfXy92|aT+mR_MosaM@S0qzR+R^kKcbG6`%49b8 zYE5gyC|ve$zx4)Eg+AE)NV>pA-AIyxg5;L3WWzZ)M+rh&kO%l~#4wSRcw6TW1<9_M zRb9wgrlFpsc%l}EAxuYYDTZegv}JPhFiSc>RHmlyCL69%B>FGah=7}aMwO`sJu#Lb zzH*h=vSBF|Pvl^eP|IEA?DF)s@$)CsUwA+@assI`8YA;73hO4L9D1qOiXf*}8Y@hBofK*IqyP!3EuW^uS>xP#A7n&6$;Kz+X|pB0JJW z^gP~Mf|+%;4JaT!$}BUhizN4R$(s_5;^7*~6d5w%c#J zt)fyVPg4RwCEQt!lhk7 zbTr1oNg_*uyz&Ak7Gnc7)EAEo^uf!+M*jG%@Xd)1c8CvL2Juuf1)I$eXpPc`Emoys za7FI1scgcztg%1Qk6`1_MOWFd$ zz!zEclhD&Nt~$`hOpwA2T;M6aybHD%VR3`*ea4K7c@$?D)XI9v?x&x2+7pjI&MHyp zKWfydBPLC9wgL@q!X|>w0Dj(i=bnA`**H(Jyeu!}eFzn>k6EvtJ=kCDn4^z=>glHt zO-u%ZCtR!Ddvxbu%45HIEPl72{MUbhDy?D#D<03wuehA<>fLwVUiQKJ(40{ed_*?D zoR@H5Ml1M)fDnY&ZuBw8(%>P33Aht~rR&0`pTit6>#YrLq|LqyvRu=1+ zJ+J)~5B#$1o#~g;UywX+P`Tihzz?AOFX^wK87?8%;y1Ng`@?~E2mF=O1w`m^mD&gW z27l`T9Dk+gGWs~NoW22FJ}G>sPe=MY^tMlGwJU@hev6zzn^kjAyJa=`?+m_msg~uF z;UDAzl-KH&bV47mR<^cymC(o1LcD`-*Dt4H5q?lO)Zb3!C#;}7{G$FEyFta+WinBu zPk>$teFUvdK7~L3L&}CfI+Gv4Cwn>kj`#~&=kVLw;ddd01=F}0;;+Y@&|jHkxVrSW zbNYyhMVfZ$uZg~WbY|ZqV=l{f7yL3gG-V*+)6ied+JJBWH54$J=BH^5sqOX4@!xp= z(xtyf#EsfF5mvOdT*1dbIew-N$%Hr}`^Jq8=-c3%eda0fJ*(JeIc+{gpXvzk<^88Y zQd(*MB5Z*k=x=BIHTX;r8LBY{Q_KR3gP6rNH?qBJB$3QD5yik-JXQ-bZ^|?EW61_H zKZZ$qnVHKnE(8~1$!h!d88&R#&Y%AR=f$%WfTat}>F}~KgiA%M$;ptfmJxLpP`vWi 
zsF+0?1ZW&|zyXBAFi>RIR~BkkR8`NN_58m3?#rx<3Es+;tB7tPR)V4Hfd}m0#CWf^ z=JKnqWIWGUw6VVVrvQwJU8NTbW2c_&<+~*>)@|I9O%I*?hdTWu(l3WejNX z1h9kcq)CS~e6*4V%JEZKN$?me8#o!>X=s!+`h#M~Uv(AEE9Z>$k9@G}E}o7hOW$JX z&QKb^#n2%`+4YXTGDs3^qLglIJo)648&=_IV$4~{GC@QaaH$a6ORuYYwNqkjpNB8`&i zLWXlItfK6@~ZSd+({gJy9Th0k=+70^g!GNCD;60`W4EPsGh>6$rW-sAie~o6r{f z-$4GI2=*D41YL^T{K9ANChouv;VZX9T+l=KBn0qX{()fn)hT={rJ;b(^lv4#NkK>W z0;fEDn|_7xNkjWun;P>hD7DBw`Nbd`n@4kW0yQKPI16*>CbW#w2qtk21#}I{CQ?lF z1`QlIcH8kg@3PCNQKQF<8N<{jRh+61X z_dR#BijWv9v=R|Ru8%(INKTApl@F6RoTo@rF5P_O;fJ9G%s_Y9^-C9BbP4k#;73~{ zxL~4*7>ji{z>TeJUVr5k76eAR)#dBgD~hcpd;j$N{yKH)=e8Ps!pSF3oH(&p?_RW3 zD;HU28Ht{D+PBxPT1f~-qFeV@UtJhwevLMXm_EJ==ns7ko;Wd6zqT@#L?_;Te;KpG zbW85T50)Qs=pnes-8iruObv->>Z-s=_CI5ZSY7QlTW|f5E4n2O$@v#yDega9KvVs;2{w8~!_B0p|+w4wymRz~Kh!YEp=fekfH487LZR_31-ikl@l&nE?B< zDh-gHVvGVr%HZ20^T_a2pw(ZfBEALKuqM&cm08g?*i-@+D=H@Jumge6Lx&FI5W_XA z*7P6PpY?a_EJml|_~S)ljo$1R=3qS}E~Z%qzw*cdrNU-(P-6T-B-|9Y@lf;6qc?UGUy2q42*C?vdy0? z_dl1z$4}&$>Bj&?kEL35G*2~kn8OcqiY`MG8_KF z(-;X1!G~BZi6-cmgMQQuWfFXqaEVkU_Dh-hM}|rUGzr}>reRt5OZU94!w)3Wf0R_deQ)iud78eD2_wMaMd9`&|e%$bS zhh!24{bCP1_#oTo6Mx&cPapO>00D~AC0}{vm52>oaNjtXKz}+C`kTRn27wGX@QITS zw=AAVoL{YakBI#PHE4vKQnq5vn$@*6TAynMmj^~4e!GB!jg5GKPnv!B10zz2ehD!R zjoS1}dLltEUWwZLbo2#5|8!B~|Ed0Z!^CiBv?1Xi&=o;5qz&B?{3UqpI~w^l8suJyf#byZ#D)8tH|MK8Cr2u&OjVZKHoFd}}%s zY_o4FrdR0LbuuFeK3)p5KT+2)CkL=2)-v7$5 zU~1g{z%sb#V;p)TH$MI;LuodMEuw77D@nwi!8BOrPaAlc6$E93z^s^$4}k($gI1=$ z{3uo+`(`6aB4wC9*Va~VHGVvkRmPLdZ%2;Wk~tpsnXn7SMYCtUu=(Kr9Aib0J*8O5 zh$;59OOO8T?*!S`_UsX5g;tC+xN;ot&Kw?J3eGeZS!4*+BD#cEs=Qr}B|$sgq-Y4< zP}bI@UVY`&=K6J&QQot*Qa7F&X2P$`n@7WhDh{A%JeO@QWE)A(h6g+O_it|+S}0E=NAx9X`*e||H@}q$e(iZ$*;aR7k|f=TaI}4 zx#t;FE1m_1p}5KHiyO#ZS;fXm3ZD|s?6j%?J`cOqM4MX*xDC72bz|XJqlQeJL|B#2 zW#X~!GB<*p4v5W2T*|;Qsm*>rOb~ z1a82Nu>h}$t+DteT39OzwP^``GG)PG&Yaob`|fuci^C}v*nIuqgD$z`l4Lc$HYAF@ zqFQ4_LP|85@?fE?lMcr}Oh90)y>Ql$2Kqn{=)^xRjneJ%+9mt=?o1FogJ^F_-oLA; z>XcYZ1NaU8roY_VAVQ!~1WCTMEP&5b71JEZ7^++p1rUy65QI7i7GH8+Lk6NXaWEy~_`L9A;ved}`2a|br$(DZvn(i9#Tuy# zzW;;i)2Ff67(J?FHp*uIi8x&gY-Q^^b`g7Z{>$f|eb$2y-v8e>|Di|kJ_rK}PGKzo zX7yzxpKT&sE?=8Uw8(8|B-Z#rl-FehjwWmId_@z{Znd>IztOMiYPKA!&_-fXZ<$2} zFXGZ=N5Mh{KU4t&vY~O#^RwnY|NQ+A{m*s3{?(ZA;|Q_j060y?SR-60)Ymr>w!*Kb zrj}=(c{aUzrL?Pg0}Os|(QE(n(8J$2^bm+iFrT7HSZl3Vu2%MBTDI)HLk>M;!`k)u znyXT&d#Bv_xzQufI_KPnAGn{1dcXetrcb}06KffBUB?5?l0RKsvV7(Z})TZ$85&PF_ zi$h&LZH`SLq>Ijl61pVycuEzcLIT7^TBVQ15fc`<7T^)W>r9f>Lgj+-GoG7&Kz|{a z57UJ{HA{cpV2b%J2RxxF4Js`SOhAoV?4O~`54aZn(iU=kbeg zAtI-kCyD=6RsH&Hw%KNV)hNwPD>h5?_k7A^_+tf7Y{bRdZD>HoSd`uC)pQv$5)G(K zp2-0qjC4UNuqp`=)q+R5OqTl%89XGwP4pE2{9?c$VyI_N{NEM5`t)Y~Gaa*brYyOP zkZMvneCi22Np19zfef44_6;nxJ^*3J2KZYo?dWd}6BLTI ziQGn?5I&^f5uvD=Oqxv%4ND4{I_QT%qRx%uPkn%J`PmqJYiDWugL{*E&Mt^kUh5y@ zvHt8Usoa&-sT5|yV&&uM`0d=ZXNAiaB!Iz_u_tt&^7 zidJm(0GGWKu{KLm%tO zg$iidGyBkOk-u%pc4rH7gUk{=CE7=k{+|K;6;96I&h!hL;f-yNzuV4D;%}@B>_dC` z5xqC1zky&E;kU`B(Z7uWo1(uMW%3W{g@Bq8(!U-4ZScjUHt=>^Jq#;T3*w@ZH|C#DiI zlG2Kf_F<#;=YLIqWdcgtEci8h=aen-RC|!wLCM*|(xC zE3t2k>e*@BtdYdZ^ozNSZe{7reU#26Z*F2&jd9zKV`s7Lx7+UW%P;TMqh~$dC8l|J zl#)pb^7@;xG(%^+X|a5~u{q7U5W=b05ua%wb5UZ)aNO{anLVTXszj94QQ*oU^(c+` z#aeig!EBY07o#%XVQMj6YYUNUjMDJnNW<_5GG}8qu0p=9TP>qL_V;0~gw~l0ph7_+ z&5d(km`y+!Gv_8^#)z>1w@8u`ma=J0fCvxCb0ic_o;a~=&1_RdDUFDH*R5Uq{s-@m9=#O< zU`R%*S$PHm$Pht%iOqUtn-vkpyQsT;Gsl<25HSw=-!}PlyCpJE5>iRjWw~@&^;(od zC2~xWAY!(of9d?oS6z}p)gc(l%q3O{E}vj#>R`#r*kS(26RSjpbYt)>4ct2DR~i0- zUvcd<*REXo(PfuiHgDd%t+v|g-f7eEj6h*#6F>UV5C8I)DZF>T`kHHY-E|izEbw`@ z>B@0%&p+qJoBu$p8P0jO{}aa( zu>v7fxbsk>7GfRbYcNj8zCnHD4257xF3WW#ezt#+FqPW}&aj0R!C91H9d)**u9g$$ 
zpc`QrwA~^TqZR2q0WS;Y&fVuLdp$AZ>CvM{BDXp*AliZzz`WoqWT_+VpM3jmsvNO_ zkbsV-Cf_+_;-Lp4`Lv7=oJ#3tyz?ox1ibFL>u4QrXMeh|W$d^y zD_4C)91Nbrx*pw+KJv&XpLk;ZhpV>TZo5MdJB)3=#*7)e_dfehzxyugdVSHN?Iuhh zV2pb>j?h5{%pLX(PQXVTP?A9&qf+uS>%OKswZ48sbt(yxe7^twd&eGq?6UXYV@VfL zn(QkzdCDEy6C9h8!%c|VMhGM%zATe%jwfrZ2Y}8-=Fa5P>94A#M$jKA(3V70t@^!x zar>r(kJGP@*WYselElfCrKXm^w;ur(ZTdx~4)DDkt&n{HUCC701&>K8!Vg+SmP)pD zQgToLWs*2p3q=cVyfSQy4jzEx{!Dd+P^dE~PKwE*L|I^!#mt*%CL)`$@drgXhBCFV zhF!vs88Z~fEnj|!SpMzmCT$e6Lf&BLhx;9Z~2kwO2#g>Yp# z+=VBU6*T@z2BZ8E1gtU@m}`j)hL7^{2t?&`lFJs=MHz9*7xMHABoji6>Bluo45unF zO7f}dyfLcR=%#l0iw`OSX;XfK4-Zu|q_gh;BrfQkuF+oz>bb)gB$3vO1!Wz7gY3_g zmw!$l@K;p=fN0wT9tkGD4P8}G3Sa2a;b8>H6u`22twfL(03=E1dS!O&Rfq6Zb$k4+ z`JQ1T^szd4IY4Mz&_*2ggyVz1uV)ngI&v!~Ug%yn)Wn3dQSe>4P?;PGX$>gIr9Jx* z7}+BVG1|cJu_|0cF6S?DWzZ!1WB_?XP~VarwN=s}_~tS*pSphVOs}u>HdVc|rvpR{ z5$823LD7exP{gduG$l-)D=3Hee&A2OSA%ECuNkw25ITPY z_PP}E2`copO2r&2A3_Q|&0Qq}9s6IhV~*vTkYS}-O{%CTlpn#VOeU)tBEIy}+;^8O zVVHgO&#sy@=`hCT?6f;$#*C)*8(6{h>@&~uZn8(O9&6UECE{KqbZtYy4%r-xm}Ze4 zfFo5=BKmnrxuJ0bOZuprSeTl+8U^UHr4eyP8h-1@MP_|O>1T5p0yu^aA94Tv(;s~B zVU9;+X9Aq@tBfi50QMwr0$9 z<+9Oa62}3$I`S?|${`-~RSjlW(8A z!}i;K|NQg7hM+vQ24RTWyHCHS#tk^dU@9BGu=I*8gr9i)iDzd%iw_dc`XF}D;K3Xx z_QyZ|aloL#46Wq|OK4du!h{C}`c$Rjuqc;dv$?7?M<_D7v++40MUBl3L@Iso!Mm4S za@o{7CgYw%gyPl7^Ugi*JExzStY(iYlPS>U00bJ$2E9ffNh9sE{#R{PQ(ZohN1oVB z3(i0v&G#UxvIr}E_>HD=WafcsrKH{IALNuElUMxVk2}5u*QhG`s~&}cA^$*9HX9aL zG4jDZN^ns?Wi5|Cq@=14j!daCA*XI{Fs^f8vdYd|RY9X6lwCME@-d{Fu z?D&Sp23&{p=Pf`!+45!Wx;1Rp42G;~0ye~9w=cGz<|spaRfq|d;MfJ7Xl2~pPu$nn zvy=}Z!OcRrrB)hJbvDEPQ*4|a$!q`q)GdF$h49iDPtQ2_ymMyFoqNorLkSX?I_=(_ zcG&UUbIut&WY8(6erwdIk-%wgYRsWBc@}gg!T;W=cRuszV`>MqjYUU~9edSPS54S{ z0$!YIMV2)@H*@B%uKU%jS+j_-h^La%?!Nm=U*7YGqmHajC7LU>wGl;GFRge2af*NN zoqxgj9R7-BI5#Zfg{6k%gxlKmjF>Mr0+bVra8z;Twkdax+;U4r##OX(cEpNRtLDym z;g(x&{qWs);04iScz%Cy!TERIb7zv+JNEVxXK^cwWO&+UKJ)a_H{T$dhG-bc5zC}< zIXsoiKUmIsVfJoAa>**pglFNR3ojfoeE83<{MpDYwz&1yTW|R7Z_R_IkiF{Ky3PA- zcKor&B~l3}j$3LiFAbU+Cw}AWgmmBtTeWI+yt-zKEr$K&_S*&y9Qfrge;EVFlor#3 z%O9U34m)Hfn^E*1W(Q3~HB8Eo9efUo(P?T-qjkUf`Ol}^ap&3Uzh*>8MkhsbqbqdL;=r-P;X9p>J!A4d3mV zxaJS^X-GQ&X(esc(+a(LP+JT-5QL1b;1jHL=ePmHKn8G~4gx|#Ks<7To<5+pWJe`j zq{&qTt1|pF=xy+o2HpM46N)+xmNkTr;WMMA>f%~RnV=nRHkB_#+iqkgv2dhvY6hQF z-^zvp^c=(h`$}d@702biP~+tt4e@8cexaa85M1U@bmmqO<3to^)uY;$2Q*)PdV~q_ zrAsHGQ*E^dSSuYq5Q_A%)_a`>RG6!gA^k(lL_y17#A~C!c@cP^d?EgVU{w1A^jCEy z^aqSC;2TW;w1f~o&K>A&6i_3qG+$n#V<{mfyaV~MUk0D7hNcDsR#!RxLcIvTjXp}F zN;rW8;JCC4>D6)D(Wqj2?LZSkbFE#n1Tabb4DokZXgU?E+mn*RD~1uA-$N&%f`%I% zm>gurJS@K|7T2wb6ysfms!-Dj8Lq-kX=Lg0UW#`+b{8s zM}~;;S4fn|4?jZw3WXwkp$BBaBa3eQC!Znx+wiwG2KZYU<^JE%Uu$hCe1W#BBYeZZ zjlYI)&yUDtn_iN{Vm?exi$?|lEAw{d>G@x~hyw%dU*I7{=i1Ajbm_@qhIsTyj+i9^-iY@yRy zsHv@Gi68TJY9RUvcbJBA8k`mGyz`E`?zl4%i!FR@Ax{~h-E-GHFV3G2DrcT{y4sM> z5j1i0%{O0v!%bTZ8?yJ_`{Z)jI2)4zIp2EU^r;WsKYi{Cb9UL~3mVY3WI%;&k`lGG zERd8pfnBZ`6>1M@fPq}aHCJErr(17jD#;!oa`xCZzPNi9yz)wJLxa|o+4)q_M2&@H zK>$31$w*F{aG_5c;4v^1t?ai=Qb^ZKQ%vxAofv=vEQvQu#|~(Mfu}E48EM);{0=nG zPSDI|VIkz`!^=TX6sabW<1s6|usrwV73xx{VM$M^GC>W8zb2Bxe;eC%Wa< zTd%zG@^vd#l7V=${(}c!{j)2GnqkaF)Da8Q@R6oDrjNh^c1aiAushnj+xlOP-+=wG z@@dj33ajzvQR9axhvZy|qQ6G$mZ7|Vv4;|PlX~h3Gz>%xgmUw+s9(BQE>AZ2N&&v6 zg9@#KsR#lEO7VC2l76A0UjclzHs8AC7F&GfD_?o^!H01Z-f;c(aE*OqR;~Exwbx!% zZIQ^%pWg*$5$~laJb6#q6b}zog8jp^1P`@ghiEFsMzFn4j9p=3IL?kg{siJT3FvES z#vLZnI`x!OYkPDfIQ;B+i6^rO8oTmo=c=k?7HBOkYgVr~|Ge|@CE-nCa{Re%ww^qBa zc;c)3er<~_w*30UiOZKRVV`=y-gqq-s$>8FKmbWZK~&St-#qTP#tn^8j+&G;Ak+b2 zu3}X+Yo$d94!(@Di6ImeeV`O^n?xUCwP1_#P)oJiVbVl+;zv4v%i0CbtS7BBOq7TqZ=EOQM^a5?ijuO 
zzqbF&FTMEflTKy18GM20L}IqKWM)78v^Wgfl7a~PlP#S0VkDMeC8)$xF1`Gs1+TQ;U9AuX*+uuVQNv`G*JIaQXhA54J)%1H5eKuSugw6t@5{rdUy7yRJ+-+yEN0<@K2 zR?hVJ&N=6we#ZBdoh|<&ED6uB{1*2;p2g;7w#n88b4@w}R~xpNd{S3C{UV`M{^0=` z+xmA9pp1$M5j}MJchIkYRDXqtIwm-2Rk(iHy&ia_$b8YV2|8;O{ zO|SyWV@*_jLf-OPOAV=FPOH%#%ikb#Qi59mA8R9^LP$O`-C+gMEl&>+39uzmcR>+- zqfC^P&kJM(Uw~8@NE%4TU-y6w>QrB;MytCM!}Jt)oA2?@on6R0o80-mw^#5SKB?AA zmaJjqD6Q50rz88tmJ7WT36`ux8|sbcK8w@ zSqw_3U6gb0n6g-lEU!un87`;4^Tp8*N#vNAaD`kmp>UD_S>ynK?hFyoGdxAO5Wdsj z+xKtNU&G4jV>nCrNGrk3h!NTcQ4MV}5iQH3*78J8xvUId!MCRgtaMXUAs?UIe?t9L zfe=203}D-bB}nLu@Q%O1Cn?k~7!YFBrN80~^cO*RE3j$(H8M-NRmA;|Dp<}1du;m4 zqjvli093_v+~p_G000V-$xI1j1=Oezv?Y)q0W&cYM!GXZsGoZQcSyP|k^g^R|7%z|eIS7lH~dR*3+N*|qeiF| zPTR6PW%MVbT~05*_indKf1NOa%mkzf(@dTr@u%7UioHH*huIQO^p`0;EOi|bm&^KJ zN6r+qOMgED`U?tn!533jzQq@*uElprE!p$MQ+Q~0Nn!M|SH?l&F_Z~LGuvW{EtcE( zpHzO>Z~qGPm>Mu!45)bDC_zyWnFc~khk-yW%EE<3M@6s(+XHdmt9Ngj#-ov{1`e&HNQ@bN)nBIE z&Ynt3mb^vV`}FE{%uz?3a_Y&wdiG>8jQZgKm^EwmOE0|$mDjGuVZ``9`sSk7Z~N09 zG8`DyyYIw94x#F%T!w{S*-Z12HtopzjX&e`Vv#qenfz@^sw!8bDJ7rw(p5*f*MvsE>OuWH?**@i% z;lSZ|w5p+z&E4PxfhxqFRV-V!EZf+eZ`{E0H{g@26|V$U5W1%BfNw>$X-b0cfn)x( zvXvW7290K0qLU|2x!{8H5sAu#PE;5?bl5`=J~(*rAPVT1g;W(wZX+C?Cr2I|z_I6* zMLd{%TK}s#ImXF)CyAv!;ZyRgAJPL@7h&@h8Wb+z$-O%+VqJN4<^&}`=)YuYh`P8;NT&P7cb&iu+Q(fLsI+9 zz@WlM%RhK?@f*ZvX|XM*spj(NLtRZZts-ckdhn1$JpSb4k8?RvnPh)G6(*qE)yju<&&(d!Ezee{v9?YA#Vonwo_Nc6@Vu4j49h%JWuia!<|6Ca9m zhozVpnHz4pImiBDl|(hMaXRm-5U{9pb`$2py!iM0kjFL==1|0!kIZhXj!t zQIOt|5-CAiKm{YxA%%p5gium%opxvD|M{+c&dl6~@cuqUe)rsaXU^GY@3q%nWuLRx zU2E;di`E>v7WUJ0I|Ig)9mn~cQ;>;Jc#X+ff&uw=c zee}`vftMv2tJ~UIp=Z8a#Ayz?0dK8gL(lx-S&u#bxZ)Fw9amg2_2h4#jErV8n9{&b zydz`{TV6h&CG8S>dLmTLYj6FL!{N0rwVHk<67sE-AlL+efjwF(6c^2(-@>M%9UWM} z=pv*0D_LLCXSWNyw(Vzb{LVA>Z$~?^^ zuQz64&j+1h^0eqWj4$ZDX%c4+7L@Vyi<8fEA8nM?#6Pwti0m)g$6&pAKrGI-V@`nT;fkR!@u)`r~(~k8d z$ih-YG+fn0sC!4{L}RYNx0<@N!3hfxb{{@4x)rf$)EPcSy}`GEJJg&Z&ShZqa5IFg zM>-OH1m8bIOMcW{kMJef!cn9!482R0b)^q5D)3def0hCe;5%8? zA}*{QVKQ?+X2ej1G9D*3$28^fw-fNmcl2V3+!^whk_0VmBZi-?j21_^wqq z`0C&2!yQ58w^#b;wpI5&)s4Co1c(3UlAq6nkEr)7Kc9g=w`_dg{9rXzW~`G^I3#U& z9j{y`k66f2l^=1`gzNM3w<@3A<==rf`uED;n)$(r8I8P?KK=b1`YZZ7f_9^&!BHL)b`o`~rSMIqA6L=8>~J6j4A{u>46bbNOYL{`SUc z_|5(B2WLeSaoRrkzys5+xfU}P1eB8uZd!%XkIp)a`(ii zVg$vgz<{YlEfes{{6^KT@J;^kKN6sx;2U@YdLsouFhks-8PcOXqkI(#Z_uY5HW(0E=zQ1tBj2YyZ-vVPCTV#?=kBnS}Jb#Ry zoHKuZ@kR4yDwB?SmtA*VqyK<-B4RsgGUyP=?~3J|wt9<%x7}gK-48zKlch@w9ocv^ zwtU&LwboioE)F0J<|u4IJ#s0NQ-oY*_>4N*YQ;nzdRb-}bu`V!8t7&$5oyVg9ZXJ? 
zY`u!X7Y{^?XtL(Qdm}raq^oCt$wVq~_POWm`^=tL(U^JWy|q{?hz3tXQHiA(Y6BRb53<1b8 z6oRMyhlv>E!GnjK_Px`|{`qLCwyI`WJ&zE5B##H>%^MJUb8 z7(pN@^8@BB%xa7iTNi3UFHSxsd7A$@q)LJ&!PbYAhHFNFe({S*i7$;siU7ja)+zzc zbU;n)T%%A+J8IxtuX?_4zefIDJ%M%bsV4YB#!v&D&se{Jx_aW{|JVDoxCZ|0P^rb4 zcQ`*R8>_}H>Y!w*$)!=pkxXbpACKqO7>rdaS6Bll+Sm|*Q-yD59()H?Ga6?Qz&erh zB)*C;O;24AP>V`?>Ucbom>K>;vT|Q&syb~u<8OippPiOz5MsU&P*@8naJaS;E(|{3 z9JTNT+=tIJBbEV^0a4$8=>HG)4+zIy*S3Nm@YTS9t>7=1?&w5+Rd(IEW>!V0o0vHG zenY|HhJD$$A=;j4$28_Tgm9o~V|48`O{$jBU;pl*f(^n*Z?y3F>!VC>J>hQy5`6K@ zL|9l-SMX6$A?bmzK26je{cV8$O%MMnecb5eaH^@^xFeATvf$H$RTcGW@uyJ4(4P<4 z02vru)5TiQF=81ou=e2t*xISB2FsTOSv~24j!bXpUivKhSUs8`fOOJ8ZXsn^Nrqu&N&)-h(Z&lsop`-iONX}#K^+jz=0Q6}SnmBLzi)@%a z%iTr}L0Y!4Jkyg%|8AGVfL{WvY4L<0=gZ})dKU0FGM?p>D=;Fm6j2<&)$e_VP z1`WbNiDf4_;c--ecL)mvuJ@xk6ZVh%2pB1^|&O3J9eK&e%$Ru@Y ztV`ec@FQgSCdP?DPnmqgMHgO_r!A>`iUoYu>g&=(XOa!=p+_D%?c1ken!?>iR40o| zI8102mn%N~@cL`6#@2Sp&o1MZvfQGIa*)%GdJtbRWg;*>BZPcnHVV&sZ|;<%j{IQG zJZK3JLMzMBSg|dGxdf{6!uk5&XB~g+alfBFy`>*(Qv}O6Z@+WAnEN%a;mCabbJ1y2CTM~B1j?&e90yaGzH7Ck@2|qe?gLH z>TVUjNjTNJ%RhaYeo6j;B{YC{y{Fk?7Wk&34hvO*u!fQUd^AUSo?J^dn6OKC(8!N_@OMm|J0fPpTUhess&qE0!6Vdc-x7}ug zQ6uZ>8&11r_(ofDm|9ozR!t72E-S6xBCS&n4 zzkkMePB<|buP3wk^oJkbW#^p`)7DIm%zA{cBKhR|$6|;ZWRjU)leXu8ytO5`Ki{V^;@iA9HU7{&=G#eWm8w=5fynjrsc zCB-UO(O*q5dd7hc8o+6-{u-h}RWKO>*lBIgxgn@C6>IhbRU3{zDCE9gNh_GChVTcz z?;kVW$=Nb|U;Cfx-9DbTzzBH#(0D`tnZQZU2gArhn)8!H913>5adg`spa(5k+6U(MR-?{C^SpJN$j>8UsYw zPV<|OznD;^zkl|{^tTKChH4eQlOH$qTK;yTkKeIRq0aQ_JpTYtJzA4K{e@y(=wFp4 zlg}?MKYjY!ZT|G-(*N!(pH+&x`|hq(t0JF$`dgWQzL5Ts>Y8b|^HGs^op8&$=FFQ$ zpg$Z`Bj{f{ztzq^USqF`D*H!`{2LSypds~pfUh!#1E1C#U;p6w(hWX9JZu~a{?-3d zern(gw}12Vm%#}M!$bq~=%ZJB#>aZXf6ThE>%P68y9JTqPfc%yW6dk`ztM-AdOiwr z=l9!M_&#j>J4(g;mgC2Nbin=?J6JAaBFg1+Z~Xn$Pd;2iH>1aHw*LAfS)tJ}NyKDH zVG+V|*{vJdK8;`2h2;Io6~k<=q;dl6!q~Ggl@JCvt}vFx++Y7P3&$!DTYH^hha7S! zRs$-sLjR5=Y6}I+hqPaNy=!Z2sD-udz;i{IMs9xFoBRwmvThn_F6+pE^yM){SVxH z<8MesjEUs7n{L`<%-C-of85SHO&m06Abu`Wr%r8MwUV1dh73OGq?1XbJZ#w77@UaX zWdFMl=FEj0rD878(C_r`pMKzh2Vnbq_UUIYI`4w!mS)^z$O3ljtv8?g-S0)&jjp*F za~?vVg?TZL)^LWY3$GnAL&u$sZIXgQzLUUv{pL9gu29$_Pd6)T!CSeOiuLtQ`c74gA`4D$us%Cn5mwr5KO za|J`KZEXcvG}uP1Oe@8Yv4yVG03sxM<(Hrq*bCWZMI7l^5-r% zGx?V_OrlS9pubE6{vHTtDfD56kJX-E~iHL{5i`<4z7 zUe}%ASLe^}@WCHD&R%=%RmilFlMd-h#gnU>R=@b-i&zq|4eYt^K2bKFh(_R2b4v>f zCH=(_jO`ngS6};%k%0?7%CG|Ki=|jY1D=d&;^~UT0}eb8mSOn(&dI0b=&V%UXRp16 z3|p%l4b6CJ2AWvz$lZ0<-J>?#VB*A`v6al2@f3U~6Z0RR{^Pi@V>cYN0U5-I%M%|J z(C79i4J=*yVF#JHU|lJfSYzlWW5y)o5lmW{wvxghp|FgGGW}p6mC%SWEfHUw!Sh&9|O_F^e4xGP%N% zC5zviH!m1ZVk;-&d6P}X&~U%~_WQ|@A*()F5)36Dxc`BRF1`@C#e0jqc!W@sPz=(c zUbwl*5kw+^LZb*K9L9Y+V?g4*=4XV+RFhp zb`=T#%9{kLH%u)71pZ!~&shH&V1Xtc*uZN>tM+10xnTk);#+URQUkJYeb}M?yi>k4 zg>hIRpf$0&A^4O~1*;;E9()u=PbhW7oXfZVbW;_@L=V0_#tcotR3kmSb#6a=c!v?7 zXZ*3A`jyzuGf-eVOHND#Y0Iew%ZTPBfb@=4l{S!zfkU$OWuL_?V zHez+F;rrMMrUzfvbFWV@bMUFCL3D#}C>VXDpnw!mK2R|ae@7oRwHwrt+;)Wk9zo&0 z&$Xb47H$|62x+}IP``q|51$e_z!_KrEkr<7m^=IhjYE|*iW>f^S*4|&i2izDs6#_T zEgd4l+COTDMKh27Urc`;e4*siM+neo4gRh>&#tfT`}Egnznc1sg6YwY!xjmul!2Pp1U+xr+YrSQU){CdA9(ZeUW-@}uxB9E6uHB% zRTLn=xfW=y>8|uw0WTzurxH~KpIfZZLKX<)no_7&DXqrK<%14A^y~W{h}#0|QYF-Vzn-~ z@S@YtIGtVD5~+A=rd=*6bb~#O7UXDW!EV7v_^*g+(P!6xi=Dfa!!5+Ub(lT}CRP5Q^dEv$?(Z+UK>`UN09i7;Emk@BYb? 
zk5CPHZIoYq|clwA&k-)j<%*II5lYC;FC`| zg*805C6eiEw*9R)-ej4I!efaJ3^FBHoPnRv7`rSG~WImYV|yu~=fqohF`k+P85$!J{S;WOqMYj)=m< zwFla0%?*FBZeTlO(SGWw-(I$43ER{|0an>W+QGU!R*7=(CqF*#i67squq-HDJ4DVBmRZ1_5_$HS zrvt%8q!LR2A_E|>x7o^DK553%^H*|d+_BeW2{P%@Bv$U#Q%WMziN|n6B20A-2|tcI zfVTCiVCzRHSL7dPJ;^@}-TKI8O+FNa$ooC;91wL19DvceKnkKt1YfTR?uW|V;M27D zo8R1s8Bx|{^QU|J?KdV(+?jR^;M(i1YiQ_)a>=*}?xo_5(|)a|NjY%!b=ORpJcaU` zZoc`y&-gwesoPK5{k{i&&t!lgwP!Plcna3`>z8V8!7>SpA$g1heF{G$`jJh{@X*9_ZK7+Np`U$A`d2q{gR>TY@FDDsC9vq>mleQ7MymgGI(Fj?H(zu0RSXqvV-9$mi4>$G!L7 zH)<47=i2pBMYae7Lz%sLluv*C%YDVrYa#@Hd-oNGVY9>@=~lV@NKDKD(-ljEKDYi> z=;P@xO+EeXas8_pL#t`pYuE@W?*u|Zam^2o^mfNooihmI9Zps4LKu7c3bSx$RV~5h z5FaaFPQ1li9o~Ic$hFko%EWMY7n*^NgvPm!s+D!cH&vy>BAyj;T?1dO)Kr7fuTJgU zI_8Qp9fH8Q-?uB}Hf471YN}H%Yv5ZAYwrqm3Vdr*#lLs>eyg7F2bNJleG6wd4u_*d z&0(e6K7G8tEBOB-_;ew_Mias0&uGvYzTw{s{d=O11l*Mye1qE){*o?N6T(*ESCHnm z8;K9kRYsS5`1*EqF>0%vKiA4iD($)B%TEnZs|tWGK>yz0JNc2ZNryy5$f&Pg;On+( z^hAGmL;nb%mO%LODfl(`R~6Dab0GcU3rH2dM+sr$PJ5zHMSd#yyUGp=k(ag}eAk(O z-$x4g|3v+*!C#$we?8*gr@xLqefnD?{}NdDCpD|cj|os0`Tw`*udg-2(Xp?pUtH_( z=LaYBHk-K9F8JI0)~CNVzZrqL(5J`wp+f&2-XBE#9^|J7{px%E>X`zq^d~j*=+E=r zdmi@oeE+QHV=wfl4`0Fkp46}R{`N0^{#NAYE2F5Ndlj$tGk`_%a$FQm#vi`)WEONFJn5FaQ62Mrn&S`oxAhNUKZ(%~_~Qjo#*v^=_64`c$yqq7_(?u4iQjE-tmJEIrOI&{Zy&E1La2^efWsO4^xy=FmUfZ zcd>CKg~T}Ry6Yq|TembfDQQtGTEr?QFO|@$<<0lscOPkt%7yICJMa3PQ@?|Q2}4LE zu*Ve-I$-}lz4+qIH{S&P7tDQc)~s1O?6@QN0uyT;A2@QbGEStEEQ8TW#?YS0Za#jC zK|_XMN*XqF*w;3nAWK!44CgrJbPOH37Uql&HWSdcoA!daOI zY^ng&c%qaqg;L2(Ha}3_G1vhx!0C_z_avX(smiCfJ_4FVRWfZ&P104XX8!kYQB!ga zd?`S8`L}gHz^t*pYv#|&`?IZ&RK+35c#S@~_TU>X{}8?~2`*i-q`jR?I8mt5*4p~< z$DhiWTWAhM;)Lm~T(MHNlX5Vbhz(zNom@Vv^jmDjsDL?HiOH=lSuRBgA;kWIL6GP| z(#e4dCY82~4A$v}dbVf9eNyq5VVP4g=_cyxiLq52bA6pcY>8-;xl(&k;uMK|<_lS( zeF?f`>c9nzSioXI`!JB(E|f}_vB}1wskAnqHL^SUb1pq^Z6S*sLRya3_?8`8v1BgTw_7(>uw%MKXZ(%N$W0}qz6Ic5ip zXlzZuKnPtWv~r()_jzW<45k6xNS}D(@l(EiDmD&W9Z6xvM90hn5X{OLg*nO{nfK?< zJ^bK9)Tv|ws}}Y_Q3cjH>c9&sf&&%g5=g4VbfYS(2Aqrl8Fl;^pW8(XOW7=e=)puv z#$`>9sw2ZH6KDtjB#=|66scJL@Pow~Hv8uy3K$XzGDTLzeazTI=>~D(gyv`*R%yuM z%cn&3Gvr@_Df^5=-;K%mOF2ON^>0PLEP5`39A7?}FfwT5N1GL3Uo zqnta8ntCd6_^ZBGS8zOVoVn(L$6*&Duw~=oc?M`wTjYjYogK6lPai$ zUrzKxk?t8JF;%s7p$}DM;xwD0K8;#*u7Rqe{W<(S3#EY-NdL9~`JQi-sT{af`08A> z1yS?_4qzo-YH7vR4d;T2xzQ6nhhJCx4ZfIRjqGj>e1q+95C%XJSmUFeRp47|Hwe`* zrO_*-+2>W)4}cn7}|eH{HAe95r~-#Sqj zhQCm;d)!5bJld($4*J$`jex@2!{5pTLUa}WBDvvd(D?A_qsCNVu#~923hct2cq4JJ z!312q5q6@F;qRLJINFu9QxgHUaR@bT=>uTG-_;QgYGReGZ+jVpc$6qCUicdnhZvu# zY~_Z(gHJ~ewgGcB45WvF@Mp#mjBfCqe0t+E>$Pejmk>zUNwo;->SbJg3@0{e+nGkY(S!`4Ne%_i1g8?zmfvup6Ju3zh6rIH6@d_ zIz>jlO~#S#KK(^#nZx>?zc#=9hv~1_R(%%ffutoQk1~6{>-nQO8=` znzS^3>6?!)=kxa~tiSM!HIfEs^yf;?U%re_oa1lfo-bmRlFP80q?}#n0SY-qG%U&TWXIXKbSKikw)iV>(6C5#GV7XAjj0})t;aRfgL{=<&n zhINCF0|jBrB7}>F2Eam^@S^6n>~k|;zySk{+;DJOibdljsAl!g(wz`X94J_F<1CX( zrdx9)GK%re){*PLa=>DiRke`~KQEQ9x#o~vChh#((@#UDH(q@m!$CtsysfR}$}6vA z&PPMAl#L(1<%f$GYV5=qDp8l2oFb8h3lwQ1nt;BLE4 zy6NVdf*oyvaQyKnp4@J`?MRZX9P0!(k+8p5j3-jKXtC!VnSuzBs!uoka_UvEp^(GU zfxSuoEZECf3bU2r_s=*3mn&SQSY4CI39}fqVu_A55(}16gX6x3%MFN@l)Q{}JIvJw zD{Aj>8(s}VNJ014N~YlbC}_{+9-{8MOxu?6<9cLF9d{8!>p zk{^-Zox@&cO-`&g36Y8rCngfSioy{V=i!9B`LtG7U@6ub8pw?XItmLcZ&+R=nMzV+ zf}=NGK{w3#*7MhUKiB%D%lk9VFcJ*9wAC&3b-~E^B;A-HrTNXhduk2q!1n+j(7>lV z15%ZL2Olm-?7il8SM$FHZ1X3$p&!6d|JJ+Vp;OkzUYwxtH|T@45Tlcr2c4&j1E@JJNOGEhQ@$ z;IywO5z1I#Fy|DIe=KE~T-#gQpu3bPp9OzhGV!`1!T%YjpK;EaKf)A?{3AC6LjyvF zW|EV|qa=w3G8%-T4SP_&Ku&!kVdcYx==SU1pUo6Bf?Q51%xsKsPxAxx3t*?tr_u?E zbA>=$+gg^0zGZ#@d4eiQ=>%&qkRcw4h6W6{Sa`7|F==2bcGMGnjXv19SL%Vo3aRIIH%gLz$9%yO{Z9(2$>(1FnwV=m$cCumkzSC0iYFC7d$ 
z^Yqgn&R-Btr}2UP+Gd*#=+7pbfp{{B&(}VC?fFA?tH|d_()a4CudZ1B@ft(cY;R{{ zOYAk1RIRU>BP2sq~G0{<{zvTb8WfR;8| zAwNt8%mcYhL8B&il>Y}52sXtD4su19$U@q}A}>8eb@*idX>CVa*~y|5Ah9FnGlYw~ zQYIoZWjj-TNDA0jmc2)iUwBiMe{hnts=tbUN&cODN+VG^S>~u`Rpq;@9grD={%;@> z^JqY;g+Gp}@EufuB^ccOeEN&b*wIOUMM68IP|p5fdZlNdy43-Oj>4KBq_nC@eJkWd zKNQ&rHAWl1HFO`U>e;X4|ENc08PgbWT`MHnT8kuV&n-r3%B=aVrXwJECS58Sa*)nY zwOX3HbYgcr_`(kODpz+Nd}}QN>LwsngNk(O!L=q-R%;`&fpBoD@EzV&`2Kxbt0N82 zbtrTJ<>N0T-3U6t_wX08I@H|I;@qffjcBM24ZiviEd2^q*NVzk;I525kV#W24FyU! zI4bZxxc)e*@E!g#7^&9^d@igB#UA~8f!~!ri~@A3@Bv%Fzm`5ebQ_c-g8K)v;qS2T z34fp58k`5aNB9~JpuRBvYUKUZ$fwcAP-Sp7LW6E-c9KuOtv7%Q{RI?4)X2Yw|38o) z+Vu#Zap^Gl2G@qmnV6^#M4$$L2BmRz=dUZQ(!VG9@!)sCzX$le@K*w1bDa3$92{(mrZ{WYxkyy414o~ zDRH0vy7{5&^Tp<|n&-4m?V&1#-sU*6~MS5JSTyH+8d0x`s!w14GoZ+yOR)G*bzlCu^yIzSZ%0ZanC0ZunUBX3<0p(i zV#+sUfa}Pwz4kB>99wFjwD`ltd}3k)8TN6-L_@0nSX61_+uE|2`7mTPty;B$oeBYr$XM5oLyCCoS<kW885cR=7jk5~j}k>8>4y0b!;I`QxRD{!1lWp?Xl1cf2$SSJUS6?cC8~>vLe;g_9ISj^n9#z( zhaP^2xd22Fa*UE!UG+7=Z*9ezKYaM`Yo|>^&7744qn3<5dWDo!@uO%*g3e1Fh{FlT9sAMJrN~M{LmifE zELIGHOl$E-Y|4})cb>R2_D7~dStJ!lNm{fdmGO&(C-CUK`R||k{nMH98X6m}x%%3E z{TdlBnx$$%W?H5}IS&%biVbnZ$WaWOjXb3R?#Uo=6$6AEhsmd@hS5!h7D12FN#yn1 zko;4R556F;4k!erq){gSD%42r0KOZ%QA`1E;$#&**H!oqe=6HJ z?9&={dOtC6f@`=6mO(ct9{%)O!C%iIfrE>hPlSg)3?KhOwT%>3kq4EbuQCQ8Y`BJ^ zbjkG2UGbi(HZ*GKTpt2qCprZ^p`@yULtS`)N?S#d*vfUo;Cb-55`2DCYKITU3Jh&m zZfLnv2VeS&#A)tw)eIqV-tf0V^+p@v=ff8qt8axKK-IBg*E^RWY|4vZd+i-SQ9`#3 zpzN@YmeirXHw2@VIRJA{gde=U6szP&#icLiQz0({A(@UWvR{sx~rLQqX~*wDmRQ9uJzITby`fTuy~ zwrV^0)>L%1`c?c@!-s734Hs(L1Bnucm*7DkQCwKqrCq5Xk;AKJwDJnnQXuKkF!%6O zsV6_`M730hexbh*198J&vjEpdf5&V7o5RFJ;s6-K?8%D`hhBp;EJIAzdxIrJ0O#Oy zrc-@!`1E%$Yw5#HHT(DKui0|Bl1wQI2r!P0FyBZ$f`}7wH|Mxt9v4|-@7w#bi>=oKV z(q0aTDq_hcmazciT-fpUz`I8W%O_qF^!_W~qJmocZ}L z_>tivB?_@8BYCy4Lm}Of(8Qf4)-~3*wX{ISsZ)P(*rA8KFmq3@1NQ*u7Il)d4f8KPZRC%oFZo#20QI1 zNF<~3&*OuEa|f}IxQ3yQ*td9T#ICet$%m($c=Btn{#{;RiWwyd6#499U zU~V{=fO9y>)hAO*D~4H(o$N|&P>|R(hCnzc7C6$V$##Zq5e|kEkapIrzg>9Ig?Jkg zMvBu5nBZGN)+kC?f=Jd??;T(-H4j=+ZJK6UyKIc`#D|G96KLUB%2%fDgBufF~}@J zs|THGy3&W?K#}+Uj4@Rp?4F@u5Df(81|Z3`kU`-(Llcu-AuNm~?E^0b zDp6nav(p;aSe(~ei*V7RNWF_Pw3AK5syooY2Loyd2py;o76H;y8an)G48TEy2ag^* zMv2_nNEC0uj^d&@v+=pZ&uipH8`A^MP}qgnA2A%rtDBnHEg|39oUE&psV9gLvw(R_ za7&0#p+vYYyCsma4dZmCfT2n+;9l>T(BzF$&UHlMi8*s--*L+=(PUDSKzn;Qj zj3aW614LRGY&>W3TBZ& z8lyQDq8$5azJ~PS|3!@PmfLK_z6RK~S1eog(qCQ@hnOGB<+omc<*nCV!JSoH(BlxB zEy0Y0qcUBD?z-dNi_X6|5lfM*Em@yzZf_$Xl?YV)(J)eWWZPCWHT~$^bIFv4D;a`~ zy)2bVL2aB=af<8TuYXHR3j%_LJ=f9k$tRykSQswhtC1KdO4bF=#nP~$Yi~GeeJIIf zWA6soAF=i*B)rg(!=RT+C>ovSp%hsKV_R>vP2<2d5E5163os^BCYcl>cP9UOo~z6q zc8)N5!p^1L=3zk{YB2*-QMN>;rcf+}ebB%ow^YE8r+NSeKH_bvAOQ9_Qjz-oeERDY z7T^XEQ8mF6VE6Xn!K{5hp@t#+PqSa?!O3vUYfPGLgL0U7R24NC6gT}tXF{4!H+)@t zw+(Y}1g=iKs+w=Oa9mO|<`H$P6Fsg(QIXIAag7iQ%Bf*bW!(arSG5Md%GAFP-{u^L zplS<#6@LMtgfH(*SNy4aL2*~Q?HXE--W_+}0dRL~;cJ9qjri}u2XDaG7+skMUuE6k zQ{d=h`18@7K0X3AB9N+q-vj=}njYxi1N;h6e5!~5)q!@UkKx}de|v%7i9VvU2cL0w z;ji7R(#KsILmd6=!PE1@JM$O5N#syp2LL3knkRIh!Cyyz2j9cL zhW=n>hm$ejOH2fm8AaHM6plV#J-&LPkM2_6=wHF#b!X+h;a}zN=ZD`be~l#c-d#Q= zzjo-0Ka)?!)rG%?eszAJf*NUD{}_L&YT$f%XS(9g(cgokdn!~FLFc>n@S3R9gw zxM~vN_)FWa@J)VtDMSD)_tS zqoH4g@6lhxHT^R94irEPng{M3u ztB-M0Qq4LR^A?5=&2jRK7so{-`@~!zn*$fr1DtFLffyz}-0_ung*ykL0a(Hr&~(2s5DSoQP1SS0E&gx^h&A+T0w0W2Fx ziO{OpR$Ff~X6#tWwTzWmyFz5R$1M)i6w6hbZ@J}Gtmm;nMdR?J9I%x0@_Hjz7hhpk z+HB<+KK9se&3@}$^0h%Dh&pb}*LK<&+ug(sHXJo)&fLQfI>`KG*wPX6RuM}IfBGMj zEKTBPhwPz91Iv546j7NuL>7}=VdY1QSIOW76}d|rYBCXEP~gDWYF;7|3djv5u_h#8 zt?YT0h@D}ue8+|cY=)2d2Ie4X5)#Y%4^&XVXDrj+Cht7bU}62k#Ys_<*lBf`YleD- 
zCNLc%t3uJ_mqy>J5V$kD4q?wq?Q?cN{P%238!%1L|N4p{0&DC4K^X2H*em^ z^+%u-RLp1Fur3kzD#KkUQpl0C3ys10B-03%U<^swf>Ovg)YoGP!eNR|hOf8o?AKl+ zN|)5}*ZlITUtRqx%zY*%5SSR@xGeD<#R2^it6K}R-<>V>6sEy}gNG>CGglZiVBm_S zAAx5yUH9&sIqM7`!48ZFf1Z5%g#!L+%3~i0yuWZ!hAmmuxI{yQ99wxydkcmEA~BQ6 zRC}g96eBrW2%|Tqyd3z4qfC#mmJKMesL!13osr1zAN>9D z#Yj-$KYRR;|_4=FgC*@$Gun5Bdds!kg1nQX(;=zW09aer>%#o8&Fk2udTEf9l3H-z! z$Sj-9$_W+E*eot^N+uX$axj@;GvC58#bBB|@!x||3BOmT#Og>w#W*GHCjT&99ZTf> zm2PkHPovuT-^sriW*anU1Ua)kXe$)L;1O^h@n3`XyMp z7XF;ui0qpW!KZ}&f$y}M8m^js>83k$rp9VTJ9}POTa|n8nH%+En#9zEeUf3xHtK*1 zkVdMEPzmEQ}``@m=#$Puua5pj;os4p#lTWEW{q56VZ~pv`(qHfSRXhK5 zng9Frx7Ybo^IT>A(glC+`K38f=bB92{MPsU{ntEy^?qfXVCuH#xz_6Nz<++h7d@%; z{>Xz@AOF$M->;zlnrnzyqc=8Q8F;m@S8Bca!Bh9d)hmK~{xWq@*VgX5qkC^5_Wq`r zIjSp(6wM4E?-X)ekN|K`ub{qusz85cinCM1hpOB+-;8)9e$#EY{rs|@4;wag;>3yV zZ5hIi2-sV^Wa&P8?(_cp^Ecak^T!^0oCW!tZ@-Cs3ro$25$m(UWdW& z6(Lc?I!P?a@t@0)!-gbRTK0$H@6Vrq-jC0H?6Jp2j2!;xA0B7H7|5>1v*X%nH*7j? zQyE`^O5u;~MzLU_@aom84m{u>c9@H$({xAp9#f5MbLBu|Lt`M4VChentl){gBx>ox zi!Rt>mr3N@ha@-tX4*y zyZ(kr6L-O44oc`Gx<)Sq-Y}+{ZvGu4Ms^_C=#4f?#^Y=*nNFmmu|zsu*V?=aB9!xO zgbXu-ut5fU7hso<#S@|c9(d*QrI%f%MHlX?;n3JkH*Hv>AEaS(gQ;w8YvI+fX~LG< ztkHh}{J_%_ix}~wM5nT?#G-|Zmb|}+wxscT;DP%N*k@mfMWDnMk>G^<4<0yRkG=N> zIQF zo!OxYqSQg~CpiNljuZpcW+D@F^a%(GR z$)^m*75f9iD}h7o%wX;2e~D^0`E+t`(k|I&ijc&JXehn3gJ%C*`xPB&vcNgf1_@(iD{=#} z(W?)KjQ^!BZOqm^1m&9+-iKG+G~Cs))>3z?)Cs6iwhv!I+6~vJrnxJq;13g3Qd6Nbd~OL%-S1AH+IBt!1_oQLO?UpPL2vZ$Ods_r zAP!w!Ia+wvUEtT$^WnR14b8*9Gkl|et^9bjuEM7w199}}%-^o`_vQby(3=WKK}n!{jH%-clmLYa0Bd3 zetMA48vfQ2hjXJ(cli_@`}DUv{eAhDQ1wRt&iW-Wm5}>%|9@J4Yo1>fdWe!0`c%l| z-|o|2$xomDA`kxatB3imB0s9;iO#?5`D@Qd_X-QIe8H{E4{o05gnzg9pBnxGz@Ake zoXY(CrFi~+RrR+*a)d@46ccLRPhbK(Ax)0EE1uY_W$!H2+L`};ZY&hUTK)C; zYfle*KQzHM&^~-yeFuq2{PhjfmVLBz#j;QL+i$;kBIfe1ueIiork2&^T;{b`ULlY5 zgb7>Z7+2*ef{~DZ4h{sGe7#(E#BTWw(k;oU; z-iB+5e3e*3M&eP@&6AUB({Y=vJ$(4$1@F<|7UP zEy^OFw{RuTO2jwcY;#t~tkYP(&3NMRW4?KGy1pJ)FBZd8V733m(=%?k<~nxPqw7mA zyL8;xvC3CY9~!JS*kkJeeD3fjW8XfKsXhD5v)tmPF_x}7<|sM`Ia|eK7Qk@S~6Zp`0rXU317Kmt2CWPGO|E zjyZGYfd9Y&1M3>HGKFGF-0gPvTI(pQsz;aQDn%tZrTm( z!+?#2bn>mO&Dm@_800d!MIU^CK2QPP;h2UCql^Wi5b?swm#>8G#L1>GB2x(TqmDZ2 zhH1afu3kZ2u8$WlKJvegns&pDSnIIm5Eu$4kYsvU^ueM_F8i6bnk{O}h`sjSyP+|u zP~%8=+pV{H?(s*JaV*gB+#jEwzhJ@e_17m!7S4v~HX4V_e7y9-TW+}pkuDW;*nCHi z-iVDPF^v%y+SIgKq5?MT?1^s|gARts7slSK!C)L~VE@Lt_ZKd{`Q}?@{NWGO#DS5p z&;t%UAel^}GmDpe@X4Y@GWC=(dnKm*dfKpI!%$m%x3YP;bYT|9Oh0$d>|J);iR@=k z7#k&lkVA$H!r5^7@@1=5tfcdFy8gWL&cjg;7rY;x^OFzXUx>VKzV+7Uo_kI*k<=cR zId%Y$Og#9&gI8WTpJ5GMbItF4?|X^5bT*SurPBPvK>sZmIWiGb7&0uwc@XOxoP`50 znxSDYgPjz?3+WP*qiqAiti?nK!>L0jCfTwhI{BukDxW$vdM-4(gnUlBB`{;D(6UzyRk)g6DUp+>sx9X9GJeH`jO{su-+ zT?0k9(##_iBi1yfe?sU)r>S0HVk#ekcFLI9n_V>)M2FIpecGVT7f=|ymsY^+#SwE4SvkF#U*eUQ5C*A z>Jh%tP{a}rf}x5As@k0>?9*R&Ijo*WXLN%;0wC7urdLM*`Ol1x686{{{#pYMM+ZHNr;dPX486e@Fc0g0s{RTm;jgiJ^y$;z9_9zJ z8TyFVefldou8O|SzJ&TKjWOMnh)c($h@IyDKK*s{5id3WXd38y{z3=z+{uPbjrP#> zpD#AqJ6Y0HZL;;Rd;aod$FAV!A=WNDEqLx|X7cp0)AJYfV5goq9KQVj=RAMEvieJ6 zDJ&#nip@`Y@ERXOiXVcfygTWIK>tnGlvUxKcdliI-W5#z?EO=|3rL3f={WtRA(hPU zMPyVwrYqv2_Whp=R)OIVOZtyL`ItIHdmVN3QGfc=pR^ZVGR?gyQ>GAX1?(GtbK}-q zZ@cjQg~4!wNWJkBw&d-I1s=J(cr60ca17rioE-vKuE356zSh>(yYIO3zPs*ub=Kb) z469Z)o+H^9X_C_EG^e@2o;H?3KCi+kycP2t4g`*WBgi!6Go@p z*jTJM7#==W_#UvxG!&>_P?ykG;Vvj+iq)dl0uK^89Un2h6>#yyw!w!rQlIht;KxHic@S~56A3p*A7IPj7 z=CX*Qxu|fzh<_X=3@Fk;OdK$=Zoq<4E+cOsqFIZ1K#;7>2_^&7R4Dovji<~Q@o(JQ@Gq=w*| z8tJ4ve9$u30@g+T@%53W*;KAC!=Du@*nkEl{D7fV&={S5IrQuXAQ@P(wXJRXqmSSk zMEx+QG9nDJDAy)xL0H5}`}gNzkx1S1!WVuQ;F@qq8V{pL}V zkBBGJxm=6r1L=WG+IS@$gDlvuaH5JgH?3}HXp|v16k2E9bx;4^ckmSz+X@{oJ^S3Z 
zzI7}cXx2A2kmn9_DolU-?RSnk^2qiTD?~b2SSlZW#NimJh#F?chMjia`4{z#@(fiP ztJ37jlkd6rp5ens$STKzIvig7!NS82J*-W;J;)M`0r8N74_)5W!d*7pz?3Y)i)GA- zHWif0xX2Nzi2${?x7~i{?H6BkaeI4C@j4j4%i;Al8FTVUCleZpG5Xe9Z;RL0=bBqF zt?jbwqzy)nQtEx&%rsnl$A~eFL!vR`#;v=-`itkzCo~cl&)@#;H)nkB`=n+(^UO1! zobe`4CUwm=5U3SG}30t_KIj2x0#27mVz0aFJZ|{BfLA&8E zMnhci*ni{7E2r+e&wlZAT`a&(phyz$RG=caOU6zYkOWBz1>$_7_L6fPE{o+@GNGiC z2)lly2dEG1V?Lm{z<7G($*0M`YB~6V0}X9m+eJPN053DVpCiqx25ER!kN{9m3;>wg zi3eavSJm~eVextO7y6*m0wDZ(!rAkS*7{-Sj6~lWixtUA=+5|d_-ovTA*g7))G(wR z6;zwMI(l3GQk{X?4wclRwys4-^=0IRmBLYn73xGl)sLYgJHOY9kaARU=xGG55uY4B ze6Li`J$npRU|B7IhzJ0;>B@)it~DVF5H;xou)stPDmYRql31@5_ztYY-?81A8ze;_ zcf;So*C0InJqXs!gRgU;;*hCSsD)1L;p_jYw&k1{I&+p2JGNh>GOr~cknetqm#j+ ztd{;(#(%{%ljv}D^vR!Eb)k=|qtQE3^dz6%;d}A}$R6?M9kVK(!a>4XfiDz`cWYl z!Kyy}?R);(!wh-oasHPwdq=0|BP#AcU;Nv?-v6tfzu?Q;A8*(m0lOk#l;JIe=z#%`D-%AhyKk;=KKW3BvP*0)c{)cHQOZZyhsX{1!ALgOxf`g6bFxR9nrHrazX?v`bEt ziE=K_Nhpz~7b0*l2gl;r*K({plj#(u9E@{}7XK!ae6zO1rg6w2huw1HZ?QMXlom}C zbGg0u*;k5E3MS(Td^(8x#Gnw0#xkv~cx_=CYiL;G_~TD_=`VjRKUU2^T&(C~r zhn*(kg!2Bv1rI&^$dV5h5!nduw%u-fXp+S&)giB+hWZB9#(-P3eA&_e_07#T9oN#{ zcIl;;?za0LH%|L?FsjA&c|ZB_-(UILmRoN*aNxj~XT6L8Wzm9#3`!$vYn?lL?klri z*>uyftOm8I9(d!0RwD7a9JR0xG&eVQv^Fb^6b1mr zY!ikrc5q2dZIg@8`Y2*ZQ=p{A+#WLE%fouFAi`_TdXTFt5D-cgNq;FNdq) z243hFIkuj7Vg?8X zu?Hebs2qkwhoB)Ydty#II)vCu;n7E@pLFz5P#Nb(xPV^aIW=V1dZ(Xp#x6VVIBNZo zk4}G7{liO=js-$Fc_v0fq!uGEIh!TFUo@`$TVW{HQsBqqN%}|tiyO%N?N2XmyX`g;CQL}xrAglZ{(|@4dh1PwhPPU{t_}%0=bUqfuQwd};Mhet z;JC5l_S$o=NAAB%l2$A&SulV5?Y7@&&9~TM zo9(tCpptzw$-~7~l*n)-94~aRZEGB3Q!JLq7s%xwY+K!=t!y(n&%P0e1m*9UxbE8P zv9JTYy`}AuM;^&Fw*&))c%t7Ae|Q$A?zT*;JYHc^EJAdnnVkt?j_){er#oiP4ulkr zjYH-4fA9mgOXxsO^GpXtR68hBci4y=BcB_4AAm5%o?Ht%S1N}hLP20FjKF6WTNsw> zOmh>~`~X{mk{gjw>`-9ks?}_4f&~n}G0pNOXIh-YUqlkiGoEyDm!^*^IE;{Ri^OEP zG*&_7do)OfL{7dXjMmgcKBA+Uh9DDR}Q+;Lrcd#n*Njvqg_Z|6j8g6P2K<(qn zSy%n~Li#KHGWbBkdMeR(3Rii*GH@EQ{aRi1-yMI=C9dxZ5Kc@z+?ZHjsD_?i4(kg3 zHV*#NBU>7E)fFcioYybs0&Pvf%z^NUp>J+eM=exU_w6pjlc~RuN>^V%4E24^U#*iIb)l^H5JQ7`-e z#ly|u>qm`ZBmWLAY(KMxO5Q#lX(^8#+E# zj$Xi}mPEW0{=%EW8u~l>7_uJz4!!`;i{aoSNrCVbO)7OYZChU|v@zPJ;}z9w@fW2H zD^ZD~D|}Zb0=SMaj-+b>-tv6Hh*k{&q`bGwcMy z&V&yD1ea6Z68P4k4s}?8YU-qC{8bmhP#plNGs8phMZyXwSIu4e^s(D&CHM}1s;Q3E z^T%Qc(#FuF8l9+$aINA`$^W(f8Uog}=%f>)kD+cTQ|55?2W0iC@}uHD{e^aY`itnA zIP+m5C<14L)7rN?h#DW zhvt+Decb%us95`a(S%27-}6^fz9Y})d;Z#!LUXRsPn34=%ThW$JSy~89rqCb4}JbZ zm9M1!Dp1K7=;R-Rm+{{O!KNKagk-|~v5bYSnvz_E8n1-Bb<#%vSjTvwHx){BlV7d1 zBm)SAm~Z;!;7ekxD~i3>HHrp&# z*HCC{TQGmYkAD1{{$HN2Vk>CE2=>R@$Oc5oNe&|jmX z+SYpDLF5{1!(Atr%Y=gcuxjDVMARZ%H`0<(U@wr>j6~a!5Ie{1u+xqkjoWPQJ8y@g zsba1nXKU=9<`g+e5iDzvp@7JP5c zoGFth-*v~GW56)c@nmEd>w;nuXh|G^j01NbvjyGO^m32FUvBsbw zq%b2sD}u+37#|>vE1StISh%pcsfnEq)Afy`MvYX)B}fEF7Ve~T)AHTYq9IlxLSn;s zh|wH>lPKxQu^0rDH;pl2iePBk;18QwE{As^`-{qOlP9~sJ21LH47Z#`_B?4c8I!n? 
zv5R^}9}{{LJW-4Ge9B+V1eF5b)irX-v4C3#W;<~0HNROU{_)T1m_lobCaB!irm4=Y z$oll*S8%Wz@|uxC84_RtJbozCI#fUA1xEppYt~-^Zu6~!&&`T{(S(@6mX#|o)b?-e z_w}#u_wGAyV_l{T4j6aXp@;qKqS@em+0QRszVu_PC&~W(k2&U8jADqtMs2_=`m3l| zG$|0?ZTH>c1J=l`TnRYn$z}=Uzu2eKewi6ftZh6N3v`z&6=<>{Y+I62CBo z@0HOxe5Vk3ZF70$l~>+#@4X%64#F4_fOsOD*H(!IBKqp<8}7XO?&FU=?y2e1Wo#`4 zR($;NlTSWb&bHzU$uo%^D+!iFH1GsE;e_MAed?*{6gF;|TTwc^(yqP!x=%m-_~jR7 z!aO95`tQE|*6erQVc=xH!&b}Is|*v~Zol1jx7>Cs=4pEqV0R8&7_s0{D-#Tntt+|m z*LRQMOLOQ%tHQB|Ifh;(bHyFL~)xY}H)?06dd?ISUfB2!*OFt6b0+IDb4j;ME zs61ORG8c7pv}bd;r-`hgFdKv7vw6f}lkdFgR!mb&aq}0UnL72YG zf*c++a1G+O8F)P6Y|yrM0^?9{IWsL28>tAj8!hH3M7T_{vv3&gqDKZU!80Lq@(KQs zRN@1dJ^4h{BvKXm@1kGSM@LM7fMn6P00od}!5K0uP^bm6Z`YD?Fmdo{L0ifVzN&YD z?~t^X!nLBmf+roU=&y9k>(lhhYP(8;FAW!}LQ;rz)?W|*3K|AMRRzUbQy&Owrcqfb zK+HUV_a7xz+546xRjzGx2BcC19O~;&4IE}7fKK_f6AyxSQ(tZdQJXICZDvu22F|tD z5DiYH9|O@Fe5%sijZpw<@MplHv~H_wLDhYwdvdS^SViv7yDtbjbkm*r@C8JEj2gey z06rz33%;mey}3`Co6*(acz1+LFYsM=?z4)&kD>?CVFiRv`0HjT`d9I<(#IgXkIzLP z^&r5mC->>WFkiC@D33ngJ$-#H_*T)uH~fuq-RRQ=e}DWwr7I|UAoS(R44qo~8y~Gy z{MFEOfNBkYU6~DA_q{)npu7C5Zg==bC&Bj*lXOavAiv1b$Bow~u|wRhD!AE67y1Ll zxbFjVg3uK{uswi2{OF_!mskT zPk)UaM!i1$byCw^znuL1-|266`SiF_BeH$^TSJV#=dU*(y620{rZw*m)%ndN%YA&V z=c6>BcK)nw-;?~;-q-tsYi@7L-l?0M?V^YG2iN`|;P21x-JUPPls|p>p8)LBU!R3O zJB-SeC%DMv(W}$@TUEdG{Pn7P0wMm+0x~B|3I7E&q6*CmfQ-k;sVkhp#_kL;H`2zMbj8&0v zER{}im&hINUXva46fD}&QCFWn@x+t<_|(%YmMz2l5yAuzjj}{(Z_8q*bd_+A_gd0iVTGa$Y2?vcwVwk)Ln)_hk z!XV?#vh83DQky#%n7Y08v>X!$Oak4f@TWu>u@FN^D2Ifw_w|=2-!^}m{8()y(_jO@ zU0l%V1y{wNYwjMqC_uE(3UlLa&spkFPda1&99rcF4w8~bG(lF4ocDLPrr&321 zIu4tB1d+lJ3PF`sgKY{E$?bRADcQf@#-lfCZE5|K-9?M}p=+(V_PWEcj1jYHsivyy z7eqz6L@%L0zsCAO0|#d6>%V`-nfva&2eFWF2k9c0RsstShqjNvf>BD=r$=uxhCt5d z))rFfBfW}Pl;dG$=+L1^NH|QGW0Y+Afj~;;Gb~Uw7Tr>4paU63h5AGU^hWU?woGDcgJ~SRht07KgBe zR5L2ft3{Eari_ zR2|-|$&_;Nvl{@)isRn!4M#1UI}Zzbz9WCx4mM70ZP~4nF+KU;QeTOaLcUSC6z5^5h~cvG2hdXPiL*>FBYW-gf(~#7<&TBM7~K z(H%==vB<^(pv)YnN!$bwnqVYSa`>#G+S$ScvLJdLdB`uDAo1NwESRpV*f)?UYC3_Z zx&Lc3h>@g6`Gi{Df&8nnC;v`95hq)<8Z_}q#N|t;UKfJTQPHp78X^%@5e5z zXLVK?n3#ot2!F>exh@KrwG)9Al55>j(>1eCyni*g%Axu`FF?`5K*(|kfP?ShFY2oS zAgJ;=;p0@S>N*!6s}^mhTaq7#Ve22fF zcZLsS!Up2it8jhL6E3|dvG?TjG zorO{rkc00!HxMGfsAE$BG^xOMqqe4ASNfN%YT9@VzN4E*Nr#frpH4M^YPTAQdJ(R! 
zN8RV}=x-bo{aqb3HTtWCO{w1K+|1$O^H0=Yp<=`6MjzeKUDvZ@%j1X$Z{>{137p-q zF{<3@ui7#e6&N6+ourn_?($=G1lMbzMn*Hk&B3=icEdmWJn*H5LPY#EL=Zi-bM!YZ zGr}tUsY3%NKTZ-HeH{MP@dy!JIrspm%8zbIwoLvtUVr|v1{!{!{z3xduc@fVU&qdW zp8i(o@4=@}b*rhSPk$Z#`}FreOn>cl*3C8U`NeaCxd&b9{`_?l6e92DA2*Mg26UQ# zOgp}e&)=`A{%Zbb-H%DegwLKn(BHi<>ZbQ#F=rg~)ID_cKIl$aTk@pjZIt&>&1d2- zZ72nzPGGM>cmU&?>Wf|!JY(Y@uT4&uzhS~gub<)|rmov=VfsEE~KQA$J+0bd<>Fh%gNke^W| z7uaL>JzjnNwL~hlM*sexiQ5smlQazNt4Xe_xG>f>j-h>a-$UjtB~HS(OW}m!6aifF z0bzqn);ZdF6A!gw5w94|ld04RC!O;3uOFc4+!oUDIKgYIqf6jCY1iHAuepXm(T4i; zR$FfsPNo<*8)RZ?YiR6$$^ZPKkk8`ofk#OupKt76&n}CFqILk-VvF(5Jp0@u4?R44 z_UsQoT!M*b_{jCwAGyJVt+pf(42PR|9309-x-nAl89Qd&V~;=n+{_o2fAWcN4uyuT zv$g`LBB9O4Z}HccUnWJ=tXZ=@SiC4z-!N?LwZ@Jcv-#Jy*nEr4=g*&i?|t|G^`)0F zKW(tVhJ)4`I$#Kh{Ty$gmbTV9(iJ7hHC8Mq0*P3nrM2am8Bad+(C?pr?)gkMN5&?M zPeX^TwddY@)~`{Y!^BrC;;a{Dm5+TZ+?cMfE47lcjpS~Ly81c-Gr2)xbaINrHS7@B z-Ly>tCW#=?tfV?p4miZ-nWvxr^{=N9(V0jnNfQhHx8HtS`*?WScOXIqE{uuCEp zh?@Pkk9G`QZ&iBLvJ0*yl9D+|WbOM8*OE`SzEdT89pZ~e5jIx0zB9pvuY4mR`joUv zwgsPW$*tDEtiN<_izQvFhU6drIt|Z5W<4l(_-I*w%{5#hmv~M0DSyw z;VY+=EV}y4Wb?P(a?9d{3xe2rW6@C?jehLOCmwzDQIdyQzgnehgV!e?U-s$p$p;-$ z&XbKkK55dfifXKbFU$s#a0ST{T^NbxJBq0^soAkuVq_~nI{k4R74h{-*Cpvs<1Uks zD@^Dvj)mB#Yy;T9-(HzDY|WwY0IL)FuZ>fqsbGO1T=#^qUiiQ zJu!}${Ln#AO(#R``B18{|25Z7JMqMm|N7Ejo`3#@MT?iPIRL53*af1ou@T3&?YG-@ z-QmO6SaZ!xCL;j|1rQ_xc>6W>yW+~JB5!4F-ir+f+*gf{#gEKaZoG@X09398oF%oWG-GsEWFF=S%NOr3@gvOXgF|-*4;^;#WtZ0X>jxSQ z{Rd#QmRLlXheGUSAR~i3WF!3s3_-Ijay%GMMiVI{MI1*S6mbmyOkzm5pUHOMYj*y5 z=dD_~vhn|8@6BQ~+s^a6>YA&1IOk9j$x!AoW6C5Yn&Obif;@<{Y$!G*>n1l;sEMKE z2o@}nG%OjB0>e@e_%@CKi*}+6u!8_rZj8i2d~aD7u^m`S1Oq|jL4s|Fln!}LS5;S6 zRp)u$Z>>H2Lw8m449(+zch~>#y@vOD*Eg)a_WIY}o7>fIv7qym_h#NYIL1s&GrA~M zM~lC~cTYvp)E#{=EdufTHjeXM9pT*YlxAfe9h8g7OK$JpW#$U288Y*GzbpH`x zi9f|?rR|EXQl{be{e0_!Bmc5*IYTOT1UF&+JEN!t9k4A2W-WMT83jhC1$#YeUYTv63{6f6NR8DEN zztsObfW+tH|6h&g-#2OeI(}f|`pPcJ^Gl6Wb2hF%YWVd0sIB?T`IiH?q;lEN$0v@j zD3KgKYWzSg93%U7gP_D&^qhw8-YuRzwEri+>refw-~6ptzVL_>m}<=^=o z9_yn&J-lF{7QbP?@Z57O@nBZP`PYk=|J29-_gS%pKxYkjrWWFvIAb=7>9W1OLsq?f+qeB8MDc8F<{9tYxy}7blw=_d zPYwE}54@K<4OuX<IY~J=3#^x71diZaD>tBE7GoN|o zr7!UuD{pYSy87cE|L)KKjlc0VU;8{@5zPc8w>+Nj?B3zl(pR`ullhU0tNW+-xo3?h zH}S}C7N=kw)Lzzc-#!1mrD^GO(= zbvOIG?~f%sdHra-s76~(kJg=IVCw_Un_xKIQ^zE`_dkq_+NF3LbMQ}6U(=1 z`rwDY@dMxd&wllv{*jM;?AL$&*MH%!|2%gavbc_AL3h9A?L094&h5MJ|E3T8(m(&D zgFAOlzj$%`EpPeU=RSML`%E7)VM{|;Omu-+(c1m9S9k>e#e<9eL*{UpL%Cwj7ZzlS z7@Fri4$k#o%*kD`3~+y+S(}|?@KpZ&t1DiQaC!CM;?@yMkeD6k_FeVnn<4JpzPNuM zf7&_NgZ19_J~qKu@7;U;!8y~S-b%?kXMfL&zxR8;@B4Wf={fUrEF`QVB=9-~7Q3z3+YR`P8RB%>u{c11FvF$>XCVrnI=)U_OpG z!npz1-QztQFu%o{F23tee~eZ7$M?VZ1?4ie4f=&|z2vE7Z++`qdG4D|s;{Y4zT)h7W%)X3_Q%9;xhY?> z@!KT6djD_OSq((%B!-;2&O!SOKKrJLA7RQL@UI!aJ-+fC{)%i{lNkJnsx~!ViLYvL z=q~WDW`I;E9dr6U;O8`mcbVx#9H=!-Rp(E8fzn8{$S#|`L}CO1G?v3y5X?M5(SF1B zq;OrV%GElhw(zo-vIb^Qb)0oA-~oRYrzYL5Cp$M1v5BhHZZ-dBI*WLn(&Pj~xzvm( zAen~-R+;kR49Q;175;WYbOTiF{0rYT*%N%89_c@E%E^?eAd}OMO2B91T;w2_^ zpOg5&zwoE_br!kXmkJt#Cs~@N+~#D`Z1%<3q%`Qi^RE=fQ|gCY{w?`W;g|ma13tuR zYyK1e={0AUiPpEE%iT~J&a>FK0f|L(G+R2V=tFrDI z9P6l8l?C*x8Naqd`GWTri7zYChxkR;0~vVf?%BetcA-}M6L%plTMGgPJ}de8?(rM_ zkg21M@oR$iHQhRH`7G>$s}$TtN#@S=U;3Hj_g@#A{(EZ>v8?~jA2l#2GwNsf6a6bZ zP2(dtC@e{hwCrm)0uz}-yVN-Umhn?Um^c7oNR0LVttCf4Xp=%MzQ}O?R@E)@Fa2AZ zLMqqN;1~Y|{aZ@2(rU#NenPF@3sac`fc~Q;bZzzILx8*I7ydMU8~-8z#$5kZ6F|u| z+e)?gqhawIi_t$TekG*CM#2*RWVV)X;&&6j@z=zG&aTIQwZt#81dxX_|Fk2cCSKC# zo{{3;B!1~~>mfv&_|5q3nK=JCTfl#NGo6p#&g<7~cJmp3R;K4)Mx!a6Gj8~2+#Pdg zN!&{emDw2?_vuj<^%>^5}?r#H7X9fG6g2b0UrK^7ZoS^x+w^V;u5WAeJVvMvO&CJb3Fz z{>+cO_~P$5x&MmGMwrGrIb&f7Z9jSCndxX$1)SzP1$o=cXDp=ln>zq515rTBx3FP&WM9_({( 
zBC8X4XgL~V+UNAa)q}H#EIK*#(%L6(IatrNb9}VJf;p;ln$iUxGVic=w7buG8KwzXhK9Nj z=B;r~viab09Dp4VzTv&!@VP(#7k>8Ve)j6@!NGI){_@ZL+$TTriT&eaCgwQlss|=b z$U5{1>pZ6Fg%_T827>qY?ed<%i$@QfBtbpEvtWoPhM|Wm9#%#N;Y``1LuLwh53cqe zec=5cIJ)=3{V)8sF2#J}l}pQbH2?l{&wu*Up8^t&uw~*xRp6BE`cY5S=g?uUkMOLz zzH8$LwCh9t9iPPS8Cg5L;U75M(dyJCfnY}lPyEI#IrvZ}%*X5sUakLABV`fgH}r%? zzWCab1pZ+mFDA*e0Ra>Y#JavC!vNu^VQ3didwn4;@QyA^R_i}gLq*n(jIBkT4}Ivv zEED^M|KY#?{!f1IKl#7@=`Vc#^WF=@wsZK?Kl9T^N6s%p;-eq^_MiPPfA**T+-H9B zC;!3=FMKU`J>n{vx8-VY5BJ4R2m4I>GLG6keZbwyc=`@Ax96vX6G5|e$h6Ovv%^r} zR!Rn_EZO8A3V8Rs-~Gw&`y}s_e8>zb)1XY?(p*h;>+k;`fB%>N!7p#^AK@%#=T|JM z!lhZI#UuH7gx2=0eI{<1$c6+DuIG+g{L1;%UEYs*et3L%|LojbG54|3BW@b>Rhn+i z5;4FI^Qc_;vkr@V?q@tZIpuQl_HA!+bj}+4@lY-9F@^KCw^E**obg~XdI_F4h8MFw z%YKI64bVF8MBQSF9DLs3$^<%(v0&k}P;U;tc@jkC;$CJi5cw;m$VeD-p}< zZuhxdOS^BK0eG8mzz%TUViJuYJv?Rt`aV-!_!<1Up^?v@{x5#!Q=j;R0djEbbD#V3 ztbJoO>%s9MuMK&L!gL)whlT=XW?QQ?>7r#h+Gx0zMyzyHs^<#hb$63~?pE`bye}aWt6JLgiPU>Z6 z%o+dWAihZ+Ma%!`kUa~;0H^i^#3T895ia%Q8(6Hh!^uohJmZ%P)f)d6adH|68bNycY(+pl> zP!%l@L1J5pKjFnzh+BTA;>()iWh)h^gs6}-glP zh9t~GOAI8H-tg^n$gOfye3tgwX8zB&D#P1skY<7Q=@B>#|45nP+j5vhO!UK{wNk%f z$0mh_i(5TvaW%T}{M z4QEW2?i&3oth|H47>EsLil5(qs$FS!D>D%rhK$q`c%}cwKZ?f_fi3*B%@M)KKlmk= zT&mm8P`#0SL$+v}Q{Dwi%`?Dy<1X7b$+?EBG7)`3;R<7_b%95L(ecpy`2Y^$(Bb zUyyVCRLjY+o9hKk9Q4z4v8Mmg2KHBc4%JDprx$Ia|8@8ltLpg6HTho?f8dk`rN3#y z^0jDRnXp~Mf2xPp(=9IldH*a-`L=tMq4BHrQ_KQVA1NIY0u1Ax$DE`t}j4z-|@XUXje(ZFzS!y-^%5M&{0)G>~YG~1aN^8kj zsejdN2dQbAEjRHSO_VlYi}hkUmR|M$?#U)}1C!9KW3sA$BW;FH z4QnxHbaC+q{=obH%767&4v&t1@CQEq{BtiH9v!^+;=BIX$G(HLS0|?@EMMV)`!76q z?|=QpU;Ncy`?ars65kmz002M$Nkl6xPS7>!QSo<{m}Qn^BrH$ zJL#AP@oDc|NMAiVCVsedNyqUGgDUEhLL>F*oEc&B=_D8ul?VT^a^FR33 zKX~`f9r&C*;JIunmBuo2foPWHFmZMJ_TfiA`Z1}R{?`Bew_pD9moS^nzq-NzFTVKVd*AzByyE2S zjQLHZ_|85D3fs(9z@IkKWPQ<=xW}ZM$4S%4ERpfE8tdoxS$tKYnSW*|LRFQ~@Toh- zFQCd4I{02E_v_!dw#jbse+@PF|AtQ`!!djdU%dJ+{!0FZkA|65{PLGiQ18Ls-OqmJ zGfY%{%eVZ8Klmd*a{rZ=-}T~)?|biiKK`e_hgNYTE>9V|efREv@`>-ecl#a&;t^w* z3zuNkA7=dUM?U!HKKmJNGyJ-@y&ccusdEIy*(sjGYCo<#n486Ic$+G1z>9De+=EAH zoqof6-ut<~_!n7Y_}=%t@4LR|d(KWyn5JP;msOht5i@1?@1K102S4}^|KTs6oY_9co{CSyMMsTF`2VN99NGA$4rAVWw%RQFx||nFP*jH zq(iu<+4?4Acne$(PVN`H38!_=k;XCgM@;E|X4q{1!bA z59`8vd2uQdxnG%dU|)XaAr5@^?%~M^Pri@y+`{uOSqjOlB5w!4MVS<1?dJ|p+uOyH zS$65`A$Jd1L&qvJUfjV027U4xlj--*(CTg8P0124Ow8>0{rjgM`(xknUw{5@9v)XDLD4`c&(wtWou%^p*4lw(+MVEDcym!F8rGMtfevFE5d;43R4ZphJ)@OB5 z{!VM~&*nU8h}It+G5yUuH1|LM`Op8WfB7$I7U6vR`1s|QUw+{&Z+YiC-}&$V{@>@T zM{@D(5I%}EUepjEJ%vN#D;X32T z_#(U&3w8?8hSnmI*=6ptP2hQ4_>HEGr`pQ?0Ss}b=SSJwF8N0wMKA;q&H;vdYlBCX zf&g|USV%eKh^@_3RROw^tQoahZ&^x(vCN?(6kDN@f?Vf8qHNBo_#99ByLay{>lj%| z`py?$WLYD=XqDiHlhq~LtR+6dog}iNkt&xmD#)^kLBC`)4(t|x$&f~WK6cAb#s`u@ z#UI=OUv3@(D$hVhq8akwD<*8(Ac234zm+VLvME2&Pw}pnm^bHN&JzIJRfRcB^F6&# z!`a`!5BT;wEVSk4;|QjfS)Kn>KYCfWZ>Eq+loAI1*ud5lh&e0tBe#xEQ9G5Coampv z+b*GdOEBY~%lw=F_=A|J)LO>3J!WX%*rn+|YHs{V8uU~AEgE&;pNu;Od}$Gq@Uf~c zGvzHmxWGJg5z$f?td8h~GnuO_j_ zFEj{>Y5Z0z_-qPwuY)e`wwz`Dp0?;z{MBv~zy17`{wE68H`${175Y!6ezMSimX}?p ze*hX8+nHY;7{E-TY={(DvyoX1g^qp9XFhC5x|-PmCE@pX9{*Kl)CWeU;6>_ z8u}@Ggbe4ekwGUD7~o@$@S91LibbP9%4$^!g;lFNR4A5>?^F~S9xm(u%@M!c_``Xd zr%3s@XYLflb~KD($(1uTQ7Xe8y_=~NzPTeO!QrTzz6%HV+-E=gi+}5Ha}oL*|MuT- zC5bOloyiEMJaB`GuVMUyB1I2Glp2ee>iCLL;2Y?s?Oa)NO`7Z;DbZ5d6#1>#9f4qKK96wW%g#b)%ah1+O zJl8THtLhjkwgTupPr~Djp zvmEf8sRD{K-xnS(UVu*iSipC|TNU_v;NmiNy&;B?v;7sb7Cd`BzQlx@<-?z;8yce% z+A8x|(}e9Rey8Jq`9;s{RR0gYYG6HElwMAXMSkKlpZ#C{r~eWE|2JRyUG87QPt||$ zBSM=}!^$eV@wY%y27H32C?PqmR2D@bNy!#pmyiD5|Ni%W_y>Q0gn#`-UIb~47|SLV zf09Z+)~f5fT)zzXaT*ulQH;eJEN*)6;EXpufW!1rAhFM^GIk-PyI+00qjLsjP;`JTo zFK0(K?sUVr4Z46J5?D+M@EAbf$R(vJHK7 
(GIT binary patch data — base85-encoded binary image payload, omitted)
zNp3aJOU<+P1wlFgTib84pUv54@R|1ynSnuK^oT3kmrc#d_~`d+&XVQ>B@~^@_`7HVts2}PeH#{yZBE;NBbgFC(0@PaTP;y z8y$h#_TK%EvRl6?hc5$7gP(*d1j^*ou)@`_R%m^E(z1wcvN|zZnV`@yCMz`-m>s3{ zeM`Ks5|0yg>5j8ye!IzhF~%9U-hKz3E&NA2-S2_Zlhd7Un|<%)2CFgAiCwyIg>tX{0_djDO&^{sE>pW^+M?BGR-Y@q+x$2^*-UVRa_%Wc2<?JiFpI&HL3 zuP;<8Yvei77;tRO%vz1B@di>;ua(3XMU82{Oqyd%{jICNcI~y-JnG;_Rx0c_--dtU z%J|~C9o%sL_Ms1*@z%F#&$7lG8*s^2#ef@*IME=O%CSZTOgCd>i<=7v8LYM!>&#?>D{|5=v-uaEg2jaeRIUkJ^YaHk?(rZ0kB`mF%vkBz zlPp3Vu!l~py(J-bhF7cJY8`jn@!$UDHOvR2^3`gk+3i3bGtt<-jcv|OIrS77kF^Q0 z`2LKkktfbg&cE@EtB*bU81}o~ahLmj^~x)!*UZ2hS^{4DP}9*;tFy!#7?`xis!c}9 zcvPu3v+5YVLKJ^`hr$SNP36Lkz>lRiVm=(^=PPwPh4CFA>h5S1#17tHsA1p!NhbRT zG;3cOG{KPK_NAiLirivgOa&9>_Dg0agQ+A!+<#{H?rm4JFSS0EDAo(@S0xnX7rxmX zl>aWY-;AFX`0D>vHU**Jj2~&60CN)ZBZ~R4{D~hYAQAS$X(qpOnPp`ekpC{=i%mIZ zjHg14k2MX&7yp)Mnf!zNkXxF_^}7;Z=$ZT({}gTgF6T#B)OQtrYz-^yrzn4hPk|Cj z{D-(QpJn>RYE)Q3{yzVN=locFDWRa>zWgFn;)Z^eE#qe;{s8|0tLL9MbWQGL|CZnS z&E&U!?Ig+%coez@W**RM_#mnBk?!RwX&s{rZsN-`51 zZ2AXaf~D>{cL~v40#NF5Cy|vTV^lfttETV!XqGjzYGtw8au~icVGcF|FgVaWKHv~Q zU5fEtm%w*nQjSzaxc;a>KZ}`kSrR(oi(pdn1->d~1OnCN#Q~t~C6$~KC@SRVUZSqf zw|$igP{LOk7Qpx*a*^SOeB_lVEAVCN5{w9cQiM#8AIUE|lKc{!RQv#Q4AItoes!UGb|SljZki;zxk0uTtoTbJ?c?2l=P*sdI356-g;bCmciMSA+|83K>A~ zKZqYf3&=PU-17TcY$g3B`E8#r(@HZ|P)T57rN9qQCi;(_awYpI@^5Vn*pEPhY(U8* z`Qwk1B)@|0KJC{uEaP7aF^$-d>)_TWyElsj!Gf;%)nm0WR8ePA|G=-}`MT$~-!u)z zm6he_M{3sBpnaM*RXVeO@uOt?W%gOL9|0Ifc(Av%{bn|l*^gRe^v~jOkRSCf=6bX3 zx8FYpc9H+B?YGc={oh8cktdx|t$IlYe$Q{evJx4O?1%Q|{_39Fe)-QRnsJxn`-u8l z|2x<FP~9*`ZJpomST)PAw;cs|eiPu+iUir{=IS0|qS%;Rte(FT~7 zRiB@`;B%i-#-*hvAA1ar6+A%{%T8Gb9(3RrzxZX4dLx|&JmCIaw(?@e6^jFTT_fAx zYn>t+S9REbixjMstoF2XJ{}m)pwjO4)pBL~ZMU0WV2}Kf$%)CEfBsX6hA#wn7jK_r z>n}fgwmQ7Yv5Z5B6`6V@YPDx)S&)c#Yc$?d$^Pv#+sr(ux;y*U-|*e*nCS*>*IjoV z)%p2Pe-48fz;y8q{#@w7AdPYVjCaCjLt2MAWN)MjB7 z6sSoP^1dzgaH6sJaIA`EAsPNrHJ-0ZsnkRmq*-c=EI}{9*oM}HRwpX#c8`}4L3z%; z?mORp>|-B`;_*qcNE5%Fc1>c64W26PG6(P&o=VldT)p|1zm%JgJ<-NXcn(L)ya}{D zrq>jdx+5K?i!~d_Fpf)cYN|@%2pO-~|=Ge3&D8sVH>e z=+xZwOl3!=iin2N???3zY6a24u{#Sa@l=zXD$Sjc{Tt=G9Kfphg zwf2NBX^@PwG&^gE{D{7cAC1PuauFX!#jmQR4j8YMl7-o054Mxt^>s zMNt_)EBGPWwUvHVPW(k>!BvRcwA4ZQnz5Vsi~vONEZRYkD}Bw*rk|~M)DBAdCmW7@{?Nm`HX4-5}8)x z-?3!K_3I2YY&B;w;F>Fy1j+sc%OU=QA2rC}$M-KMEbwFegt3bJ%lJtJxD-BupF#T> z5C<`X|FF-DAIl$fNPB(xm+3d;Ulo5O(2U-$k?ms9M2`ut(_bp?<-|VV@T=JM~;u51gFK?ZPvc)%(FJ$x&D{G_|?w$+i7Zg;!#(9b<_IW*e>d| zfA?CZ6y12^P5bS)?*aQC(CUu-^FO_RX{pJ}5qG`+0~?DCrjlv&$6yPuR%3w~Ughy| zcD9?e3|ky!6yR|1;b-jP}kw=drhon0tb%gl`umpJum6P^0v zbgf3}M!jCH)>vW6-u*p%LgVAzR(HBKIg7VybhI)v)2=u0;_SThE^m0l8(;gn*WLPy zn^9Y9)7;zM&d)n=MJ>+NrzR#h+_~Z87rtgR4YTcJXl(pe#Ro_n zTaB|4ll$pJtvb8VKx6Exrq!2YV{?nD3w|_sXw>n##)47I5?{+mlU@07i?XrWC?~wc zpu?{EbV@jwI4GD!mn78eyhf9K+*sY(=`f*;t_UudwEC9Cq8%>Wv8Y4L?H?l+lr(7H zhdQfh8Sb{IftBlv&5kbuo5Z>O$Nha^{DQb@X);26@)P~Dy*8*%g**Ss{*x#Um7;vP z{4|f1?U&pFQii}E(tpPIR}I<;0ZhjVd?{G5{rgwUyUNVZ6WZho5i5~=3oo> zq|u8W?1dpe2&M7o0wJU>&6qC}_U z=Sm>Rpb}T%KjSCxl|9)H79k08|3J<^X##&xzXkp&Rgw?zANUTIkSp!S6;=i)9|FPRsXCOv?@InLY0VlViR!7!>Q5G;9F~J`#by#oCI}^q#MTc{ zPM{ME3hyD2f{0A&<~E1#{E?IyxveI>t@hUpp%Z>CjquY1@jkvSIs~N0)JdwbpebFK zsu3u{3;C6dQssqH3`dB(M4wCje4)B2I~}ngoYMHpsd^=lL3}3@L?U2ye8U<=Y59v@ zJ~D{39AAtop$@ysi};EpTs%{|;736gPry1>lV5T=dmyL)=QSdkRj9K^FoxRKuj0%v z0QPX}IztO-6IXa;w zPyjlMBr4tbp^>6~`%#(v@F%#aJXoAjj^)=?NaWH3E%9S%BE-2RmD?!U`HzKbY+l$M z!e~qa_kX=s$EQk^Q{*R=DE=fL_~Kl3qb#MBc(M}cQk^Amh98wsU}pcni}pK!@1l$R zNSdwf*Br5n3hzE&Gd%+*^mVX=PNm1K?bn8jDc^nCuNxGKD)c|@AJA9czTCJGUG{F9 zw@S%b=->XZ+OPh7xTH;gI+_PDIXx|k%+J>QuWelYwt8{;Np;sc9HKCovWbN)IUoi9 z_U^v`;u`68nKPmRvnE}*fA&A-Lk8P2QcA`F`$sPQ@^KHi|K5AsuT>zm@hA|_htgO) 
zJg43!(-fwKQR&%NlK`YT9;JAM$3-QFS<ao!S57=j7 zqB72wSRLMPINED7U1)4(dWs#Rcp)Nd<|Zl?7F7~E&fFuWY_KkJqW1BRf9#d7dG-7q zcRc5hU+_nN^aNzXhlTK;{M4uJzsoLkvU(;~jU+KIas1x(*FXNTkHfLU%MO`*q}gaJ z(qqsnd+*{>;a!lgeB~<_=H{3L_2mmM!a_731bP~s|MP}7jF-yu^UR-OzS6|v!o2ps z*Hj2z0NI(pb3=Li```m=F-?qlSdG^D4R^+h&cy|$dQo7x#{{aW>eB4o2rsUr+H!6& zl!v^L_995_vChcZA3ghF4|~`?`|R_$Lk__c#P(wU`H}y4!eK`|{kW$yD+>+&%Lm^- zx1RZ8RURLa_TB&R9$4eG0PM%Vh@*%N#hArOotK!w$2!)f;EiYJUa!?_@XE^)d*S13 z!tuo%E>)pXXTc`eYPn3G^ImUEePeGhUY%K)n0Vj=9{94Czl>ghTo+$>;dS5q7E(`c zv;C~x?+LPZXigH5hURwUX*A>>jxVW{lfOPb zNmsOAx6v3M5$WRih>BU@;`522kSvowZ@(db$Sa}se8N}AuVBAcLh=)iMDkMFiq*=L zDu#2yS9GcrIQ{-X?#=sPZEvf=Gqzrx+GVkJ4Y{#hrhFn*eQ}W{%-d%1-!l0T7%Z;C9Lr8;dU}#Y!PG{D z^)R{7W>fz^U^13SDexnONyBDA<{GmJ`~CrP!H?X#sLT9VO)^YHh(buUo&k{&!7hy? z9|0>=hYWsa-62uaMFEIla z`6Yzx-6F}K%DJxno5bW=)7FrRGKdP2I45;Mepg;uWIwXv+LplEt}HpA?w%Oh3=n3YH*P*>Wu z@`XzB2{Bf$E}U8)%1ehNZ5hsKg#m!bzl-?F02j)Y3gKc)_^H5E<(C-gutb6`;%AX6 zaX@}Z63r{|BfIoKm(XVixeMjw7pvl1nj-(McO+Mw3O1tX%fM6F1%C1jtMI=$`4fI7 zVlF>Yg_5%veQ%MU;qqso0TmT0;v-*fKk=Amm6I^6-trIF=L-Id{HT~HBnmW`=^HM; zsSS>#e`k;U3LHQ(uH*^|bDlQz$3y&4$>|UB^!?CBwfPv|m+k z75NFi%k~*4nf(hi9YgW6$SvDvQNJkR_tt)GR?FJguftE6pXAqB)-x1N2*%2CV$#WQ z`!~&D{|QdXl{wk}AWjrm#8;Ea`u}@j`we~u`XBSNn*Bh~UF;w3z3o>G&HV78&Q5)- z4jIo1ejKBZE!yYZrTq@-H{_>Uce#JNJG5U(?T5(J{`>MLmZc{uitbR};PYEw{&fGk zJKulrt?ifR2Wim1=0Dk?^hP5a=>Ov>Rih$}4r25{5_eR3K9PRKlkOP_HQKM+aYWl- zaPo9Iwb}$ojrtvzTzm<>43;kcQenZiRiyK$C@OxmN2q4v@e zgK+9*pT`*c>yrR#9{z|&UVix%RE916FTMN<_BCUCw$lS1u-k4A+BiF3!I@N^sMi;F z-E~*yg&|O{*rkIgIn#VDQGmUkW6U;Tc=9}M`o!^AF4VTtld+xP%&CJ}UO;@v8 zlZ>O|JMOsCLm&DuDyxN;_6C-CDP*UEMp_>7?{~m~a)6ALues*hk)>lkeAE(@!-){Fg{qnsoaLZTEFC;Z<% zdP_ig{%5_ap7Aw94nL*~z6*H7FKX?aa3z=H3(EL1sRBNw$P1)Ju_KL+UNy_Sz{SQAb2%k9&W5)9VyKo?vAD!} zkt{w4?H@NfTA$}F8l9P$iSqaaz6sRAY{%KzwqI=8md|u)q{h5RmO1xI+pe9Mo#TZM z+BUDzz`={BtgNjBS?x>TQrEjd*m)4=Dn-jB*Y4ncr+j>?)6>(Wuh+G!9iC{s{J7^> zm@Ic5?%iq$KWrUO_e6DZktNk5?2AV!Y=ap0cU3+359f_^pZH04&Vv0c<6k6*E;vJB zT%;q{ue{rX_$18notZ4oxq^Q}6~SL_pGAE2QxZo>3i7XxFK=7ezkFR$s58ffkPt0C zhPE$Q$^9oQ?2{N<`IkMv z@wN}w)BOZJ1s7!Wr@p8U7O5WApg}1&8On$nhXhw0XZQ)&c&70GYtitCb6 zJaLwmB!1O{a`=Nj7s3n`2qDjE`2Md8Jk#8w*d#w7!TnJA<$)vGm5)+%VR`MF7Leq8T>*CKxk4t`Xw(x@EW?+g5-)W~N2hW*$H ztQ-UjmEi}bz!Ka_{*^?9376he9X40O z5t8sN%QF2^FVaIqq*OYUYYLQCzHTBNtI)oE>i-Jv_*`1>ANCXcM73AqM|hGy>=Sf} z>2g)YyG#2`O(7JM*?$PSwf#z4vMcnN8Ko>Dg7tJqsBA9uor077$QTFhGwf$;`~97_ zU+KlF$?jkMhDQr@|G%~U`hF2Tu=`JurY=hN*?4G|J^PW&&kqX6)Rn+LjJ^u@mt_0& zZ(HxbdH+!0CoC}fiQn}8%N;d?Hr;XmWeA`5KhlgI+~PUfbClJ6#c|KM|K3~MFMc1l z4#sSH7n;<uNLGN|x5 zWwOR%okfw=E3W)XWA;vFR_w9&o(CRy0ITyTXsNTfcH3?DKXCtB*8P$}l!OJdXlig2${9`3H?`L~^J!|ro!vkH%rd0!j7=BL{Z^QzZee9^^stXs!CxS!s1!^>Xw*YA1ndvPe- zcgLM@mg)I!Z0s9XfAi2sKW2yR?}JN`Nia8j``hpTzy}%Yqn?qma-*?`I(UI2n|9S$ zkGZ(OBY}K*>?p>h7y4D4WK7`Yod(h^t=OA}F@r=bExqHN@4W2di+NF|>;fkOZv`a{ z)=-zdJ zHn8me>j^8^0@}i-sAp}PuB@1sqGBVNJ4|r42$DFJBGObeLiC8A7IxYrliR;!Rfi~s z+!_)-1*tXiLsk~93RLZjZ;(kUp;TknIclc%D<_f?_Q$_2DzTNF_uDT3BFiE;7JjG} zQWV;6$nR0j^7fm{ulfSTAzmdF{D7|UU5uMEen_HfklOO=OxJ1rTl5b|2Js_t=NEV* zM-NZd4{!SMhd=aTK)Am{-CzCXUu?hQeb7vq#k$>*pZ@r#XMgmgyqJQ#G}h-m_jv~# zw0FJHsNn?fjofzYE$@Bb``BD>ez8uUbjm5G?6S))IN#AN>ym%=v!8zW!yjq#j@mLV ztrA~D0z(-af8HNIfA78aByVkUe15U@=C`~>?YE47dFcsHe8ORmf1G9!X+t+{$u9m? 
zMZU#b@&0RWq;*AGzxd`L3wyBQ)qf@%Kn`6cHkHRFrTeX66=1yt|6!j>pAID=zX~$C zE`^unBCk4j>(_vx-|#<#{RqkSNnUCc`c;^Vlw*t&^4gjSM?jTA-H}{P$x}Kl2eE20 zDpE_`>C9~k!nX*PDzkr?2#Jkf^bd#tic};mHKxx}7-L&iVA%LqbO3|>t3Xu4g+fdd zUx;7u&!|k{D>ih;i8yw0-ydBAH75zqWY>O5l5q%HaRShFs7MH8$)OaZgNitb1dFuF zO;TSuk5G)Hq#Ol&r%aKgCDQyWk**Z2Uqo0HUx=xuQAo(I%$t)xN=qsQLTU*peRB9J zrRp>3nAP}66-n81`F$Pu;&OF<5=rJ~nEXY2gYsdMJ~a9$rj`fe$>5-)79~Z@#9)kLFvNTUF}?~&GMh|lcmn= zKjaVn`n?RcA%#_Gx}xVP$pB0zN^RezQDoUm?FD`}kb^9^0=al|ZUQ)Q8Wl z?Kd$JHsXuyBbGQo(bo2xJwRmC#P{8){U*r@_b-Le=C`(AxBO`S>VM)MAKjArL>=yH z^Zw2K1EGWZ&HFbck39O?()~qglwo-P6rwC!;HNMDX7L@F>~pYvW%$yS5`+*#<&XQbpyDX-bI-c}7UdVo_u%`l z+E_9x=`20}D6D8dTwtI5syd~fm`JBk4!+QDFu|pwl)yT6uuY?nk4uHEqe%4m&wZY! zlCe_dNl$z-1IJcV8^BLaZv*g=M?T@gFMo-53SNB4C2U#GR$T12zWol{vlD!6qQ(pk zrWDEO7;TPe{T)uRHV&_`QTEt+;~U?IgRI+Xjq-1M_RlUcpQCd68E2ey;t86rg=~yD zB{*ZqaqZw98=O<>I12{H@EJ`^Of>3qzYwbvcnt2=O~V|_y63?lD@$3cI++Hd05RYf=UR-Z;Vkp%kX)0*ScTQn>@|kZ zxKT7wry`#M4o53Ns-VYb4;;qU@=1AM%(6&LYtz9O?gMr@V{pz0yjEp0PQBM_Fo_3C z@DeVTxp|$U%unMy@~D5a8Cuu5>nO}q9g;gGp+{<4ETY)7Et4#pYtOVG5x92XWqq>o z_2E=u|751QNB{uPKrg?gGAUe926gys~z@4a(-nFcchD2fydv7n%+SU_Vp z#u~*MF_9Qc5{(*TH)@P%el@ZD5lCW*1>0{D3mO%at|&xB>d@;jOu4=O&-3nc&iCCi zFqR~0JTv!v=j^ifTJKtGx3kYaZQ~cqAik#Woe4a!=el-GbW(-Y&`X;3{yDjp&^N?n z$;nO2e+z}e83l4%%OcVtZ4g-t|8gz0+vMLo6#>(xctCL{ol_|^QDdHuA$*)J@e>|d+iA|USC`_*M0TXSHJ)7-#hE^Ya z^fUI_w1H)s%yls*e8p8)eDMA6pBtNSsqCtrhaPz(Q)2@ItH#G?scEjh`l|Q;{ol<` zOk&oZgM$xw=tF<&ZU;?GQ|n;E9efscV(V6@0LIY3;6oq$V4f8e1PVSPv=aVB2`lI~ z3MuWc8X88kTebdW&9#5G1ZEHvae)64gaCvWjEOe3Gs7-^^2(yjA*8i9AUwPaccu zCy0}Fto+UV5`>d}f%kkXUd)>G)NNIAVr+t8oQ%o9UGayeX%^G`Ik+qNlFMDMxJ7Xs zU=$pPeq9jXC8J3dtTrjDZWkjr1l}wSva*{J9Q-u;BCXk}mHh-$w z$YolWyhd;*gh{Gg;VVLOspKgY5Yrzm0FW{K7kqd|#RcR!Ikj4)NdXcJ(H8%twC*<{ zF=P$fb=Nfjm)TYckf6f9@D*-K8sc`rznKbs+H4tsvxfgR{Ys&Tr;J7V02(CatidPM zRY^h8>}qC~Lf;T^k_sfF&L#LnIjuA$oj`B}Yd*!`N<5343eBbL7pf)3JL%Wy%oFQM zp>-Om%SS~&!7Jk*@hKxrgoUWKrtdF6ih`Pzka9IgY_r%WeI!}uBuZxD8mlWD10PR& z%9J&x17p$&o<+YQtNp})>H2)oxf6(S{{P&l`fA!LEqSeO3toj$%54MlL@n0*cm^D8>Smh=ij{wIT z8m0bhwVS5Z{sZe``zNTb|7+A+?N>_qP5WE=S3g_(3+S~mt0%?}rTxt_-dKEtKe2WD zQkplt;a8~r{WbRAbjfTg_={H1RC;|?#n)x-~R1ijhmT#$U`4OS7(rwL{rlfW7{VtCYZ;0(;MGB zHM#SwbH4kNOD^i_>1PM_2j2g|L&GDycG%ZH$hKk<>h9w$NEl8p&5hVMZQ6UUO?z#< z{(5*CSv~Twhd=D8PkHja?|q+j>-S)yYI=^LF5i4)u~!caedE+q_E^7`J&EXe!8xTf zfy45s%P!sgJCA$J)a=v<%Nm%CnVITl*CAfsWZHGy* zpRNtJ-|ulxctT!0@1%nKm^{~fmA^oPG<7hLy6tHnC%)J8+*#ro_5H6?oC&Nw?65e@ly81xRcH5}x8L;4XFrGc&_>5VxO--Drf;zS9(TVd z#r1cmOI+4^Fss<3g|Y3E-Fas&{F*25#_~|mcvlY_Ke05&i91#icJ`y+(tnOksxkZC zesO@q@2di~wNYkCnQZipMPd++S;(r{F)0c+q4gVcqpH9#y@dXA$k=jYQRbC8|I3Oq zU<_MAamUOQF-#CMhs7)_+L-5q4`9mV7cPW3%~+GnPR;iW^Zf@l z2xG<)GN9KD5x1A?;Nz;(YypOA^TxH%$rc564h{B9&M?hN#)a{*ai`Av`>`ZGlrb{A zniQ7K&Sk9CoZ&y8OwFo4)^GB~kNJ+VabMoV8+oxBb7ZDiEwHYV-G8UqP>=6aI9ab+ zk&1b9Fo9F%_1M&pZ3GJ=sXyIJF5^2HF8DY0*{0tHpWrG!v9jRV*pERK?9q!AH6|Z6F%@uC_Is`YrSe|7hDe zcC!-)%1T}iN+wnkK4B<@v)`ZmSL5H34MZD!YWh$p^oxHf90kOnE9&whBmOzutq!@Y z#=mX$S<6wehc7};IntGZGPy#i4u>X*4Dv7Rl0=s_r7u**V-hLS67Z;ht4oEZ7%TVy zQKz6pVoGAAZ}LUIq$&xjn2Nqaq!!_~%2Ly>v`t1S>C&j8TyidnITH)5*k%}L#mGwN z7oL)od+3{8X6vJ-uSf|aK}avA-{3=OyTGSKUqfDWH28$|oC>csiY@$?(wEnEd6FiH5Ve23cvYay}jlP0==(iM*B8|0u35C|FR9{A_ zvYg8KH~t%_8hn7P#wY0QR{y!6-Lij%4=w!$bo(a+MmaTov_N4xr5D*IUY($4Cae;~ zZ8biOvPepV*%<^cteT#heBrUjvRN?`UTZh(dHDSgXBCn&9V~t7?q@D$(_VW&`KeDD zW1Z6E#Mi$3wJc@oI{G=!W|zS5IE>Bbbb&=bnD_(F}%J zc-7a}mF>uw2k2xjj}iLczURHKea&mmJMa5U<}r}Zq(JA!+u!aruX@$DPWjfqe&GxA zEHLO=#cPtot4FqN+up_3D%s*27JGVmzs?QByL*_|lLN+W0|RT;@4?8F4Z<0U4-BvQ zhZBx}=tGVi-Et#rZ@%c_x4!-D$G!8N|8V^AFMaV#&OY-DaMn%vLzeEYJKgEGp8oXT 
zWBdOTPdssMe3TI!W%p6yD&8&~8fCk&%m(mkDKisYbMw6U*w@|J-#^6e<;(`Lg<0Rg z5Z{JkI-tL=|H)5&D${cV{r$X{cJBAidE*=3;0y)tT6T0CbIkMK@WwZycyR3R>z$gM z>g#1vf;BZKobt{8n4Oup)1B`0@>jf~e|YsIa}#9i^(82l-h^B21zoTt5Z?P%b+F+S zFkE%bm24};_dVfxW@h^I(@$4Fy@T$>-s|3E00dszWn=$t->(5;du831bxB9pc7m%tAg(K^`uNX4GF%Fn7~_nJy)k zbK_qb--RbAX5E@Mk&xmG|LycGC(+?F{pV%=+s^;;@uOlYs1{%77ym8(1sFlIAkoIZ z0SF-?yP{7{5>@o$QIJCJLf>%lfu;wOVv`eC88RmuZp?Zux9);`=fDu3skLt6ol@Ql zc8TZ$Z$?X$^^I&C1{Zv*0U`L7#{3lTP&3mtJ;kCh%xcwwb8fuAi%;+}W>5Fr^b~Wu zOpG!aJvi8B-Q_AY$;DyWq<;#f;wmH?86JcPlv$F=C$tw>#5&N|+t2>}(@vH#C(hf- zL`_a~_75;EhC%kR!jYFT18W;T;?$1+uJqfEPr*M3i#7HGe%!`1Zv+1Vq-UQIMf7jo{$UTb zwA+70AIcsz|K+CU)&5}xU%@BQRhE*}coG4@Zp~jszD_Fj$Wf6?YlMqN~=>=PVOZGxjP- zMsRo$9r4SYsi|uItB6JPL$1nbxfT8i2{9QGbjc=Ez1eAzKDJ{ST&85gNM4F3g$NmB zQqSd|mntIRW~cy2=G<5Kn>*PRFR_haq1I#oF|P|gDa1+~&PvJc{2N=;7yH-tk){a= za1fK6F^lZK*iX_zThWq)hY;afltiKppNfCMiPV9x*6%Xa75`={5Kw2QkwIB(0}2V zopb4@$UM4Kw`$yT`4|2Es{5}79lKP-+P{hde&fGrURA60Vgi-_f)4|^#_wr^l=R4{sKpXa4wekm+>6J26{2p)WO`g+2^wMH|e7VQkAN> zrUI>^85ENCm*GfVvO!Ui#U*=+#x*3E?nNm7^0dFdrv8ibsGX=PJ#7{-bapW@0LaHOm_VE$3H&v%roumc6a^N zTi$xYhflofsw-LZ#Vdylh3R`j@q-`u;Mm0Ekqo?wb<7YqnnRCAT zy^+|@EM>pJ=8HU zwC2idu44kIudkbL-Z84@x^y8?!k(MAFO=c>s}8EQ1e|#CLp>xnNZ`?oSof$OvRxX46T2l-+I1-t>mV9Q3~CC z!&%2O%S)+PPmF3|Y>YSTI?p=eET%gbM#p?lGyBJ_Vz#cQhfU4au35{XF#w?yKON}4 zPBC747L|R{`KJHOjYLtEw(IypN5E#YdPVb163suhuf-EY4zK+4%uobAGd{JT;tRZz zThlkDP=qp!r2mY6@oR)7#?1f@8!+f0cxSgGY1N}R6CAaf0ef~keJYRs>LZ1*hch=c_s+?jS z+zhon_0_To;!57d^dS`6@i$c4>1PyG zB6c62LLX*iFs!-;yr!SuN>bS?>HucnZ#M(Hx?DV$G_G#4pu|uuxzH3z8hp$_>d;re zOYl*e9QKGPF7(-B5^Yx{`?D*2$lH#;)Jtj!Y10{H2Hr*ZKylafCE-yq#zoD)p*8;& z9StWPm+%<=akHe65kevc7i`T)qBIXhqE-=^`VKah)TZBpzcN@!X#eyyq@Q-P&~NNB z_$Mv$BDhxOt)63vErB3V;UkZgn2Yd9s1c%Tq5vCkrC73Xt}bf&E&LNzr7z~a68)du zf7|)5`K;;(c>M+XZ-b9u)b_tvzsR=;pIzI(*tP4o*k^%p_wi|?kLYTs#$Wmzg_|f@ zY$_cSTJztI57gve)8)_JylTV1if%(NB@h}L6_Hl`wjw?a{T1zJ*ZjBaCw@bTvf-vP zkrMltRX9YXUxfb_Y!p;`YT?ta-`c+@8f1$90%bv`I@kE$8a}(y?{fNb<-wW%kz5a~ zgZ+&nqo@Y|mFSn;q%RhsVr$9`eJ_*-YALNrZRVsG^CkGFRLHU(Au1_#|EPdANNbWLH6{h0pN+Fz)$)F{uBBL}Axa2xhnn4oOv%YOvayu%bBwSJ+; zVXYkgy;(&6=V*VWWpOL(w~8ttRp*v{w&PRr|8uv$iWakK$3NUryauomK1=x5{!40p zW28W9x0C#yi*Z)q)R{Z{zb^eI^XElS3%^(K2Gh6jHcin<`q ze^bN;YWk*jBl!akJn)^zz5DI&c*nXms~_{&#~yaS!*03uTF1A%zce_w$A&$rdt9&8 z*>&*U54q$gmvwga{_%@nHZasrb;4dh6xZF;$J#ADd~kp%p|QzHSfGYs1ruoTy~TdE zB%AXimaxZC4iZDKn=i%9_bkk?_Jjc~8)?x^hhGt^FH)@PB%PFgR!cK5KFs@SKN@7l7t`N5$cmyIwkceA8|e;3nm-AvozD64ul?!C{8 zU-l==XuYre=qRcw@`PbC$37v8-mWhwSS%Zg{~9{}84SmdVF~{vKA& z;JK``yU%^^d)WQ%rzwGo)i!T{&W}&~t;q@4n(t*&j3sVL9kNU!GAGD~4P0)MUC(F7 zr+WJPiT$@PeQC=z*E6A|MfCL_{mf_n#j9V#@P}zcKKj?w-#a!shJb+OWL9@KOVLO+ zWzzune$;QihYiS>aR_vj)o5-!kA@>qXjlVs<`S#j56^r|KF!@ry@@~>s|Wuir&5?cJX;S;NoVPq6M zb8!V9OXw%i$lyb1Cavk4hX}$HB0J5O6oqQ)=XUy4P9-2GR4hK<6p)pv=?>OgvFH;c zz5JT1u}N&0)utn>2S!I{_>9BkB;RA0z306Sx%#^6Fc3aF3qeAtI+!Bv>*FJpGaL8X zXUmQ)EZ54%G<>I(?F5;sh7`a)@Igmhv+WirGqW`_Jv%tqJ&m_BskhM8*SqT8_c?U) zl~>Kqur#ud+K7pGCW~3e%JMMYp1k9J`(1tW&B#YBg});KYuTpRQjg55I)C(|A3<<< z^&o4_m^VvJDKKsL`^k|633H?0mGBXb3Lhz0VTnIbdE0J3g+6J4bY=RKq&0hRj^-BQ z5B-E|{~ng5m04AqXOVuF``2Rr$=hz9=-A>L`r*GtpR$N;{Hx7Ae>(qFxY#G;LQ^gR zvLc5E!y^~`gd-9ptnF+PZZ+ln%6OgH%q2Ji6?}r0^ob=QV|2FRFZBkglb7H^R?;EB zIYFc|UzOE6DdY6~+RI9W@kvi0N#vnmn)uWpp<$}(ZxMY0k;kY^Q!S;h6+4gYM^#zA z2zlBk@d~ljP*tggxHlz|`yz9}3$d{xn=AP!%vzb52C_orw4_&s2lLlRn@PgStANT` zn2^EXYp#M1#KFh(!CXQdE7332C{bZKG9_)HpG@J$ce$Mea~9SQa1;rYkLNM1gKY3X z(K3>wRMT>60DtJNmENnGf2yllwj+gY>I=W#;=TzykV`{uLd{1)d;|nOvUQ@48EqSH z(WDCf!awH}R-tI-f-OIpog%_&N}~#hbf{PW06+jqL_t)ZE=z$2pS(%V)R9Cg726H} 
zw7!IrxTKOW`b|Ecc6IQH-wCGeS5E`eGUxL8q1b=wjFKcNBSpEQ0Lo79U3z8kk#;KxV11#S%oYC>P1ibY z9^Y!8;S*itER6HwgUs9iY}qGy1Bp_%?5D`c(4dl80*Vy27#b8ms`MLB>KpKG9uiP<$5oZ_wogKXSuSI1AlEu#{V_V-Xep1$j${g-TweA-1p_{*$fp zuflWerxaW9PX_g8bdJh@p_}?a2#SB1o}>ZNVlu4fs>A&(@RScD;sd{J< zi_NI%B4816QU4(}@W0)E(SG9@?Y0oWF&uGh?1z-mU8{XheB23_iv4g?P?y4@_E$qw z4R!5ik4L}Lzl#5s^62YeIGF-1%VOjZOH!vPUaGQ9w}u*)Qd4h)xGXgEr4I4V{?)6W z_Xo!udiVpbxMK6*nl%R;bkMms-Nby?s)4~LJmK;ELqog+$Hq}3tB2nDwzuANGcW&b zd-S6p<<{`buv3YwS~W8>m8mmp6xz|jK_*t%R*P|IuMLs&yrE{cbn5NQKz`)Nj^qhAM9RTt1PAX(LoSo-u2JQt#7nEFl zuTm95Yr)qx#fNE2vbfDy^eW#>s9C-90n1na#cNJG{q&x->khs5ATBc_>k@BdX@gZ-VO<4%lF+htLn#tV5j4ocsi zHAI{C-gvKj-Rpbb{od5fATM7+4`tE!xX*p={qk4*`PeA47(FZ|vqp+F>Ks;0*fQ3O z5Gp3#;H6Q4-_+a?>AQKP&-yU}m|sh4N;29lhX?iej+ES0{zbyTo)EPjr{d0E$Tap} z=qDt3Grk~&W?@SJm>{491^na=m+@LT%;)+97@UEn*2myA6!9G)5vkScNOsufzyu60VSd97lRaiNqF#f|}Se-gB#DqHK;0T>$ z;xnxTvf~&uShz~pyMGV@7?iw0mU(I}0z7;A!C4=Zo4vz*`0sqyk_9W<-wFYfR`q0q zPqBZ6HMPG&f=Q90w$GqwR8?D7$wVIPQ7}1*eX0^9h<;5i35gZ@+(87)&0sozYy6E; zlQ9*@B`x%|Pw6Cas`}zzi|ohDX4ewDAhJ5NAF^tKgj3(d7J!(_0W8Q-YHL!NLmCx* zl1rfu4h!9vV8A7e(E$JHmQ;aP0mNWA}tTFCn zd9ROd&ij$Gn>b9eA?(YD5_^u`ewiH+cN9KW3?0+$HQd=_p@7IMaF#Pxdwxefm<3Tdguj3s}CKeQI%6Z)0GB<{b$f0yuo zEA~$mDzEU@YNB%UFXF#QQDW(If*f*IiRcNWZ<4U0-6Zt1)P71Yk%7n(`rf)Pi?eLx zf?j#?1STr`v`3O%lj)<8#s)SH&W6CObtf+w@`{S`$h$J+S~fUHV_@u`%c_1Tl17qU z@t;ga1`+EAj;8+1ogvLd64dnLU;3|QTY7#twdzk|+U=j5h5zOLTk}r>xe3Al68|kt z3DC=^C6&{z{MW_*q+~rsDmKln!bcVhqt%()f=}gNi|I35E0v1XaLR-ht)Zg2wwy;J zj40`8(w((~CCW9VZOB?k=yTS7qzN@uSaVa;FS%QBWPwrr!cu zNyEADvz&g!uklGDPBpoTz5#9Z&#n6}MNvv+pBB6V{|!G}EW*F>FO?kq%7RD3Kf$+R zpC#pT{&%edEB3ijXZ8@Q`FbLAa(J@_sM-sQmV zo-QUmcERlAXqzR!K}U+J$izCG}O z19?znbp&_mf6mO#uqze<+~XeihTnm~q0C!B#St^OIPR+Pv3E($#uVYIUQmiE6;)L6 zf*5^LS8rdCn2`uGP;;3nb6yI1j0%IWE5mdms$xdkovsa=pUa$@uXC}c&F8Z~*vYuv z-LqUC!z6(3mO6z5vM#v_8!+TxJFTq>VVxTLfWsQu9s3{*m9W*HTeLaphj1>>A{yMe zXUz*T^E#*e*fcwRK)7n9)Vx?KDC}_#mDz=R-t(Skoptu!d+(LSek`EtV=fV%n7tw| zlWXwCFqQWl`KaYXk9ycipZy%o>d5dK<%#?V1-4oASJN-{!*gi9nvgeR(ZETc`3$T1 zSWEw=eat2&r8w#Nzt7R9yzf{z0s=f6cjYSM0w; zRQfIcn|_R_dc=R1&<7vvP*ovbp^t;WN1K0@tU6BSFSH!^w<9HG(w0)BFK6*DPD1Z# z3IDs}zh(*3g?=u>|04Rj(-Qv`z+(UQ;jB1{^)9I&+UesCE&58c2>(J~*~MQJYx?Lx zPhyVDw}c6G^`Em4NM7iZoQK0~PG^>clOBE`CB06Thw>p)cIeaZa0)oK%K_PlKkvMo zRxgZv@{+GNz(WTUVd{N?x%X_Dr{;*~h<(bVi!Nc=gp>mP#8O2F4((VL5jZmeV49g8!2Kv%*&R@YCQU{`zF-ETLZnERoGwM1p@i{U!DT`3nEqK7&u; z7)*_C_-`l|7Noz7|CWBqjQy*gmVYS^3V~>Z4E?3}G^K=p`O{Pk)jqjeiccGTl2Krx zkc@I_aWMdk&25YSik&JT8Oup~3VURz`RA(G8-zTPCjB4j^QcF)K~W=NPA?8!s8L}P zoQ5a12W9I98!(MG&MyfIt519pm});E=J7JsCDZm$23SIbq;M&-P%tbJB<86|W;Ro; zVLP}4RZ^h?#dsG)YcaWG{geSxfDd(xxk%WjCA2+$CdBzjtM1}*^31m_K*&ZT95UC& zT3()Ifd&Fv9&AN}v0sxxviM%bzn*WggcknELz6~!;Gtb3Lpf@MMZheEijG*Chf^WI zR-@yrHRbUc46}fl=U^-cW&A5}ad;&ZGnmdJfQllKiD6d-7JyJ)IG@FOD@v>m;>qWe zLgmQGnQV9w>BwqIsPHJsh3xL&z!M{zMl{GH9##+tq~(~BxkL(L3ZGaGR;}xVYPO=U zt#~9gI?Q#r4F04HcO;S~ch+32u$~41(mG}9y4)!yoVcKfkPzyEKOG8iZtT-X0OF7R z&}FIcAtjg5yQ=Y#LNavsv!b4_{itq4Ll;_E(R+;#Mf$7>G5O9)NZ0nGjnQA3?V6*` z=7U5KVP=6xjywdT+A2N)rar|h+Ex_pz5pKm3ADFrB-W4{k%)MP*7(Fs%Bl7*(X41I zSZ<`hNWcFR{wpsk*=I2=4Ri@Za=mCZ$cgn`0F zw8Cjw`$B=pq$7e!aDf>Pihzuox1KAsOA?DyoAL4R%VkABs2HYxiyF8qRJga-BR*V8 z)HByWk@kzy%xHK?{w1Y>euVDsrBIr>H`9Mo^%A1_#>U!nv@YLJo5 zN_WyS2YI-HNs>^A>9~!JCOx@5LgrV)e>IEBe&k%yt0SgV?i9BPkNCT8e;X1l{X#H( z_L_2Kp9xJ^O~2FwDN756ovKcevD|XPsS?$~pV4^96aH0%spX&FMXa0A5Mpz#=BDPHIubKZMV8MSmKH%EYFT$ri2?WilZr@E;DWUjR>cIB?Gk#?*%F?;2 z{$ZRbI<)=GfX*}=rlmZ(aO%+i`9~{M{M*nh8dd)pkfdDKBf|()05t{uN}Lo*taxIZ zFxF*s&GoRW)cx;&|DmBHC&s7fs8T<;zmOj8X=tbJB%c=ZE-9w1?VFJ1wsj;sJ8UXAj*sqZ8`}|laYMNks!Rd 
zrYi-zD2}ku&%X9t@%cPS+xik=(@bJn;va7_Q2Q3Pqk4G&3uJ2OyX3sXx5);Fy7{K$ ze*5jiM>hHXK!9~#lJ*@NpxaTN_)-*ofu4@uz4zIBkM(=lGw5bNsp-yHTjzAo>-r~v zXx%B2LTBhFr%Ql9%w{6!9Bblu!)3WDX+hYp`{Dx$7Yex7X!v0A77jmWlyn_FxNtM3kq&l%z zm_p@0FVM!zX(toBzHCvdmeGmT-srnD{ z4;KX=V#8~v!zi=(52-zYWV&Is)8aJ=C$*~iNBmqkAAi-7KlN_41XMDfq zzZAlreNqceHAKfX{1cL@j3gYOAWJaM399w@OaHYGR1O$UytQZvgRN3O#Lc86{v7Zo zgGw*q;6r)oxHkM3{AIr8A8CV6*fF_TjX(<`ZSdF7paBvLGC*=E zeF;fH{H0$&FZM}HL%;A(k%fMwA+eYRe|=2(q#$Yp07JULVnV)g|3xJ3~;jx}TqQKG&U3s!N?h;Ub zLCJQ@DI+bp1U6^XK#eL_2@)*O@~FuqHYo(m&=eXI3Ix%s^nn$hr$f*Fj_iNAZr!@! z;bC$@fiZ)hVl}V#C^fX|GD5i&t7mEZ5aeVIj?@XMk4hq+hfiN{!38`O->~P#y>GYA z01tU$S_oh}M!wKqgbyKtLHW5VvS_-7!2gUpf2PcoDh;fL%2LZAi4>j2!kWBA5JLtA z2XDCH`pYi8Y{%%%b!*n%^{#jIE3@!lk&-?uHI#^UMKb1sf4>;*(RtbC&7(VaTyym` zcieBkgYJCizAUffVH(3}i-{;y?mpS0{-RA{y<#u4j4C40O&R2axV_z5Z{E83(#>nv zt=)6aJxNI`Nj;UfzPmE-w(5#2ufF24&HL?tr@i*xYZYHACP8#zduqL5&B0Q3Rx}h{ zZ`*du>eXwoAL?uDzKBezYA^?_Egt=&AN>%~Xm{7|vB$KSBC?zIrM{V+& ztFC5&?%H*0ct^d@-4m$egRDN~u$gAzNBOpP|C$-)HMi; z5!q&q(Vg43ZHF12Z|||kdY==>oF+UmF}Z#F zHn)YCnZf?oty}NRJ9)Y~F1X+)TW-8*aA=TyLhf*fI{-J@2Y=Wr_f&s{~8|+)2zjQ{+s<*rgwo4Ed4z7L!%Aprx;EJABGo2VDx4S zKyd1RVQ3xVlZw5CPag5hFbDatUa!NyKWTqtJ2{Ge*@mCFjDB{)$}p8#7``FL%Xd?3 zBs4rs+rz>tI_-3yTsqd#H8{{mCzEyh^ig4NbZnA&eAt+pobKV1hfJ?bPWEPX5<{8H zi>ccs{PS3rynNt}^{Y^YGYGQ0hfA+2nz{o=r%u2URTBTG;@E)3kG&m5mdZpcxOj^o>TblF> z=~6M%%8`-ug^hwgcaoASOA%} zC;ZAkffIZ?va@r{6$a#3U@xcs-d?&DUGqR!=)hwaI-TbO1-v6VzrYX)C9yCESS`NL zEVW-lUm(IiEHOdp>uk(cIF;!vAS2&dx-@KXj;d(op|ik>LKct}=4B@Q>%TSs7S+%v zw6@REzaj&%E&E6Pvfmsv{t+tlGfzo;&A*p5eUOQoEW4pkW7*J$OwvlJH%`bjDaH9qbBOM%7y<-ZYt{DptA z$-e^ei{S12TNE+j-{j@y=i|Sfz8Hq3CHU+<|4d!B@gMz$VZ7^{sE&Uey#ePU*+n)L@X>0P~ z-}*uRIabhbJN~Mo&3_?|-A{K0d|_$Z4mYzVE)b=gCGUm;-MFC$YrqBSuQ0a9m23gILBq zdkqe-g=Rh7Q?u4FF)`6S(9P$pT?C#6G-J#zKObu+L*RIv_UTW4>MvjWT4X=ypaZ}A zy>khn*$_C&0sRHqf9K{|$01Ah3;Y~R&jM?g=9%Do+~Xe4P?jwW8BJYs$t9hu*k3@C z5_GYNFzgn=$2)953C$Fqama?6=KR3+2PZYWPfE9?^r@@BuC6`@t(x*=P_kaKQ zz4qSp@sEFeRUhBA;q_#g@7TF>2UAq^li-h$9#eSldCz-4^O=(vlD+!XuYTn#Uk%gK ze(Z!T)p}V*#|RMR63eH2`63eC0Lp#b?>zppOD^Np&QrgA3fmd5Lk_q>9O%#kQwp#R z>}Q^R<{$pS3wP|;e$RW}e%Qw z6Jc9!+#*Do&%^4#fO)YW{NTLDKmM^}~;~)Op4<`-F>v(XA)u9FUpU}u+ z;ZdT!gpgUuzoH^JE=O&j=q~kC?7L*w8W;JO_f@t^%l{+(i!tyrY|V(GH}!+I7q%-g zxQT_7vA3AyKN)H(1t`-POk76vL~5#@nj?j1$tKGruSvKevt}<3Rl!Z3;Ibhf}6hcY` z<{gQl$WOWEmg^#2yMEO|Z9ir=9|s^C3?XmY{HxHfQWYae22KRVbW|UgVa8twwCqRw zAZ5GCJtf@nZW1IU-`wFtcE#}U(A2bhKU1yp%rK7A zNmcohm%bDW;f3)t&OGBz`|WR;>Q~;_3YU5kl~F8Opk88NMg2KFft~op^jS1Z`(j6= z2*HN6J}tv|FgCLE_p>dG!%Fs)WI@5`&au_2M-+ptK4)j#0yp(l!M|Vy|40rP1zFRQ zp&(G>Z%}mvBx42PSHgc`yeMp`eoJ^0Edbhof@2&-)(BqkS#F>0{i{MBb0ZZ%zA!usNci@Ko~;<>cXB{;|M3BG&9VBGIDu%Loa>C&Y+X znH>44$eWA6)Stxq@7LFVksW9eSnPzMpe+9F`W5d+BY_N%f|m#u{RYDo`p?X9)4{TO zAq&9!uz;D7Il3_5j}nNh><7I#AaYJ3eN@iMrAk4KN)2P^l zrH*-~vsk#q-q_G*UX16m)Ohp=sZXeEctq+Q+hehuivCX*ACH3*YBM@Qu2o?ZeCz+Y zh5ms)`iJn)*UR>-)<52`6Hpuf6+R#t{z*Ng?N$?R8!Yzjsi}Xua zp|4-qzNBswwu(M=tgV?|6OHJSA{~n4MZe;r6v2ml2u^y~BAI_tO!ClAP+fuyFIk`@ zsTTjFl4V(hEuhyvA*Fu#h^MRk2;rHv**oW_(T67CodvVWS|21_?ZsYBGqVoQAV)tK z-geEug-|X@nX+K>FT?=>OoZK|Fb_t0T`CY&(u9!|Vxk`Wfd~GT5(LV>@GnJVZR5Y- z6Z|XtQBwKhPt*1a?I4Y!8-Q>Hf5Hi9r@tJZcKq>Q*n&&4nNh=q6;3|pgb?&qgZ7g( z0@9~rqYPDaifq?!@n62?4ODVaZA3E`8NGm?3A~{=a^dW=I-*mfL5+ zpGp{fYTO`$jQTH{64y5T!K&E*68wSLv_&7Oe~SN_J5DK~#K#~AC3!_ZPJqs;=!U-X zkgWWZkBa^-{I}Ro@)TA`9@TOx%*kG;VcXPTF&3*<3(5hn{K}8^ul2^(2qR=S+FvwM z^yaPQ8Iti08X9}L^6<+J1ymp|L0Y%&{PWM}Re6^BfLh_7rkc8aa(v?Eo41f`VtkAU zMgoyd?!#FTGS?O35rk?Z{3nfR3qE{XhYPT?2`J(u7qXxzUXWzP2hBj8%NGF2T(IU- z)1;l8n!t3oY`Gbcz^jVO*8A{A^Uop5wSs@`LgZ-w>ALkA`6T)`aS#XbKd&4w=yK# 
z_YQYp9SW_iQ%m!6ljD>B{Lddb@q`n>=}m8X)8ilaSS)>%uVIf*(XS#UP_X0Ji6@@$ zPyh5!WO?aJUiS27Je{H3^wcy9#AIoqlNrFxn=j{OGgfzR+G}shrO|dr)yjZL)4@hy z$aTYY*WGaa^>|Z%|4r$J!jrcK!6O*h=cZl$+u-L`sUWc?oN(3izG3btn8#*<+=mz!d1 zEuQt3#BF&Oz zTWJswL?+#>G>q;TJ;vhM_lM_S_{4R+!XDwV; zL{-9={-~V2KMYLsu<6NwZW0I*bh|yhwsHwNl73@-m4hw# z#>&ddzWBjW=a>nvkSD5lj3aS1mD1pred~VG1ol6k@?}jHEkli z#R)vPS1*XwnJ?HJTlHSpvq;t*1!JKSy-vBKQfaOFhRIsDCvGp@i&w(F&~Uq|-H4Vf zJ!DqN-;g$dRU)ByXhH#AA*A0>Tr56jP>~TCC#93F_b$n$--IgpeSHc_7J3Y{UBl>9^%!n2Uw#PQ!qTC5hY6-KU*_!}G)4w9u zc+8B*=Y9t{57svCqjc*v@(_lY@cUk1+z-jJ;60jjVvXDnBR_L7z9J!gZix3L=#?Yz zI=%vF$N)vaflx1xLmxyCOSpyB0--#+US~HQ;{!3tv4(`;?NV z#1W@(4ZCll87mwCDVSY2mzyP!n##-(I0h$o7B~!Gz$leWgu0@^_!!GdkVk!iLa8+n z53_bHVq|6RUC=@Q=7j$RfACW)BFswT6b(xHDEu8WZzWc^Ek>W1=ew(Th@*XQforSDzScmY3RpbyQ+32daIt zHJFR@8gYf~8qJ72&rOMlPTYCV4Hnm$d-5NP`)uH@v8hLQu!QC1VccO#$jLV5z7K$g zl80`5i8xz@LMJ#HPG$YywdXt)l=g$rNT(jrM<_H@ybS6e$IB4RCQvIrwnK!mX=4FYL|%-K;JsH8b{__t?qes5;EMq|u~)rg zaYUloe6rjTLA21dmCxAD;qaKrx|I&uC$SV%km2-Tw=L_{u}k}or1F-~<~w=4Gm536 zv&2!lb@3h(giZ;)wyPXudv`!L6S(5Cl6*?Q+z1qSGZ;czb%g@Jr*BVX%@m?v4}(h1 z3Ma--0wgy5{)7?8!78N$ydMCDUd&g*7k0^h#o#aR zpWZ+)D8HHWC_2k(MoF5Sx~4gX8dEGY;DwPk^jJ8cgGAPtjnG9e)~`4eI%G*kZ$m~- z3i4fYto*}WSt;eQYUk+R39nSE*JM$G-bpHZ=5;?D?gPYM3IK08@_H0M1w<5^U40o;a=V!pHaGu#>gsnJ=4 zgCr@NK*qZAq?3^Zch#gV8??2$g22e2ki zVHU_!4Lyivb|b$>1mx~ukoA)cn7S`2Pu`rLNSlhGBQ2CzR4E};H6}OetLO6NchleT zIcfK^bmv(qNl978sn{%&(*&!_t>4n8NyeRdT1PSD-IymafY6tIW1O6Q?(bt!cJ+cJ zylOEjn|wn6FcSLvT)vk`eT(0YTDR_}CrT!9)qqYWFsq@6fHDNt5I7)Ub;~1S*Kv$x zi;e(8mWr;gqP0N)FGH)2dtYBMhJe3)$>H4%%PjQyPreyn!TM9czfsl`FMmz0l`n)+~b8j935luAMREg z#gGJw0?$WdU(AXKRov*}D~tC*!hyI#K(HzVo@tW+qsu3z)8fTwHAkaG*1?-j{*KAg z(?L$RMd(Ao5ceRBtIcLONoer(jQWbG81lq1i0iGI;;j zx(*5}qsIbvuFygxzpP;4bgLLN`6F-@ZOFUDfrJg{!+M7_G??m!hq#1~lDtwqx2#-e zr$kH2i#xGB;}O|IxB{U*L?39_bGGO+_MhZ&?)P0vb!xp$k%Cg!CapeNQfi=5YPe+& z#&ik1E!X5CSoR%{c8OdxQ)XGlO7BI9FA;vLLl~X(yM{kB!L_z>PjQeH*DFWZZWaWq z3V(x|I|4-&N4@j^NZl6iW7sU`eD&WmT@$|LmBwdky;YoZt-kwgjsqP6X8E#o1kZZDYSFbqI~qer+uNj_YD_Er7NwvI!I5 zp05CLH{_a|GEZ(6pCnSrC_Z}|Bs|g3q}^|{nG7$vGK8!xNin?a)g!<8?0}M;wmJi>f)u}+ z0e!t~uYB1_JZ||EqW*1=W40Qkze%Hs(LW#t-Y)9~XuPrNcuc%PVvHc1&2BVXh1Z(( zknq7d_-5 zE@7RjSd0!wh)kP$=1K|hX+90{e^dTPj*=3Ltl7X^!5emjGMjmew9vefY;Kcn7Y%}D zz}&(jc#f&9;+A+fZ#b9cxQXtYu4|o`FUyobN|OZL2SRpeVJZRVkKs!<;;X|$nlxoS z1$M5Ad<}d3LawdWkH!~XTfV8{uj2>$wCFv{ygo7@;=(e=qL;FKAgJ4ilm_s2oNh4A z6`$Oo!riK0irkS6R)if8=sWNr>+Z2piElgfclL!m8V5In?fSC)e9`K!Iu5P4eGj!) 
z6x_C8eAI9$xHIB0K9vV7#n!yKzh0=(;;R0U)`j^XhN`05K8zdy9pn)A zrxb)g+x2Q#C@3YU2Ozhjq~2JJ5(3|tIu(XGN`pomR3BCasSITO&$=$6nHcc&iJDPM zSex#z_LcqHlZc}Pxq^ar_Z9!iSNoz7gMlZb0u90ISD{4)flu6ShFzOgV5ur(_uF)k zKosj|TcB(};%Up&DY*=664-HoqbReTICt)I_e)|l{A#H~bA4JVEU{z0)~ZiNc(ITf z;8Y>kELN`~tTq7Cl^jy9?TIn9{rnl>f3}E@(+$Nzk=@>f!25=t07;(bUOrr!LYf`M zbgXW!-534(P-GA&*4g|j8)f%o+l~qg_@v7_jKE+o^CRpkb`icvk4yB2U&?*ayFKFk z)9`Pyi-OT8;cmakJu_zsg*K|uHnj@CNa9!cbxYxy2C1kgyF?kly%NBAf=D`|4?m+Z zglRwGO^{kuwMTev>Gp5A;_y-*$#!t(TsI^3+YZ_{4Rh?WT7QOWee;0R5&cyh00J56 zjN?-aIKD}q3Lj5ZhSpq^7?3K=>Owso5bsm}{;KcAqzHgAI?>2>bz;HzD1 z5yMAiS8m4WHLN)qcR{qS zgB~^QPUK|1z~;}$o{0V!uoo|^__kkt3i_5%;9L0yyjx=xf(YnN5)MdCE{$177p84e zBJbGALx+bH&^~)x<^)G}iJp7Esv!4CUh-HAd{fIj~@0z&m*bcipHGyi-(+lC9@3v5?tw(1wfXX*iT;Xihy5n_h>(lf@z1l82%|C z<{-(EXr4ANDLFX@T#v+LqPUoU6Y{@$HV*q?>-b;4@6qu5+9&Fq-(il-%Jt^IqUj3@ zVBE6S1`tv**vrQ?&5mv-RXtd=O`IY>IxEi{r;!ckRVXP^+7_$PWVC7ujs1y7a#~p* za0IG8u|jb=<8+pzMY0{39rbKfn9l~exXApMlw&%B) zgkA6})Tk6#6&VnOYQzOxs7vtkF!H14qk$CXj+mva(B7?j^QQ(SJ;i@6h&j(zOC4UA z^WNm0tuzo&j&uhI$#G%vgyBjftt0Nt97|(N-k%Y z6Ywl$RFna}O9t~?@1x^eeJu<3qO4G&AAj)ymr}^DJ>OVYpz($VgPgXQEU@t|y{fXW z9&bYn4cZ}dbvA{fr2b9UHZ|h)2zWCRuuXea#4gJf!Mn5O^5iKwgm-~R|UO)OaHj)cx_?;T+EIgDpxplh;8HKgA&lkrB^kAEDi9!zq zu>PgJNg`R}vVMk`oYg0XIWdsN>uifn;9gfQ#vg$PpsW^r>Np1}4Q6kyl|qszj0U

6-RB4L`)fGeG4YpC)_sa(ht8S&wW(?}GIEoGJQnUQu z{a$^8#}W0mIWOxy9~(|KHFkr(&iW3^GW>z$Nyh8)*7aRTXH-`D$IjCsQ8P9%J8VGM zK{Qn7$zxOFRMj%E)f|%jp!fRk}j@AEc(TWa>ZSY zLUlHePm1)^apw;%wN`v?j?8UJ@^4spk!a?yT{2lIy(sJx35~qF96~<@@f8w8Kz_A! zTc}wiWZ^YQd)jg)P@=%6u1DX^wm)y-X%6v}7x5B5$MaF8BV1S({~o4k>h-bCwolyG zfVo&S-A18E($+m?M4VLWkmos)oa{O!S$CT5vFSRO!=n+6^Z+ONhEhWlIWgtvP<*Y- z^DL%H;l=_!z_TNVzx|z$vi%Bz1{rw3r!~|A-v#D7aTGW$tTw_Q(^A5^=X8aCrheo& z$XN!s1PT-e0eGT6eKz0o-^+6ntuM^m%5ZhJw;OU@xn*g%AA`KE)h+liJ8#iu!~OZZ zg^2Ml7;t}}o;LUD+obS%vHd@q-j+AVdv($(MRfF6vwA!khUG*ryO>8i@#hVH{ubzy zL-;IM1)zSd-+0o}jN)4yMy!l;$%0v~9;WvDq_vUky-xtJKc?t=0V`}aSaS_;&3?3c z^wPr9;9O~0Y>?Wd!m&0T@(Stcnf%e8@Z)Ti!i40qVpT@!*L>@>rD>RG0#%;SZ>ouYJgDql6xc1nbhu_@smIp2RLpsFWU28?-x|K|AA|Uz6AT<9j^q6v}WE=0>Ry z6{=Te_B5+}UcfO$?#)JzWqeD#STkB>@tLhz$<*(X^|2QpTEPjH%Ft_b0k66@uBm;_ zc7IB|!sMRQ+1(F?YvQTgzVGKqRvj%bD^@OokwMq9grr9|ew`b!X^6?nHUGC1>?mEW zwvpOFoZWYsM8XUnn~;{!8ZX1K3a?cDwbmW5=(auh`)=LI{$_YEiOG6O)!+@y2rt{5 z*!rY|%Gy-zQ2(MC^v@rkA(o+dF4xBO?$F9m%2>QKuaGei>C<0JfI>n_+iyV3k&stO zv+b$S%~S-IR!aCbIaw?uqKnyRM)1!ye~hfvP3X~-+ACxJvyk;Z74V z2h0&9P{+pke?=~8AJB)N{6pl0%;JT{Hsy=`C3VD?rf{&iU z+^-}EcjhwG56L0Kuok^5?qIUN?f$3B1+wjBAtwZ=*>1LjyGfKn4Nef5J;}eB5ecIG zK0RFU+SyO-tYL9QIA#ifg0Bs+1YrN8CgAWejABZz5V^y11tVH|vu!N*CO^`wY>Q&B zHhY(qWMO>`Z2k~1%oq}9!K45&LX%)dITH$Bkm7!aOZVzwI7B-aLVqp{zUY_2)CrZ7 z0wT1vLu~{yaSGw(8@sneHg%GYjAYvUQ9Eptfi7=+h%kJ8%9@4;zb1|7Y^}f0IcYOI z3|XkeGhc_Kjz*wkD|SG9lj#X4i%{3b_z!{A7fO|bkmY-FFdpvWrl_e78%|_+#U4Bh z20DuAts}0!SJ!CON4NeXr^)rW#Tecd-M`y9!4ZfJy6KUhJ7lvd3UXDRs{708fzJBP zGP8t}G)!IFOu!!UhNkB(xla_or<^tgeJ@b9tUbTkd|{F!jEc<&(YD(%ezDt7%Df!4}n^QS_RiN=oQS7O~xGbJ@ODWfk54fgVlD1bP z;pW?r>2?f*ay}E93E^xZwGFALW6#1G#Z?WWQoa5;l{?{3TbT%HsS#DI4u%e^`*dfw zJPYw5Z5RO^5M)oc$1)aK-g`xn=W)o$jj2EXhLnOwzAEKaW!9la3uS3Dz8wH!2X2Et zcaT}|oK`t-T1-}c;h(hJlsdAr)4UVMR(R+!y>@M`F)Zab#UXpIV%KN%$~8E>=^^EY-pHri7F-**NhZ?=9a@>v*&(jfHyybReh+|{X6{_=rUmFqAgS^9P5 zlJ57Egrv_NSX4PE<**3(uK|5q#(H1tB<~hpceXSr@l|JLVgKL2T=&)KSI_lc<+=w} ze_oJ{9rW7MjfcIYeW<5#LMK+~>kt*oRSu{v=!(XaqWhD5;D-;1QI;B-{%R3Vk-0c_ zw$ZOR3nSKCAgH{(Rk^X-|7QVU?bLg7Q!*J@N=7j4^7|-tT+3W14~hrlI?%7|!&lq` ztG`@JuBtgx>=TFH)Y;NdkG>`!D7>umx*;dMjAQC#;|bYoF}1=nA`=DGag;wvO&SFs ze}!60#q0EAo5zvdjFM+=t>_a6aPM%2y1=8gmeDBt#1`_Gus^6fDA;j1By(iMIqx~m z-$A3g#2|Vh36Gbu(sR(Eb*Es-Nt6Upxz_95*f#tJ{Lo|10J|WXwzPNeCH0BD>`Ek; zDX#x4c1x2(V||ZxjBtUsh4)--00UJkCmLgQ0E@89(hGI_{K8#==)5r++;3XvEsj4(j z6d&zURA1n$%Ey^^XIJYWJ7M^&s~b)p=3Epd_Ea@DbZgiAdSW3tcUJj)e6Hz-qpxjv zm^@X%__JO}MnPBdi)bhe5b5x53DtXaubxF3^4;A03sG(zrx(g-BDx5F^_KOG&O+w&D3Gh%s&ml?0^*9%U5y zE{2eEdT&>fp2)qFY0DsQZI+caWT(z;wPQdnw~fzrb+B9aQ+|RLFjrVwk5Js;8>?cb z6@Qk1RNRLD8zp*XH?EE912UaA{rKQ*P4Sx|#pmi}SZjV)$4I!itCUo352V_dSB99f z2p@a^-l4+yBIAZK!d@M3rcqYRztJPvYet6b!MbCn1JJKIsELq8^cy zeR!UqX{==w$Cd8-c2rSa^lMJV^rD8bqo#gU>m-?J6M*`is%d;3EI_J?!$a-xYTp!R zl((sz%>VRO^UbgepHbh1NXLg*B3iujK?O~U**!6Xf3VpQ^3yR+R@5+wNOx+Z{GAzi zRW`*=w<)cp1Up}P03l{`HbZYjt?0YRY!9C~w*h15Z&8 zI6=m#fYX5}ZQ$MN+%Aa_Whdp6dMSt-`$7S=jn>lYL%?KCZeNJa# zfx-u+*Dv>^L7xsQv^_7`HkH)n4&Y!pAJp>4dGb+1u4*_rmi$fm-|teb^n>*2>k&fJ za(Klpl;$`_3Z2mShNm%}amfS*@{yj$MM~a~dwhXw6wCNWrOa^fXIA89E{^8HZxI{T{e8!N3(iWLt~(8!d#Z-ST#(q8eS-5$z|X))PWbZdlfG&UiM3~+}WTiR``7flOp zs{C(W4a}gB^xb3ww5kjs7HEZ-82aB=SKGZAe-N#U_)3SF&Du5lrepm;ru61DbHX-{ zAI&6Uk^AqhrVBUmIVpx(wGyT~i;sy1r%R$@GWnf0fvN_i@388k<5j}V?W_SbQ?Q0- zUJ1D!DcYnG*GYJQpuOT#R!+P^Q|f;z^DCvg$?+Hm6yUK7T;yHtVGT?Qsf8(lH!Q;G zP;ZJ$Mbr~ct6(CvuZ1c>@H?6=7-$9*ozm}ypl+oG6N5$}j3%c&RZT_8kb?z{=8^9? 
zZ@LoDo5J@Ehhtzn;EUoW0LRLOc`4MuamLH$3tBTtw@XTT46f^}146IB&i|{>>8r$B zVvu9o0D0<|1u1rM=r0D&h#*y>BL@^nFo+lrn+nj_dm@0ZK5J-YV|s!ZjYEw@{SVAL zh!DDOmIEE&P-NR`1R-{^DS=i(S1Lsy_J#pbD}Nxlp^;{3-5r*Lo;U0Yr0Ap&T7)si zL^%sXt6{%XDu7N1U4bn<4_U(2;GRohRqR_RX%O!F%!DCD^@3#;e*FO7kg=X3)}CWX z6})e24j}wVWD0I}9}JB*8SQBp49(nu-*kQyg-^xbG6y zPiBmxqm`h3YIFQlhw)fhC7U8VS{o7hKo&gv#7+lbOO!4)cV#MbQefHDHrV2m6NT+{ zo$>-84{KjRm+4Q)UmpB|%07I@x<>FAn3^PV`US=9+Pn%bJ=~MbzhXDO!DzpteX*A{ zC0v95;VCe#&5YE)OoW3Xw8H6rf9;fGQ?&i34+tUp|B}sgA$ZfyyadR7ykg z+Doj@qeS2;gTF(s|KqTW{=jzjY5!O7wH?60k)R{@!e%RFE3fMbc>Uw3QSqPA0_~)NPe*Di^>ZO!CTnd8N0v09!QJkb~*x z(yEGl29r-$Hb0^e^0#XN&}IfIIhayJADMin16fMYxLpwBDZNNHywt7XcQ!$p+kA54 zk5uDGGlGb-nG1dR{+9pe^D@!^e=BTO)_&oYr652q9#zY(xCwCtwVQhgi;sc{$j<7knNP6aE;6cn= z%2^j`&$``e6B>mzQdj)bqBfdWA7n_PJ32#C)$2*#zrKf_$$!ugFnEs-@bjfOBw-hQ zjCyu%uk{T|J^Zx91(083P;NRx-b9OfmM-U8l80le16@qrwl8X|hq6VkGKsr;hjjdcjmaLZ;_NW`zj7<6NuERo}=Q zwZH0qEBn*q!5=A|%vS$Z%Karahbm|)4V;U@ZeWWpO95WsBL8KEfS-Nc3zpzhg@;x2 zkMrPtpA@j1ac?ih^m)8g6pN@ zyrYbW({Zc2&IOOnua9#3SCK2w+cs=0j%GV#?Nno&@p;b4q|ms(-e*PA2SGg_hobq; zzim95JA0JRMJwO4Oj?kq(Ym|6P}-}aufb(P3ZRX3b_E6$I7xcVo??WOkMsZfCFXw? zoc7(|x$eVmD-RaKcfn~G@d}MlN1meFzg3pOo2q4ob18v4oDCgkGOUCi&^8q80r+LA zP;O5wrDLThz|MsqfPk0Lv5xgBexH}WTfDLZVEPd^$GJlwh$CwGUrmt5Ef#Y98LKEz zZ5=$mt$o@v&>zb~XU?OgWOUd}JJ0kG3I30tdcu7f8ztT9B>!MOEqFSXQH12`juL+< ziksclrfKp{o}J6jkKjr{=0wt>N8CU?newv{fP~5ksArIa#3F6f@?$Y zv-cHvfDzvp@JxXOelX1zMSKLer7@VhEVGwn9xHfs4;vAjTK#b^GH!&Vs;Jzl7ZnoN zph^zaKW$JIEGhb~qEKh_l0UeyB{k*>j@H|O$7j&JoS<%J#=934U9gq7(Me_TWgU>?v#CxF619rRbb|O_*C5GCA<5H*>n8F8Rme1%O7m+~Pmu6G z^)I^lxJL-3V8IqwXb2Gxxf@vNHC+`Z?Y^h6OQ21od#%N-;Idet=aq|mcZ$-la8y~cGQijE}DNW z0AH&XJC)$5RL-3bBcJ>L&FjMUuv5&+aoq9Wi1|Tn;P5P2z=vu1^N%95>?eyIsQ=s= z$bdVH0A^|UDU0v{zJC~e?tT(!zG8i&u$vP8^{d4RpWg#$hlUv@AR9|zb>QSa~{8k}; z9(8Ty>Fam(`M0Bg(XV!{(oj?gvX7@@s^#--{@r+lsKF|T7Y<^6v}U8)%26cQ7KicEF@<@^N=~}nCAPv zuU=gjgHqn(YcAMtrSk@kc7#*LfSN+MC0EVxp%0dx?$ z+>Y*{2JX2pgkJfQf)ZWj`}B>FPkc9;)xL04a0*_8k%O7u5=r#|56tz@f7D~+n~?n2 zY3~X#(0bV7#_O-5x_OKIBG zhQeW+MY2f0zmCLYE0WJDVP zgPdusmFmMcnXpKbY0b--9s+3B>a~gavshg7;FTHwKM%)gW8^y zIM`Kndjj(NzVfRlPE3H*-$(ZJFK_-D?x-~=`?N>No_AfTrLayMJ#^0N`Dwn!Q<;?0 zcp2W;850zt`>M3!Li_ztH}8I-pKwF+j_6A)G~X-ae{mdWj+gza5h4H> z6&)_&=-w7ZBrg?WDylbD=gB2-D3k=P@~D~h#AlGe%HwDHP4(*}aJ`ZAzp}gAE++C|l6zd`u=|EB0@)r&{?gXE1;bwAcW#ikF-$etc7)Di)b-Ly=UsaP zUTsCtB@a0X+5`MmwlJ8V!S(d3xNDS132x1y?uPm&i8nolBfZ~uIWhnCr)c5EgIexC z!%zQs?Lqq6zdSQiL#sut4BTxUt+ewO`NIZ034jjU&rd6jG$%)qBgRQ!vA@5DnNzdV zF)uI1;&aDtNy{d+u&GPo9mDUlbd_`9&l=jiym>o{F;;~D;&0rTo=>ZAi_4;0n*3ql zU`u4(oI@hv+5{IYuPae05YN$}(n+8vxq zhMjBew94O8BmKm7vy_QOB8^B;AeL@b<)1krxs5jU0G-bl4%*cF;or zkh$NF@%TT9e96i&aD`Fi8xVm%hrGyR0Bt)i9OYgHsH0kiFhL8C?8I zTeMD{pdv<-F+B6&{|P8Ebl5{-^y#M6^x@`os2q695D9zdR(JCUMgHO7B<;ihQh+2bH6OyVDIi$A7rEDcz+{0 z86+Gj)@^v1-I#QX#0pFiZckZX*iW#x?2qI3T)eMknJ?R?N&%vej%$xp_bd9s06>b? zgQc&5)8b_GtFLn+1F29P;&Ab_A%@QhPt*LA1Z39%B1aer{CJQM{gN zE1SU8-q1c*V+pY;gi!ZOo_x*m0sh+fxvDLfyfI&DWj=dhxRXpk^4tS>jes@w<}&w4hNS0>-WdC6p?HuFUU@SFs$?nibq1% zb+OFUH_xz#Thv*3?Df-T@S34-!Q2Uh0`~s>D>3?3rT~k)J_EtW17PO7+CNEI| zork6^)kI=QrB`De?d5%!3*x7WWHIIe_4e5Z@pq>`=6VRg;ihtZ3zm1CZuQ5rzo=B~ zXoU-PZ|LPUZW#1bmOL$giJycUI)a2PnriJ>1UTZzpUhS1g8U|W8hy?>_L+nF({RNP zisnvVa4&X@$=|K2-euQw%SN(-SLtHMtpZL(M9}vlqO9TH`*HL%U@qJ^1Qs40&$lJB zg+a?zPDTHl*ke$4PW61ID!{(1>J;Mj_2ypb!KA%nC5=b-4~_&6-~9jvfc|N>k1sD< zxR^6;hL2bw_a?_sXNWfeHd$AGru*_Dd?Q>=jo&c~nUUwxX2K`#wy&S|(^dS@0M8$9 zCi8%~3#=VC+}UBaZ_Vd-?PqGMl9#C|0t_}E@n62IF4?2>AYM2K?RrftOjV?^7^8F_ zKXf<`zh%1cdVF9#j`IraF0$0FtC^HX)jW$rYjZ?QGUV|B+%rhEy&yMjs5=Zye~1~? 
zBd-u!6!$aN*XB*D$9KTBIy#&MtjX&V#u;kI>4wwnCHU^c$n%`^ua9ky>t%6>RzvpJ zs>%0876qi3%PFz_qKeNdr(TQW|p{=S-pKF zHD*vZKX7$Fg)>+?cGP@C=VRipe&e?V2w_SO6Q=iF_LO=*ne(yx7kN=|Pp@SF%NVZw ze>xKG_N5L1OnW#uy82YMv*|&#oYv05j(jJw2bHYiZ2Yl=}miraE zCZtx}50+JIaEQJ%z|3EQX>JpsW?dgHvMsih+q!P{ESTdHxy&^!}kXgFJ5cY^m-CT*uxNwC#V8B^aTelxD;S_ z6`35c8nn+)1>U`pHq&T&f)4sSoK`E-ey8xchAo(0TbD59Q*0Pblzi$_LF(S_*-#Id zSyJ+cv-bypXSReA^(M?Mz3LiV5N)C4Kb4~{5E~z*&S-#BT!W$xK^!?xO|XJyme#`? zd=ND!~i#Y@qoNu)O8;@O^X_HGn_>Vz#ezrPV$SA-)G@d3RcOlJgzVZeh<+ zFUg0ATT;Z5n*B3zlvj_+2HjLldcv(F=!$3c(Zcm_zM-vm!yu~10)^xWx2=qOy67ue z=M3#`3V$h&M?rk{bG5*{1d`Gh+kS6buh3tSRml5f8^C4JRC{?Qbp|tZ7LM6R|N3v= zNv%!PL~17kbfSL{>|ZPIO6_Z8v(sLUD>;IhPuXjkxj!IFgwF}P>n4;di{8tyMHZ$z zeR|?ahDy1uS@eoEy^k($4w(zxb80PSD<_L{x&SeC2bvAwseI)Ze_$+ zHL^s6u7ZElUGdGfgj}!~@e*cG2uA6%Q%FPrX01Oz1_(a`#Q9`KvJ<$vfWo}+c2m7b zqeLod1T|^GPTti^I`@_6*!jP$?W+lm73{TGal6Li+~48Wy~J_-#Tg&&L# z<2;y|PCiEOa#K0bOm{jb`?B1GdQ&%&XC;0a0r;}EPzeSi{dMDqTbQst=pd{7(41+0AaK`#Xg+jlQU`uIvu_-xilbFOS7)o zNLIB84BjvqrcGD-(0TVklu$r-zXea=d5Z?^7bp_3AcD9b_#>PTNu7QwC%PQ2|H!RD z{^ZA8_umdJokI9i$ z9x{lnoS)pq#Us(KMy^3_usKs1hPx*K>O-ej}Bra zeqV2@qr27*u)TOPqr5HG^^(;WqO3&2GU=avWU{l0{d7{R-hCuk6{uHy%k1GpDf(FauaV_1F-65Z~2Jr|kz- zvuNcSD4gVU^?D~UKwT=B6Q*>Ht}rF2K@UUL^dQ~3NDuphO`-`W8sI%oN8sP&p~tiy zsbC;2dw`Figds$t_trDy_^M^x{iJo&z7EA zozN%>zukJwX|SZ;TWo=B85|sZM4p@lrVxiykVx8t@{IhIo4oJs-PF>BO1K+aoUw|i}@wn_k;jj1Q#`SALjQ~ zcK5}5=URF;RDHoezw)^8pdWj9yK1y(Zjhaa;;$joBpn5 zGEQt3^Y8QSeOCOdt#9USiL^Z-r1YYXX6<{x%{BO8a0$%5_I%D?b+JEn^?iTBo}|bj zs=Y$4zDBdfoop+}{G~GPUt{^SSaPx(PT+UqBp5@;6lhy&2j7?Xhdu!e%hR$}>VN|l zntlmdb!Ecl_Q^0OZ7W^@gNIdAr?wpImz(}7+TQ)KlmTl#L(?|L-WDake=}#>wnnqY zuYnt71YO8Wf&S%BHz!Tbo26%hkLG_5&8V|}oHvJB{j_FRkRTW_Ft<0r!l>eT-MS*J zt>h6YX5!8t?{{;k=yzL*NtDuKGrc3^P=_u-0kdU1;T)N|r;9qf*sXxsA~#hKVd-3& zZ#b8{6^_$u80q%R(=)|Gx2(ic1A^k!S0znxyM%*DMT#CUO}l`#o8{yB-GN7ruN5;E zX`@{1X?-#=y^QhPZ8!f;MQJ}TTHVdy&#uvLSEg)7yGZ)%&G1E?n%q|KJO^;%Rp)w1 z1|lAdHE#K6bz0p##(VQtIIS2X7jynXKUUpVAQ)9;sT8FfdILq{x*(={LzC!f<*h}L zge;(~h1p?z0xQ=xkf6n223Q7RV?+J@-f4QclqZ3p(jQhq`A6uYua@|pO=Gu(IM_{{ z<-ZH9do$H{hW~jEw-^N&Zv$Z!%;P;O`m)D>q7BV&7msvY% zJYm!)QWm?hg6v5m;3p_GKyzeY#y}Vq4xQg6f%#&h(+sx0N?)mmaA^}=D%JN(H$_90 z(ZWKrtle=)bzp1iAFnoOzX;q^p=P1T1nzMS$pil_*{HHyRPcR03$(J%=oi@`n}XO1{VtE(#Ryq`4h;=#LpdOJTjln0&%tD2uW1_L&fpRxP( zPsTjcj%AXuKZqcgzs7PyKJ`Wt^~6Vs=?ekQv$*6K zP|(moPXjhOcj!R(i3k@x2L@2AJA7SwBY+U2S-4;^{3C{CEdI;{B?f3TG|-n|4Sp&| zED7afcp80_1@l0a-7-_yX&ZZMW@qYYiScd-kkZvuD2g$}3xKH3nF^Gb{_kG{C7-rqGRK zLetjUZVR}shs%hCe=KRwx@kOeuUy%O?abKo(d{6+bAU8hafWPk>S$rw=f^+#;kDOZ z*ROBil`MAZpX zt~GctOB5j%@ZpNUlLuDc_gZWKKptOb@d(3e1NDwoLHOHmzx{7-zBv{JZ@B3jaXru}n11?a2xpU_8;uFEzF>rKSQ^V3_=s&H` z5F1CYkgn4D@QJ{UW_ahOPJ|;m(4DsFg3t%|+>>;&Jn+EpS%VxMAlTQpUw{0@q)C&< zz46A_;&U~+f?O9&cK8uL0V0tv-$i>&)$N2A_-=v~rMP6ow&ov5 z4*Lx>zfwX1M~P;Zz54M)2$eI-cfnyia zO4HSUi=Xld39=?tJTnEp-IG#2g2v1~zB8xitv-?`6k@H+!%BRN8pC55XG^j9O8|mU zZ^LpnoMoA><>W5(_> zmRPwU)?~5p0}VQqXN`moJ?;YvyH_ zUJCL_C!fq(hv4AD3Hp8EgyV1i&CRGWmW2$dQ2;s{^R2hv{^Tb<`MvLb?{l9!obhoo z%c_)1X*mXpcE0_#+itl22HXOUjeS2bbNnQp>ZO-o-gB?LjysM4cE_^G31ygL$T!pc zPdoLrH{N&yd_Igj1CVe2UvS}t&p-d%Rjk!jM_|ipN{I?1egCPS>3d0Y_*ut}$Y)6d zOGYl|;;E;c`kn852Ta0<=sf!4ZKBa7Ai;t$2Yk6DXcH|<6~QHdhyZ<$Ej4O@-Rmq; z8Mxbl#?4SJ_{3&4azX4hr-9dX9JAF{H{N(7FARvr(qx8!Rde5^A={Kym4DJ(*stnG zE-QzYQo`X5Oiy*t0$0?L013Yrpzs2t92TFFOd%iE1C85v`iDTV1ex_|{8AYS`Uw%| zVasnh=XHkp%!BN8d^*pPducl^j?(b{Dj zN;;y6uap9oQwb(bjukh#$X9tj%q;2uqgYv%-{437!LY1Yk_q|Co$x_1k4yhw;0HqB z%PjedV_{Hv3kNrRi%*WCAcwlZ0$&;yH&2$S;*%^x2^D0b)*Qu&CE`FzE$lbA<0}A4 z{!>4z@-av82?&pM3I3W;Dq<#BC2JGJWS2u3gtBHpIUR2ZkoD8OVyctCNp)0XnP$eo 
z=f@x!H+JjDy`3z4cOXvWbB$r=z?V)bpK#v6gV*>J@=1K}x%}5iu;0{A$_Ar8J3tI| zYH@#$%9&hFv#H#Yg<0Awtxb(?b8r68zh6VV>!qvyLtB z;~t#;A$1ur_|Nu5K3PTJBpw76ed;IlQiI9W(!N^BM_-O;_$v74+_S3v@_4{SUME@^ z@{xmc+gA~{!VfviTyPZ6y=qx0nwtERtd~MSV)7~E@5QV$CEuIxmg< zOyXmyKuCI#F>)2>#ntxl1xT2%9BRSK|BW}=m~9G=JmPcr-giI!fm?39l`g^6*Ibh; zOh5a=^CzEjGO-NGt-H?fbIv(O>MNVJ*laVFE57Qgs~VRzUi`C*c(6b1v{UKzvBVht zIyauLcSA!7`fjxY`w-^%3t~D1Q+Sz9_kboK($hVLbTR%Q1}u=DNl8n(bm~Ml`k#bU zbR|tqj8URnN&t&c7jYtnS1|qO=9_OOqIc<~mu|B0Mh6}IiF@z87Y68YjoEq(-2?EM zZu#WnPaJd1F++zAUD?>g8+^hy^r&e}`$epP#e!Vjov!ElkbQ!TORes`IlbYc#}<#Pd@iEKx4|}$yB<3-+nAZ zmR%I(wPIz5PPQ~pMpS%_frHLG^BYGTc@zR?C;>gYnKP#CyvwfC$i9uMaV?^k3WxI- zEu>c(x{B5?TgTQVY=eO0>Qw7`K4sB-&vTGz`=ujzY>K%ehp+9ry&S9Jvi-j|kf{I=U{#~CZU)bL-}6OCMC0NOPZ3w#Bjz2nrvHvn4VfSEw>Um!;- z*wVop;*v0oIxNMgOl4UI{8;=^uzd$S!f}WS-<29Lzyq>^6 z0!aA~Y8Fu`kf-E~4Np-;J{3XK>IIXPWc_vZ{YjvU+huSw5bkEgd?H7&U@n zT&OZv8I{T4BcfNxjmE>4d-m?l7951LjV>|X*0%tJN}3q(h_54L#_%f|0b1A*i{jxo zh7B8rc*bnI&1IKghME87H^2GJXFkKazwdwF`+of6ANB9oAO4v}$2?_Lp?dL!mv-23 z2VOSpv+uqPVZ*;QEM2n6CYv06%rPt+Ms()YS6?A$jzJNYf@6Gx2POa31c_TS*J^zK z``@?0dg~FB;86(!aZ=ca4N1c+sMB&iUT%yYEgQ5`1ElC!P4k>uhsZ^hDt7%ahVmNJAJ;$LTC-Uq`yY7V^T&Pur$71WP<9nSU&9bWwULi)6PgUyD_1OMg0{2Ed85NX zuVsymgy&|Do5j;H?<%Mi`p;_uVpZVNL|uGq(t&nF0;Fn&G3UCWaXF*4K*ax0BOEeU zF;#9-dUD2OEn|2ux4?QNgMXtbX+&;iga(m63uSWm!7qB7WKBtOhJLEsoI8H3iLFJU zr430lKll?LMf_-5UcCQ-``27^5UvtoGBB-Xzfrb@eW@i{w02fC)M$5g2s*2`Ux}9f zr!9V4`-KiPTiI_}muQ)+iKP9&e+?hO1h@G^U6Vt(Cufz>=6Y3?UUvcf8z(0^eH z{FH_?+T%a?Eb&D&kZWGiTFK|V<0ooxP%u(R)vjiXH?@75r@9ntW=?_#W}w>Q73l%744^`MdaPl+TCqpO_@T zf7G*ue5&~Goqv2FSUg668OTDn##dg^x~2Y9@!_hbp9Noq{)a{bM{88#kRO8c5p%BR}C30ug&9scurXl0>4VHy?u;4_4xedUE99`ebysJ8Earv1Km`Be3% zW?vf0f9+R|Yu~@MvfpO?424mJR2-D0xW8}`dR3@XRe!4Tui6&{Je5)u_B-z{rG1f7 z=x6F6Cxw55OrgACzn+@4h5xrSsWp-bCXxgNZYEr$g5V{3Qd~zp3`!R{&71jkSaF-x z{1Jd^WRg`WerXe0TwAPy$B5D|O*_1FLQx4#h# z`qJrNnK^UzU;p|ju`jyaAN$w=M3K%s^UMn_IKOAlUc-j1eZ&z*QI>8AFY^(dEfcfz z=>MR#q!8apfz!ceC_#)QYa)?OFiX-*nL3rWdFGj?x7uneK(^d+^S?d$Bp8o8{P3p_ zJ&af$?{wG;^5;MQIgHXpWR3YzBS$ivim^++AGW>h{V-gCQVSO^VhtzyT@13IciOK@ z-&wO}GVlSO7^Xpwh^d==+i`W^MMt{jaL7XS9MfrI9u{3Ja2VvZym3X>9^D8<&73)7 z-1za3p#R1SIJ0IpFvf~~=wjo$Uc}nyyG8tnX_d=YtXRHqAyEVPYGMQyBgq(eLo^Dp z^y$}+z7+=qI{){l|GTt-4NTcuv@=mif=ykzb|WDwn5ZhZZ<*TF-#nKT#j=|o3*#{#WF?JXGPg`B+!ol0l8U*Z#pQ9 zzcT@~v5iUMl2k~{7h#dtrw7zP{Nf^qp7@pc8a3jg-Mu2dv#VP2pZN5g!M9Hn)ROM% z@p&@x)ta{7rN%$=5K7>iQqPzR*B49qg#M$xRv5k{^g%s@Dm_Eaqz(xUU(s9%`%M;hG;mJ zCj2U@YY3oFO}b*A{`6sZIsCxh zd+kN^jo@wf?rtcIIWUF`qJBbr&<&B8z+0x_!`Edx^T{WjgsCnf_}{>;xOKn%?Qd_n z<(AN#x-GZb>ZC88w9h_!GvE|}tO8}-RRga=JgCWGdaQuvjseS-F@+rc*?zlipMT+n zrR>epsS^WHiE|M}nlkm>d+z(4+XUk=diTEZSHIf*10To;Bg;x2d+f0g<(WCihwf9~GN!g8r^(z@Xd(c4#;t)_WuBu*&iBMIX zYJPE_W#}tYu$dU!)W~}m(01Q_cPDo*T}s5?uoMC1l`Cm<1cHOb@>yPh7J2f?C%^l> zvllN~LhBROU(t~Xzuej#0t|`MPCtF&{Q1UYn{4#7FWQspAW7a*K>ha*KEx0#ycV(k zqmMb78zPRF)v5FlVG2&<@940#(W6J5e)?CkeX}Z|8|pHZL0;;=;mwuTUw{3bciyRT z=*pYzH!Dx6ntxCp5Q+zlz=R{Ox3ypLfp7Sf>tUxsJ#rhD=OVU9nq1fLO)2q}wRybt zrTu1^uy-$U8m$qItjbM^&t(ARPh(2@Lc)I}7aZBX;oqwMC-L>ya3mo)&)R}j^Pi-{ zUkyKUx0X*8Uo{FGzHs_&@x43iU_^=qQpwYScSz8;<#<*IZq!7xaFyTCOLK5$$&j2C zPS!10L!<3J@6@UE1ZO>CK zgJ19lgHUdT4~hjy7b@Xw?vxiqNsV;tR`PGMSmCBi`Gj1W6*T!L3*~~tmiW=`l8ttZ zYx0onNV3FF`2>evBxjZ?@S6o%;PV4H_rJsaahu^Z>c+R~7WGNSZ9XYlT|x62Cxu9g zdds1Fng^+H(E>cmMT>}+wFo+KXkJSd+*%UWL$wmfs~q8-QvZGCu)bbReEmbP0Y3l5 zBP&#kL}^>Va?!p5l4!=V9&s9+B))tE<&cTD{X&0I&noy;`9MF*#S@bp`hHw;@(&2d zg?=Xa^gk^65v<3TbZra^#m66CY^r6T1j`{GugaCm6mB^5Bp=r#D%}_e`Ae+S|FWTEl=&4wQ2ZdC zPA|$?#!t!zvSNgM0>6~Mi7nbUpMOY#fAA?1+@t_n;FC_O_ks8YhhIcMNfS^3>HC3p 
zsGG;|PflLX^86fEOks>cyp64uQhx+ChmB)Fj%)X?QhZr21xl?_1k$4rcOmSM$C4;( zB?<`yS9uoK+VD@*wYV%hK^NMomr-3P#N2rDX^v1={(+nNZw+A~Xs7&Tu;f4GBaIYC zO+U#2Tzsc~Mk%?8M({#hj6DCT_M7^X@=*%Ga;S(ktJyDCQUd?AUld&Z(RsA6FVB&f z`tS1w(nFXkT=o7NN~ACIuHXmxE9lUF-mFt3^~cgJ@l*bP*M2D$w2S8lEHePxz5k-o z$q+x*%CZucw0~j0q?+i(qF!+le3Hb++l%;B`;sbSbmD?!MDWW$i{vPLy+r_oy=pk( z@e!^3E>ntYKv+Sphqj6UB{H|g5Al;XyVHTgBMu$+70yWFxtt`zWH;Y>^AU$1!FVgC zQWC1T@FzdH_@awg7n$Ay54CKkHSx`f1TX04?7QDStZ;;Pf&&~5OrfH$HNal%>9S; zsXzMBk21-NP++zYeQc2Vqnb3;lEsUD`#n;ln*jE}g5p(IT>Yglp9a<8!-hTf*yCNgb)^$XV}Va!nstcg&7VsV7VY13^G*8q z?>ley++~f+uDtS!WRFXXk-6^LYmnNg(WAchwXc&x)Qor{#Rsi9h_(Ow_U&KX)Y*eD>8d&Y=3N(+~5^y`p&>wiELT8II@H5zON= zD4_n3lP*1zt-=gaY_xff`b0!4X3n04dtoYgJTH-7{m*;^duVTljBpAX4bHd5>&jiA z69bAuTnzgHjb#4JtdaTWVLTbJT91YsMI#X`^@$l&oSHN6`Q^@T*UJqk z?Ijm%2}{T|t_1&hiHz}SC&{!K<4LC^gMSEgUyPWi~>} z7mCm45v3(Q{(-}wlx;&s$)o?tzuYkUjx6BQI&Pi8pdLa`#Lc@hRqd-^`#PZ?F%q$C zQs|MGRgvv_{15uMjK$A&hH9)kFoMrc64>VQhGh)xG5kTz%-jX0VHSg?;qV3sD{y2G zE9=|WagoIIr4#tDMBLfgb|f^qCoF^*rnHQ?$V(S@s_)3yTbwbj75^Y}M!lVnj74!q ze!j7HVn2?0jg|5Uf>7VYM@lLjC(PdGalcc?z4zYxhd=(&4}S1{*7c{|5N2cDGFls^ z@ho_rV~##*#E6j}+UrASobgrm`K7^O2|l&2COZ5p7lo7G&g!mbd`(v~>STz2VYu!sI2)$e}ydj}nK5cqyV{<>dMm_yZM&&c-eu{195+qD zOjEd^2zZF@?YG~#@1EaHm^hvk{y`CGr~NXb8Wsd;VAElQsuE3@Fk#}vH<6E4Q0jjN z?G8p6yt!iRu49k={BexnhKSsg>R0@c5;jZw1~D;lv#1C5rDgL@P9D6~i*CKcBZYmD z+lwKeRoO2sf_=r?NR&PNgKSVN%7%Z4)53lW|7js>2);Cp;!&(%LHp%A>>ctl zk%}oS@67Bsts&>zqEm>O5&B;--#%7UIWw{LP6EU62WiK~`X zno3GE604t-m|2oM*Rxj52&Mm&yW~0Uel78gmdGzt@L99pihRsj#ZUgV^N>q|WMX_5uw@IhBcJy9kM^b7$$yE@83N!wmtwFL zQd0sy>lYMlDAOodbcgt{l=VVqZTk2sj>%^JEY=pkefj$oDMON~>VLcVAs_Ke{VC)h zNd-RFl5G!UG3(tLzc9?$wkIlNzQAoCUvx9H&>z!+e+AQ$|588AQIk*dAN)1TCsAwo zvK?agAn-#k+Q-K`YPT)yw|)Ktt2O^M{Y;vr{LP!sR6bv|!I!^~Mft5%3Vf7JECKQ- z^}i*)-YR)UV}gGYYX0Z%@Goo-uHv_qf4llak&1p6wSX5%+LnIfU*8M=Rs5_=&Au$( zRz8uE@vAETtJ0suFXdD9&sqQ0`m=iZzeoA}6Z*NT_N~gNI1W8d`L`n-%9@3zM|(*OZ?=&t$hA#zcxg9|4Nya{-J&Q)qnEQ{^LI2NhXDTt)hKbCI8jx zPqcHXCera!(9(ouyy}1A1P>GTT_{)f8GU*yu9o)8DIZc(4f)Pb=#TXihEFW#L`O6_ zvPrb4Bh?QM3?+KV$0HsM(R-)&C9esj>hTx1pCpTTp0=+S%ay${ceef#wL^-aJ2_L=|lqYE!!(%_`Ylg~TvhZkIMA#0YcGko3d28JXD z2zx|!@7kTQORU(%)FTi3ap;N;Hk^1E?Z6B{64_@H`7D?>|B*-j{Mu`; zZ?XC2tj5J+`STadBOo|=(j*2?QH*}kxN+m?MJoz|d zUfy_PLig@n>B_=?x9%(ou4X;;x2MR&)T%yxdjo&h-FG??q)(r<4H&hrM)UlIao#i7Z;Q#FeHAdb+$9F|RJnV`2;+(|K5LAKr)y zFanM^CSwm7#Nj$kD^`$=@rKORVp}>!t|1oc%c5+^;I`Xur%%3&YOq34ea} zy3}{>{P{ju1Y=0@-D&SGUbq-eM~oP0bMc853vK7ky!vAzm*`odSe-iVvHKp|ZnG`9 z=!iv6k;c^~$zxxXj8rEAd&jt;A;-jyp)O%Urgv z5g&*#uFVY)#Fq&^+OHNIEnDCdE=c=@S6U?Tk$>Lqg5ks`dg4U{3tf#HUZ|1UTz&!{ zmqA>Eq#*0WF7O>a6tP9K(x(jPYW;o8!r}`4AxB#WemvS&mKz&7g2ugsBz^EJM8d< zFC6#DPaTR1cj>~;4Dm<=S!jiPpej)iNrDw8_AV!Sj@291;RSfdPR~613}~!D^~j$e zIrY?2ySRpZ6SFM0+ir*b_utRZ35)ctSdP79Gz}x^%rKwEWw1&l3so_01-32)g(T`l z1kFi{@YAPHUmAs|%JE+~;ni1PrQ%cud?+quD)&ma9U$V3|HM+o54)rAi!{63jcR0v^hWzWsjM*CbI6AV48Q+WHmc{9} z-){RCUU;F;fc}dXE+DE!O)0`KUPha~J9W}8e(?)qi_EL8uV1uaQMS86iBSe@f4t6G z>l}N`F^G#4w8-?19X8)=i@o;V`|7K%LU`}I{mzRoyfAu$_3yg-ZiW(*iyb8K`HWCQ z3@L2}e|=#0-8bHFV|E3B`uOp<;c@G&x1RC|(G>ambWzk|PXhV%9Z;#hE#jS^4}){UQTQhy1yud`ye45U50oq(U1x zFXbaB*}akL`h4Igug7GHlH|y`$~%AM{Wtkf{tEq(b&7>-0HK_>-xA+L83w=P-`xDQ zz%S)varK`csq&Mi>Ur8PN8D!e8ss!&AMK(d-l$kiHOW!Jt4^EtP=v4(+sre}9Ot17 zNH?)5wiU9lHG*kwQBv1qWNGDucOeTG#Y_8&RSrTjK_=WxAebfF*O>t;V_!My!T&-& z$u^lRUD|IUF0+_gz5gb!A)mtk7yXyShbKiB;sp`=E!aScE%B3oJ2SSe6tmgL&-?k5 zN$?2^O2e;`|4b1%BxkmeBA8&Me7I=kgZ~)+Sco}GNYtxZPpFI;Yxr=(WnU(#-l?A! 
zFZD-mo8`|c83rH={>jJ15K~HC-h)*jQ%B1bAZXvvf3}9Ah-Lj%f0BQHIfGsGGoC8V zCyE}tH#V;-@KgRkO4rQ!6{@dKNyk-e0E<^B?4Kt50iFChz( z4f$v_wqx3_wF!P|_`H1y`D^7|G#U$O6I@z`!CyXIE2uDO-7CP6Sq&W^*m#Khp!&y9@kPa*9BLX^F>?io@%dfJrAY@q>o~b3Uyl5$L6UIt3(QTyh6;?1t zW$1EH5`6lH3@fsgnDs+kfk*)j!xWfh%YfBOu54n-o69c0^oS#lT1ISU8Oy>f-)8Hr z$;`zqHrw*D%N$KWm7jk4>92n6t1Lrf?~ZXkDe#bl@~84CMWRtF8r?L26}Y>0CzQtV zrkiatXpKQ2Gnnqtzx-v(%{TAYr|&w$*Lm@Ir&c}w_+x~|@Yzp1`2^JgX2_7iTWr2r z_V6>h5d%w@H`lR8PojJ7&0#R5Q--EgCg*!vHapdAX#esj zIqUV;U;ET2KK0H!?{w|nZT`IZ5TZ3Pyk32KedUa=oObG|2#4V`#03x(9~K}R|Hk-k z-B}=v(PX_C+JP09RQ*c5kY0nRSu>Zh;w5#f)#{0oW z!sfH#GcA+JgUXm<1`d2$@)AYj)~GMhQX`e0lz$ChMJ@4(Cko)hbbd6=R`_ARfQX-T z6FhVNT&~J;M z{8#a-^6_b`#83T!H$7Tf@e18JV;^hsNkPCvino<~lK*tU>OaG;2!T1yL-`nnqYfee zJhoYE*8HbjAUA5%sGt1wCrs!g-hl#i?C5w{KW2=kqfDldpnNis_HAU|-x{N=aSuOx4C4zQco&HE5IJEmwq*q+zr+{Cdh|lD54Iva*Z|CU{iw6e&#cvFz)i|tFLCBDLaP%8%)+w5yI0)A9K{5cidIq z*-(!D3TNA~)25qk7D6MD2&3{}bMTs&n~{2=^GlaH<`?hXfq>lr();%5J7vn0zdrWY zC!ctNrOlWsNbrd46h!C*d145y^5JCf#H-}83_RBx8+iurheRYnd zeC~zk_x(+&x2ZpUPjJtE%1^9)2s?kvw>Z|NLRVDbu^Me(#a$#!bBnx}c zO&uP9Vk`OW=~^c#`jw|dGC zpJb;%^4(OT#x>ar=&U2s3(V4gVvSy{lqvYn#JC#r4;BL}G%BzY7&*n+>YuN4`teD~ zihYrqyybNSj#T&)H>kG4kA{JNS6ubkmR#1Rtn;ezWfS0VXh|!ouDkOz`HvGDK0oTS z|7=OC8^Ys9&8VFe{1^B#;59!=(PTf(LJq~MBvvj}@NHbN$hbQBG~=sSCG8pmtLDGJ zmyg7k(WtBZ^FQU^3g1lpHS14X`BeSC=&Qm92RbNZ3RC$<$3nwvgCF{1^%M#B7tJI% zNrv(pB*&%uX~BO;1N^u`sdn%y?c0+78orbzANrH}DdVl=LzaIEU!=fK{cP6%Hu!D$ zx7JbNs2<|1+Q(z}J2iV2K|OMWoAhRe!4Tv3(J?StzW2 zmiT2;zOoi61p6ii_}%$fpgygc3ivk)L2K@yQ+u>VHi>7D*vS;e!8`^6^Sb`8V5F zDSrvZU5%XW$R`9V`IJvfeCyp3KT=F2f0VI+ZBvtvHL-5|H_fb(zp9_5eA?o-;lI+p zssA#%YW)90`)y@kuoA3vOiezxMj<{zr^L7=O!uQu}eul;Jjsu&`!`iE8NXIuYM z)z4MwPtE_(%909nRlA~}nt?aC;$2N>erS4;p9El3lWbpc_@w_4mA~fpE%-0-ts?kM zyxu5!GHR3_O^CqZx9mTs&IQxi2}HYOEi-d9A1#xn8X7#MEvGNfveFQnPdKR(KP8<{ z>W(Vn6I|H4VAIRN{P8D~i{&e!x|Q4Su;WojA9KHncnU3A&WSh7E^d zRD#uc+^nFnk%fL(JZOYW;<@1@@q((vuX%Lo!PkQ)t z;Avk3vgyVfyHtODM$8?Xe@JMTcON*Q|J14P5~w8+{)u?r=2mytQ=EFyf~=MetX5O%Kdg9~ z9mvqLs(pDC*=Sd(400a1v{uIaz`y$B9U;1fam|UDWsz<%!Yh*fB6}4-Nk`rS9|C4F z3Hf`R_RF_~5eMsD7q^D;F32oip0O;KQ0-y$YHYV>Hd534~Hk&I08p$I{`s23*`mxqe{ z(Q07fA1h$gDp4FKIxbxTzMQZ!BjJ;J!nF;g@L@zGJ5*q~!3JTd72$li<^VDDpV&m% z%usUiGF>ivalnb=L7TcT0G1#~Q`1@B{`SSc_&KB7*s+md7M$TEaBXnQKr;$5Ue$W8 zSjmvIJMX;fLm&E(0zvH?b5LPO!%5aFI@h~FFa-&#L6xW^T;bm^H1LVyopSQY%rqnv z{^S#X``JZ5W0zsn3qd{p#1o7W#@izV%%0o9!o>@HfkBlUv|JxTSx4L#4={$Nxq~oG zOPGvh2ocm-XPte|-FLAXAPnH3ctxSqJ9Hol#!xRRMGOxA50m(Zc?%XWUW{;3$8H@* zjvR@9vu*Jtwk`X6%*C-V92g(iw3P98^%EvcAa+Pu8!ftg4?XmdT1J4+I3tVKyMHM1 z!JEP`tLe>{HIpgl_%N!2L{wZ0qjE+Sm5~~t4p2?h1cyhPZnE(vEIP)1CTLl_qoC2z zoRvtUXZM~ANv2-hVxD{cIi3jdmQ1e1h4JnwlKfeF{?{mwejgc7t&;%NSR^?^` zNo2ySk>ig)A$JskC@*h#3*y?FjWm3mC3?|i@*ja`Oo%E{yBz9Rt@qQg*2zN zGdt?Y2{@j&v0v+tlI8<%5Y4jcH&@bhVq#`1`$e6pMGZfgDU2%Lx0xG1=)@E=$b$G?^M z@*j96^S{7Pgs55KbNf^NnNq>eS~XV+A6xim@&=2#T(0wp>O@xLszgdO#a@v=1q!an z5M{|qB|Vl``LD51a4MCMZRBCtsb@(;()!TQAFIGw{P@+TeU(5({*g1;Uj_fEKRCNW zP$fQ0N-I9uzQRD$oV1nHN#VCDpJ37p{3B0MB+AwBgZY#M{0mgY=d`taLMBq;uaIG2 zROG`s_S>ihr*3E$a*3(%`jUz2Srbf^S3%{8uY;oP^f#hK_PirR^+?2|xq{aQ&dgLR zsuMQobxxCiWx{`G8NgRqAp|qDvS03BVoEdYmmIw6DDkn_#3WC)4jBdE!%xUR%%T|a z4f(iMKJ8FE^wZzC01kO7{3lvK%5=eh;OCngh)~qxUgnr@r4~HPUQ{hI$YIE^8Q%;A zQ&s*`K9rIt?bu&#TM`|mI?=ZQHiKs$iK$Fx7X5trhE!f zrktw$2gQ^>0Z`wvgnm+C@R1=3#Zk!0H!ZATN6K0O4K41UVJjM$n4v$bk`LEw_$Wcie_Q<4_FHRT1PixXWyyck zLM|0crG4e4kWZ+0DSzw3f4W>P+n3D$vGyy^CI1}2e^q}}7Osc;|kdjAcLQV)<% z<^H7@O8fF^iJ$U8%KwM<%T7vEilw&c6Wg*@uyy-dUz@m)498|G^xp&cXMhK}meWz@ zb`bLUr|efdLeZs{pYr~K{v)UNZoil!`$u(%j+Fd|!=Nb>87~yeNOOTJ5Tw%9&U(>y 
zfpNS1MQ~y758HnmYpG_dI`lyd+V@=l>Z55}w1wvQ<6fk+A);&X)iVo9uH!>ehyFIp zQU8`r(`HqOiEr!Jzr@g8!)}gm^rJ04_v5cagNE8q{^i!a;MjwLz1yt#D}M>Mo-=;Z z-BYddlk*-w?oIwM;Rn8pysgoPD%rmj+KwPTcs^CavZ?DCzCu{D*h9tRuluQi#i9=z z(LepySxNO`#P_TPp`@hv5YERlG|PrOIeRK-l4P|3XOZr9P$@opNdPx^20 z^-=r)u*;Bf(5L)I?K6H(uY(;WjBQd+7RS%vw-(JcDXrijv1vN$(OF`TU;3T=4NPpK zeA3fI3B7Ingo9StG=4np%u?zjzaD?fKWG};&{3^6rAe;s>lX=qp7GQ_*L%s~YfSF zZun=*TrAC<^V|3}a*~|;dO{#F9Plv_U!FD<8>6PmHJ9+GU)i!YU^^12;#(~mhztNI zVMSBW1_0L!{cT?uveDzO-1OUfW34F|BJcf|U^7-af5Sjr)t&g!C#^jAkNJQ1s?T17 z1AYjK2v#4#ooKS2q~SH?$|^ji7KJx`TE_)ns3_CI-5kwAsm6WqtB=Zi{K~g&R1TZW zPuv_Fe@Lk9;O1CTeSEH;V^q!`gRgib1>cIM`oNa|h5ozfqYoEL5yRrR`}O_%p9Ub8 z(tJLY4qR!P+D-hJ*+Y{2(tp)w9Ut0&BYvE=9OCC6q5s}}^&wP^f6m>E1Zickq?HY> z*LNRz<`+S282GiWbWe=V%+X8Uif5A!R%WavU6?pN?I!o$mM-ck15 z+Yfx9l79-T6ld?e8M{Lx?5($1^#*E0eKR{)ljqKMz$n!>8=PN8!u( zB&VvD4ZGsAQ_&vLCvE8QR|M~KPl>6|LH|)SF^<{?FI{624+Ie$2guPukw9VNt2q_QmE={a1a`g3;eTx9cOs z;;G?lC))IvV3atG&oli$eE4hUeDuA4dh<^_hTqG++`akcEsv&t_ueP|+}`<&YGp*r z@K2}ZA~KJ5WYa*gNryfka2rny7U8GSv{MGYwOP z#u z8Tb|lKjXg{f+Ap&;{0iR6`*AJU&HrlmhqlTHGsLMG$z4k`h1a1R^pTP4Zcl2^-sgJb%@Ut-0<03 z`9E79K94)YCyFQj+P-Uip6N5pF{i<-|E|w^456Xv%~{evS>@TIr%Pu_;l-jV;P%O0Pzk|~GrhIPbu zEH6Vv0Y9_|R9WOG%Nsjme&uw%B@&;Sk7DA#^xs|oqdwyUW|e&PPyhT+QI}&lV&-Z} zhD)|dZ#%hF;dys4$9J;Xj<%d{M|_~b&#Atf zVUu87;-~xzEkj>&E`F*$cFMo;54X*z_$`Sv^#n@!ulnSyi@9w2tk82iE|2rWoy5w2 z%zvV}lHso5D;qM!Osc!vcIuqodDJKAyLn2Q=3rESBYsjZ;NzACrluYLL)}%QVd5Z_~bO{4{g;(`%upR&lw;&#KS0_*wB+{v+l`ed0_0 zp>QdFs?UvIueF-$|LOykDn1D$*)PUoGK*H^v@Xk=wTQ8+7_I!pCtJ*-u#-`zyQU4y z;*YvyXXF&%k^XBfv_0q}<)h|j@w4iW@kRcp{kO0+p7?W5huyBQwmwx~^hx?|-kPyr z{=s%NP{#hcH?{+r{@apLm;Z2;-HBt7FI<&6#($i2HDfaRlpGK75*WDIzi1FslFeH1 z#6{zWPfWJ<)st}$D^~~nF_zX1xf36|jsJie?s9&?BLT{P5ReQ-iBqpKTa+b7A%}VY z9aGrAXym(_O2oEBv=aaR_MQ)u%%EuaI~8%4{=>DTH{80wr8mBC|9=1oZN!seS|)b@ O0000KRH$a literal 0 HcmV?d00001 diff --git a/docs/faq.md b/docs/faq.md index c25515abc..d21236df3 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -208,7 +208,7 @@ OpenCL / Vulkan 采用静态变量自注册的方式往 MNN 主库注册后端. ``` -### 部分模型用 MNNV2Basic 运行出现段错误 +### 部分模型用 MNNV2Basic 运行出现段错误,或报 Interpreter don't support case for shape compute need input content, please use module api instead - 模型不满足运行条件 - MNNV2Basic 使用 Interpreter + Session 方式运行,此类运行方式要求模型满足一定条件,否则无法运行模型或产生特别的 crash ,条件如下: diff --git a/docs/start/demo.md b/docs/start/demo.md index d76166811..376054f0b 100644 --- a/docs/start/demo.md +++ b/docs/start/demo.md @@ -6,7 +6,7 @@ 代码位置:`demo/exec/multiPose.cpp` 1. 下载原始的Tensorflow模型 [pose model](https://github.com/czy2014hust/posenet-python/raw/master/models/model-mobilenet_v1_075.pb) -2. 使用 [模型转换工具](../tools/convert.md) 转换为 MNN 模型 +2. 使用 [模型转换工具](../tools/convert.md) 转换为 MNN 模型,转换时加上参数 --keepInputFormat=0 【把输入由NHWC转换为NC4HW4布局】 3. 
diff --git a/docs/start/demo.md b/docs/start/demo.md
index d76166811..376054f0b 100644
--- a/docs/start/demo.md
+++ b/docs/start/demo.md
@@ -6,7 +6,7 @@
 Code location: `demo/exec/multiPose.cpp`
 1. Download the original TensorFlow model [pose model](https://github.com/czy2014hust/posenet-python/raw/master/models/model-mobilenet_v1_075.pb)
-2. Convert it to an MNN model with the [model converter](../tools/convert.md)
+2. Convert it to an MNN model with the [model converter](../tools/convert.md); when converting, add the option --keepInputFormat=0 (convert the input from NHWC to NC4HW4 layout)
 3. Run pose detection
 ```bash
 ./multiPose.out model.mnn input.png pose.png
diff --git a/docs/tools/test.md b/docs/tools/test.md
index c4981e2a3..532877f9e 100644
--- a/docs/tools/test.md
+++ b/docs/tools/test.md
@@ -64,7 +64,7 @@ Avg= 5.570600 ms, OpSum = 7.059200 ms min= 3.863000 ms, max= 11.596001 ms
 ## ModuleBasic.out
 ### Function
-Similar to `MNNV2Basic.out`; for models with control flow, or with multiple inputs and outputs, this tool is recommended
+Similar to `MNNV2Basic.out`; for models with control flow, or with multiple inputs and outputs, this tool must be used
 ### Parameters
 `./ModuleBasic.out model dir [runMask forwardType runLoops numberThread precision_memory cacheFile]`
 - `model:str` path of the model file
@@ -73,7 +73,7 @@ Avg= 5.570600 ms, OpSum = 7.059200 ms min= 3.863000 ms, max= 11.596001 ms
 - `forwardType:int` compute device used for inference, valid values: 0 (CPU), 1 (Metal), 2 (CUDA), 3 (OpenCL), 6 (OpenGL), 7 (Vulkan), 9 (TensorRT); optional, default `0`
 - `runLoops:int` number of loops for the performance test; optional, default `0`, i.e. no performance test
 - `numberThread:int` number of GPU threads; optional, default `1`
-- `precision_memory:int` precision and memory mode of the test; precision_memory % 16 is the precision, valid values: 0 (Normal), 1 (High), 2 (Low), 3 (Low_BF16); optional, default `2`; precision_memory / 16 is the memory setting, default 0 (memory_normal). For example, to test with memory 2 (low) and precision 1 (high), set precision_memory = 9 (2 * 4 + 1)
+- `precision_memory_power:int` precision, memory and power mode of the test; precision_memory_power % 4 is the precision, valid values: 0 (Normal), 1 (High), 2 (Low), 3 (Low_BF16); optional, default `0`; (precision_memory_power / 4 % 4) is the memory setting, default 0 (memory_normal); (precision_memory_power / 16 % 4) is the power setting, default 0 (power_normal). For example, to test with memory 2 (low), precision 1 (high) and power 0 (normal), set precision_memory_power = 9 (2 * 4 + 1 + 0 * 16)
 ### Default output
@@ -82,6 +82,7 @@ Avg= 5.570600 ms, OpSum = 7.059200 ms min= 3.863000 ms, max= 11.596001 ms
 ### Generating the test folder
 - If the original TF / ONNX model is available, the folder can be generated with scripts such as testMNNFromTf.py / testMNNFromOnnx.py / testMNNFromTflite.py
 - If only the mnn model is available, use tools/script/make_test_for_mnn.py to generate the test folder: mkdir testdir && python3 make_test_for_mnn.py XXX.mnn testdir
+- To make it easier to simulate runtime performance inside an application, you can edit input.json in the test folder and add a freq entry that specifies how often the model runs (times per second)
 ### runMask parameter description
 - 1 : dump intermediate inference results; each operator's input is saved to (Input_{op_name}.txt) and its output to ({op_name}.txt), by default under the output directory of the current directory (create the output directory yourself before using the tool); cannot be combined with 2 / 4
@@ -93,6 +94,7 @@ Avg= 5.570600 ms, OpSum = 7.059200 ms min= 3.863000 ms, max= 11.596001 ms
 - 64 : after building the model, clone a new model and run it, to test the correctness of the clone feature (mainly used for concurrent inference)
 - 128 : use input.mnn and output.mnn in the folder as the input and the reference output; recommended when the data volume is large
 - 512 : enable the memory optimization used when convolutions are computed with the Winograd algorithm; this lowers the model's runtime memory but may cost performance.
+- 1024: when running with dynamic quantization, quantize the input data per batch to improve inference accuracy
 ### Examples
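As a worked illustration of the precision_memory_power packing described above, here is a tiny helper sketch (hypothetical, not part of the tool) that composes the argument from its three fields:

```cpp
#include <cstdio>

// Sketch: compose ModuleBasic.out's precision_memory_power argument.
// precision, memory and power each take the 0-3 values listed above; the tool
// decodes them back as value % 4, value / 4 % 4 and value / 16 % 4.
static int packPrecisionMemoryPower(int precision, int memory, int power) {
    return precision + memory * 4 + power * 16;
}

int main() {
    // precision = 1 (high), memory = 2 (low), power = 0 (normal) -> 9, matching the example above.
    std::printf("%d\n", packPrecisionMemoryPower(1, 2, 0));
    return 0;
}
```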
diff --git a/docs/transformers/diffusion.md b/docs/transformers/diffusion.md
index ffe6eb9d0..32d790a26 100644
--- a/docs/transformers/diffusion.md
+++ b/docs/transformers/diffusion.md
@@ -2,44 +2,77 @@
 ## Supported models and downloads
-[Download-runwayml/stable-diffusion-v1-5]:
+1. runwayml/stable-diffusion-v1-5
+```
 https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main
-[Download-IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1]:
+```
+2. chilloutmix
+```
+https://modelscope.cn/models/wyj123456/chilloutmix
+```
+3. IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1
+```
 https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1/tree/main
-
+```
 ## Model conversion
 ### Convert the Huggingface Stable Diffusion model to an ONNX model
+```sh
+cd mnn_path/transformers/diffusion/
 python export/onnx_export.py \
     --model_path hf_sd_load_path \
     --output_path onnx_save_path
+```
+Note: the script above depends on libraries such as torch/onnx/diffusers; a conda environment can be set up with:
+```
+conda env create -f env.yaml
+conda activate ldm
+```
+Run the export script inside this conda environment
 ### Convert the ONNX models to MNN models
 Create a folder for the diffusion MNN models and put the converted mnn files into it.
+1. Convert the encoder from ONNX to MNN
+```
 ./MNNConvert -f ONNX --modelFile onnx_save_path/text_encoder/model.onnx --MNNModel mnn_save_path/text_encoder.mnn --weightQuantBits 8 --bizCode biz
+```
+2. Convert the denoiser from ONNX to MNN
+```
 ./MNNConvert -f ONNX --modelFile onnx_save_path/unet/model.onnx --MNNModel mnn_save_path/unet.mnn --transformerFuse --weightQuantBits 8 --bizCode biz
+```
+3. Convert the decoder from ONNX to MNN
+```
 ./MNNConvert -f ONNX --modelFile onnx_save_path/vae_decoder/model.onnx --keepInputFormat --MNNModel mnn_save_path/vae_decoder.mnn --weightQuantBits 8 --bizCode biz
-
+```
 ## Building the Diffusion demo
 ### On Linux/Mac/Windows
+```
+cd mnn_path
+mkdir build
+cd build
 cmake .. -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON
-
+make -j32
+```
 ### On Android
+```
-cd project/android/build
+cd mnn_path/project/android/build
 ../build_64.sh -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON
-
+../updateTest.sh
+```
 ## Running the Diffusion demo
+```
 ./diffusion_demo <resource_path> <model_type> <output_image_name> <input_text>
-Here resource_path is the path of the folder containing the mnn model files; besides the mnn files, you also need to
-(1) copy transformers/diffusion/scheduler/alphas.txt from the MNN directory into that folder.
-(2) for the stable-diffusion-v1-5 model, copy merges.txt and vocab.json from the huggingface tokenizer directory into that folder; for the Taiyi-Stable-Diffusion model, copy vocab.txt from the huggingface tokenizer directory into that folder.
-
-model_type is the category of the two currently supported diffusion models: set it to 0 for stable-diffusion-v1-5 and to 1 for Taiyi-Stable-Diffusion.
-
-output_image_name is the name of the generated image; by default the image is written to the current working directory.
-
-input_text is the text-to-image prompt; an English prompt is recommended for stable-diffusion-v1-5 and a Chinese prompt for Taiyi-Stable-Diffusion.
+```
+Here resource_path is the path of the folder containing the mnn model files; besides the mnn files, you also need:
+1. Copy transformers/diffusion/scheduler/alphas.txt from the MNN directory into that folder.
+2. For the stable-diffusion-v1-5 model, copy merges.txt and vocab.json from the huggingface tokenizer directory into that folder.
+3. For the Taiyi-Stable-Diffusion model, copy vocab.txt from the huggingface tokenizer directory into that folder.
+4. model_type is the category of the two currently supported diffusion models: set it to 0 for stable-diffusion-v1-5 and to 1 for Taiyi-Stable-Diffusion.
+5. output_image_name is the name of the generated image; by default it is written to the current working directory.
+6. input_text is the text-to-image prompt; an English prompt is recommended for stable-diffusion-v1-5 and a Chinese prompt for Taiyi-Stable-Diffusion.
 Example run commands:
-./diffusion_demo mnn_save_path 0 demo.jpg "a cute cat"
-./diffusion_demo mnn_save_path 1 demo.jpg "一只可爱的猫"
-
+```
+./diffusion_demo mnn_sd1.5_path 0 demo.jpg "a cute cat"
+./diffusion_demo mnn_chilloutmix_path 0 demo.jpg "a pure girl"
+./diffusion_demo mnn_taiyi_path 1 demo.jpg "一只可爱的猫"
+```
diff --git a/docs/transformers/llm.md b/docs/transformers/llm.md
index ea671993b..2358548c6 100644
--- a/docs/transformers/llm.md
+++ b/docs/transformers/llm.md
@@ -143,6 +143,12 @@
 - visual_model: when a VL model is used, the actual path of visual_model is `base_dir + visual_model`, default `base_dir + 'visual.mnn'`
 - Inference configuration
   - max_new_tokens: maximum number of tokens to generate, default `512`
+  - reuse_kv: whether to reuse the `kv cache` of earlier rounds in multi-turn dialogue, default `false`
+  - quant_kv: whether to quantize the `kv cache` when storing it, valid values `0, 1, 2, 3`, default `0`, with the following meaning:
+    - 0: neither key nor value is quantized
+    - 1: store the key with asymmetric 8-bit quantization
+    - 2: store the value in fp8 format
+    - 3: store the key with asymmetric 8-bit quantization and the value in fp8 format
 - Hardware configuration
   - backend_type: hardware backend type used for inference, default `"cpu"`
   - thread_num: number of hardware threads used for inference, default `4`
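The quant_kv values above mirror the KVCACHE_QUANT_OPTIONS runtime hint that this patch adds to Interpreter.hpp further below. A hedged sketch of setting the equivalent hints through the C++ session API (assuming the existing Interpreter::setSessionHint call and a placeholder model path):

```cpp
#include <MNN/Interpreter.hpp>

int main() {
    // Placeholder model path; applies to any model that exercises the KV-cache / dynamic-quant paths.
    auto net = MNN::Interpreter::createFromFile("model.mnn");
    // 3: quantize both the key cache (asymmetric int8) and the value cache (fp8), as described above.
    net->setSessionHint(MNN::Interpreter::KVCACHE_QUANT_OPTIONS, 3);
    // 2: per-tensor dynamic quantization (see the DYNAMIC_QUANT_OPTIONS comment in the Interpreter.hpp hunk).
    net->setSessionHint(MNN::Interpreter::DYNAMIC_QUANT_OPTIONS, 2);
    MNN::ScheduleConfig config;
    auto session = net->createSession(config);
    // ... resize inputs and run the session as usual ...
    net->releaseSession(session);
    MNN::Interpreter::destroy(net);
    return 0;
}
```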
diff --git a/express/Executor.cpp b/express/Executor.cpp index 0edb9d6ad..f6b85765c 100644 --- a/express/Executor.cpp +++ b/express/Executor.cpp @@ -17,7 +17,7 @@ #include "core/Backend.hpp" #include "RuntimeAttr.hpp" #include -#define DEFAULT_BACKUP_RUNTIME_KEY (std::make_pair(MNN_FORWARD_CPU, 1)) +#define DEFAULT_BACKUP_RUNTIME_KEY MNN_FORWARD_CPU #ifdef MNN_EXPR_ENABLE_PROFILER #define MNN_EXPRESS_ERROR_REPORT #endif @@ -41,12 +41,14 @@ void Executor::setGlobalExecutorConfig(MNNForwardType type, const BackendConfig& if(type == MNN_FORWARD_OPENCL || type == MNN_FORWARD_METAL) { info.numThread = 4; } - mAttr->firstType = std::make_pair(type, info.numThread); + mAttr->firstType = type; auto firstIter = mRuntimes.find(mAttr->firstType); if (firstIter == mRuntimes.end()) { info.user = (BackendConfig*)&config; std::shared_ptr bn(creator->onCreate(info)); mRuntimes[mAttr->firstType] = bn; + } else { + firstIter->second->onReset(numberThread, &config); } } else { auto creator = MNNGetExtraRuntimeCreator(type); @@ -58,7 +60,7 @@ void Executor::setGlobalExecutorConfig(MNNForwardType type, const BackendConfig& MNN_ASSERT(nullptr != creator); Backend::Info info; info.type = type; - mAttr->firstType = std::make_pair(type, numberThread); + mAttr->firstType = type; auto firstIter = mRuntimes.find(mAttr->firstType); if (firstIter == mRuntimes.end()) { info.mode = Backend::Info::DIRECT; info.numThread = numberThread; info.user = (BackendConfig*)&config; std::shared_ptr bn(creator->onCreate(info)); mRuntimes[mAttr->firstType] = bn; + } else { + firstIter->second->onReset(numberThread, &config); } } _refreshRuntime(); @@ -83,10 +87,10 @@ void Executor::gc(GCFlag flag) { } Executor::Executor(std::shared_ptr backend, MNNForwardType type, int numberThread) { - mRuntimes.insert(std::make_pair(std::make_pair(type, numberThread), backend)); + mRuntimes.insert(std::make_pair(type, backend)); mAttr.reset(new ExecutorAttr); - mAttr->firstType = std::make_pair(type, numberThread); - if (1 != numberThread || MNN_FORWARD_CPU != type) { + mAttr->firstType = type; + if (MNN_FORWARD_CPU != type) { // Create Backup Backend Backend::Info info; info.type = MNN_FORWARD_CPU; @@ -151,7 +155,9 @@ std::shared_ptr Executor::getGlobalExecutor() { info.type = MNN_FORWARD_CPU; info.numThread = 1; std::shared_ptr bn(creator->onCreate(info)); - bn->setAllocatorType(info.allocator); + RuntimeHint hint; + hint.memoryAllocatorType = 0;// Defer +
bn->setRuntimeHint(hint); gExecutor = new std::shared_ptr(new Executor(bn, MNN_FORWARD_CPU, 1)); }); return *gExecutor; @@ -178,13 +184,13 @@ void Executor::_refreshRuntime() { mRuntimeInfo.second = mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY]; auto firstIter = mRuntimes.find(getAttr()->firstType); if (firstIter != mRuntimes.end()) { - mRuntimeInfo.first.insert(std::make_pair(firstIter->first.first, firstIter->second)); + mRuntimeInfo.first.insert(std::make_pair(firstIter->first, firstIter->second)); } else { MNN_ASSERT(false); } for (auto& iter : mRuntimes) { - if (iter.first.first != getAttr()->firstType.first) { - mRuntimeInfo.first.insert(std::make_pair(iter.first.first, iter.second)); + if (iter.first != getAttr()->firstType) { + mRuntimeInfo.first.insert(std::make_pair(iter.first, iter.second)); } } } @@ -301,7 +307,7 @@ Executor::RuntimeManager* Executor::RuntimeManager::createRuntimeManager(const S } } compute.user = config.backendConfig; - auto iter = originRt.find(std::make_pair(compute.type, compute.numThread)); + auto iter = originRt.find(compute.type); if (iter == originRt.end()) { auto creator = MNNGetExtraRuntimeCreator(compute.type); if (nullptr == creator) { @@ -312,11 +318,13 @@ Executor::RuntimeManager* Executor::RuntimeManager::createRuntimeManager(const S MNN_ERROR("Can't create Runtime: %s\n", EnumNameForwardType((ForwardType)compute.type)); return nullptr; } - originRt.insert(std::make_pair(std::make_pair(compute.type, compute.numThread), std::shared_ptr(newBn))); + originRt.insert(std::make_pair(compute.type, std::shared_ptr(newBn))); + } else { + iter->second->onReset(compute.numThread, compute.user); } res->mInside->mRuntime.second = originRt[DEFAULT_BACKUP_RUNTIME_KEY]; - res->mInside->mRuntime.first.insert(std::make_pair(compute.type, originRt[std::make_pair(compute.type, compute.numThread)])); - res->mInside->mInfo = originRt[std::make_pair(compute.type, compute.numThread)]; + res->mInside->mRuntime.first.insert(std::make_pair(compute.type, originRt[compute.type])); + res->mInside->mInfo = originRt[compute.type]; res->mInside->mNumberThread = compute.numThread; if (nullptr != config.backendConfig) { res->mInside->mConfig = *config.backendConfig; @@ -586,10 +594,8 @@ void Executor::_makeCache(const std::vector& expr, bool forceCPU) { scheduleInfo.pipelineInfo[0].first.reportError = false; if (forceCPU) { scheduleInfo.pipelineInfo[0].first.info.type = MNN_FORWARD_CPU; - scheduleInfo.pipelineInfo[0].first.info.numThread = 1; } else { - scheduleInfo.pipelineInfo[0].first.info.type = current->getAttr()->firstType.first; - scheduleInfo.pipelineInfo[0].first.info.numThread = current->getAttr()->firstType.second; + scheduleInfo.pipelineInfo[0].first.info.type = current->getAttr()->firstType; } scheduleInfo.pipelineInfo[0].first.needComputeShape = false; scheduleInfo.pipelineInfo[0].first.needComputeGeometry = mLazyMode != LAZY_CONTENT; diff --git a/express/Expr.cpp b/express/Expr.cpp index aa664ad24..be8b01bfa 100644 --- a/express/Expr.cpp +++ b/express/Expr.cpp @@ -206,7 +206,7 @@ EXPRP Expr::create(std::shared_ptr extra, std::vector&& inp expr->mInputs = std::move(inputs); auto exe = ExecutorScope::Current(); expr->mInside->mReq = exe->getRequirement(expr.get()); - if (!(exe->getLazyMode() & Executor::LAZY_COMPUTE_ONCE)) { + if ((!(exe->getLazyMode() & Executor::LAZY_COMPUTE_ONCE)) && exe->lazyEval) { _addLinkForInputs(expr); } return expr; @@ -1228,21 +1228,8 @@ void Variable::save(const std::vector& vars, NetT* dest) { auto des = TensorUtils::getDescribe(tensor); auto 
describe = std::unique_ptr(new MNN::TensorDescribeT); describe->index = varIndexInfo[expr] + v; - describe->blob = std::unique_ptr(new MNN::BlobT); describe->name = dest->tensorName[subindex]; - auto& blob = describe->blob; - blob->dataFormat = des->dimensionFormat; - if (tensor->getType() == halide_type_of()) { - blob->dataType = DataType_DT_FLOAT; - } else { - SET_TYPE(INT8, int8)} - SET_TYPE(UINT8, uint8)} - SET_TYPE(INT32, int32)} - SET_TYPE(INT64, int64)} - } - for (int d = 0; d < tensor->dimensions();d++) { - describe->blob->dims.push_back(tensor->buffer().dim[d].extent); - } + auto tensorDes = TensorUtils::getDescribe(tensor); if (nullptr != tensorDes->quantAttr) { describe->quantInfo.reset(new TensorQuantInfoT); @@ -1252,6 +1239,20 @@ void Variable::save(const std::vector& vars, NetT* dest) { describe->quantInfo->scale = tensorDes->quantAttr->scale; } if (staticModel) { + describe->blob = std::unique_ptr(new MNN::BlobT); + auto& blob = describe->blob; + blob->dataFormat = des->dimensionFormat; + if (tensor->getType() == halide_type_of()) { + blob->dataType = DataType_DT_FLOAT; + } else { + SET_TYPE(INT8, int8)} + SET_TYPE(UINT8, uint8)} + SET_TYPE(INT32, int32)} + SET_TYPE(INT64, int64)} + } + for (int d = 0; d < tensor->dimensions();d++) { + describe->blob->dims.push_back(tensor->buffer().dim[d].extent); + } for (auto& reg : des->regions) { auto regionT = std::unique_ptr(new MNN::RegionT); regionT->src = std::unique_ptr(new MNN::ViewT); diff --git a/express/RuntimeAttr.hpp b/express/RuntimeAttr.hpp index 3272cde95..21fd54fa0 100644 --- a/express/RuntimeAttr.hpp +++ b/express/RuntimeAttr.hpp @@ -24,7 +24,7 @@ struct RuntimeAttr { }; struct ExecutorAttr { std::shared_ptr constantBackend; - std::pair firstType; + MNNForwardType firstType; std::string externalFile; }; }; diff --git a/express/module/Module.cpp b/express/module/Module.cpp index 00b0a63bc..82172bfbd 100644 --- a/express/module/Module.cpp +++ b/express/module/Module.cpp @@ -32,8 +32,8 @@ static MNN::Express::Executor::RuntimeManager* _createDefaultRuntimeManager(cons sche_config.backendConfig = config->backend->config; } else { auto exe = ExecutorScope::Current(); - sche_config.type = exe->getAttr()->firstType.first; - sche_config.mode = exe->getAttr()->firstType.second; + sche_config.type = exe->getAttr()->firstType; + sche_config.numThread = 1; } return Executor::RuntimeManager::createRuntimeManager(sche_config); } @@ -165,7 +165,7 @@ class NetModule : public Module { setType("Net"); #ifdef MNN_INTERNAL_ENABLED if (nullptr != net) { - mLogInfo = getBasicLoggingData(); + mLogInfo = logBasicInfo(); std::string uuid = std::string(net->mnn_uuid() ? 
net->mnn_uuid()->c_str() : ""); mLogInfo.emplace("UUID", uuid); mLogInfo.emplace("ModelVersion", info->version); @@ -208,8 +208,8 @@ class NetModule : public Module { auto mModule = mChildren[0]; #ifdef MNN_INTERNAL_ENABLED - auto glo = ExecutorScope::Current(); Timer _time; + auto glo = ExecutorScope::Current(); glo->getDebugTools()->flops = 0.0f; #endif auto outputs = mModule->onForward(inputs); @@ -235,8 +235,10 @@ class NetModule : public Module { metrics.emplace("Memory", std::to_string(memory)); } logAsync(metrics); + MNN_PRINT("Cost time with log: %f\n", (float)_time.durationInUs() / 1000.0f); } while(false); #endif + mModule->clearCache(); return outputs; } diff --git a/express/module/PipelineModule.cpp b/express/module/PipelineModule.cpp index fc2551687..932ae6daa 100644 --- a/express/module/PipelineModule.cpp +++ b/express/module/PipelineModule.cpp @@ -634,11 +634,9 @@ Module* PipelineModule::load(const std::vector& inputs, const std:: modRuntime.compute.type = modRuntime.rt.first.begin()->first; modRuntime.compute.numThread = 1; // set allocator type - modRuntime.rt.first.begin()->second->setAllocatorType(rtMgr->getInside()->modes.memoryAllocatorType); - modRuntime.rt.second->setAllocatorType(rtMgr->getInside()->modes.memoryAllocatorType); + modRuntime.rt.first.begin()->second->setRuntimeHint(rtMgr->getInside()->modes.runtimeHint); // set winograd memory type - modRuntime.rt.first.begin()->second->setWinogradMemoryLevel(rtMgr->getInside()->modes.winogradMemoryUsed); - modRuntime.rt.second->setWinogradMemoryLevel(rtMgr->getInside()->modes.winogradMemoryUsed); + modRuntime.rt.second->setRuntimeHint(rtMgr->getInside()->modes.runtimeHint); } auto& rt = modRuntime.rt; auto firstRt = rt.first[modRuntime.compute.type]; diff --git a/include/MNN/Interpreter.hpp b/include/MNN/Interpreter.hpp index 16344a52b..6debbe3f0 100644 --- a/include/MNN/Interpreter.hpp +++ b/include/MNN/Interpreter.hpp @@ -206,6 +206,20 @@ class MNN_PUBLIC Interpreter { // Geometry Compute option, default is 0xFFFF GEOMETRY_COMPUTE_MASK = 4, + + // 0: Close dynamic quant; 1: per batch quant; 2: per tensor quant + DYNAMIC_QUANT_OPTIONS = 5, + + // For Mobile CPU with big-litter core, set decrease rate to let MNN divide task differential by CPU's performance + // 0-100, 50 means litter core has 50% capacity of large core + // Default is 50 + CPU_LITTLECORE_DECREASE_RATE = 6, + + // 0: Do not quantize kvcache, just store float + // 1: Only quantize key cache, use int8 asymmetric quantization + // 2: Only quantize value cache, use fp8 quantization + // 3: quantize both key and value cache as described above + KVCACHE_QUANT_OPTIONS = 7, }; enum GeometryComputeMask { diff --git a/include/MNN/MNNDefine.h b/include/MNN/MNNDefine.h index b6d6645db..ab84cd8f8 100644 --- a/include/MNN/MNNDefine.h +++ b/include/MNN/MNNDefine.h @@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \ #define STR(x) STR_IMP(x) #define MNN_VERSION_MAJOR 2 #define MNN_VERSION_MINOR 9 -#define MNN_VERSION_PATCH 2 +#define MNN_VERSION_PATCH 3 #define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." 
STR(MNN_VERSION_PATCH) #endif /* MNNDefine_h */ diff --git a/include/MNN/expr/Executor.hpp b/include/MNN/expr/Executor.hpp index 3ca0d9e19..3871827c9 100644 --- a/include/MNN/expr/Executor.hpp +++ b/include/MNN/expr/Executor.hpp @@ -136,7 +136,7 @@ class MNN_PUBLIC Executor { void _makeCache(const std::vector& outputs, bool forceCPU); // TODO: Remove mRuntimes, only use mRuntimeInfo - std::map, std::shared_ptr> mRuntimes; + std::map> mRuntimes; RuntimeInfo mRuntimeInfo; std::shared_ptr mDebug; std::map> mSubGraph; diff --git a/project/android/build_64.sh b/project/android/build_64.sh index 8e2039c73..34b18057e 100755 --- a/project/android/build_64.sh +++ b/project/android/build_64.sh @@ -7,7 +7,6 @@ cmake ../../../ \ -DMNN_USE_LOGCAT=false \ -DMNN_BUILD_BENCHMARK=ON \ -DMNN_USE_SSE=OFF \ --DMNN_SUPPORT_BF16=OFF \ -DMNN_BUILD_TEST=ON \ -DANDROID_NATIVE_API_LEVEL=android-21 \ -DMNN_BUILD_FOR_ANDROID_COMMAND=true \ diff --git a/project/ios/MNN.xcodeproj/project.pbxproj b/project/ios/MNN.xcodeproj/project.pbxproj index f31638c61..009adba67 100644 --- a/project/ios/MNN.xcodeproj/project.pbxproj +++ b/project/ios/MNN.xcodeproj/project.pbxproj @@ -268,8 +268,6 @@ 4A224A1427D0C56E000A9260 /* ConvolutionWinogradBridge.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4A224A1027D0C56E000A9260 /* ConvolutionWinogradBridge.hpp */; }; 4A224A1527D0C56E000A9260 /* ConvolutionWinogradImpl.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4A224A1127D0C56E000A9260 /* ConvolutionWinogradImpl.hpp */; }; 4A224A1627D0C56E000A9260 /* ConvolutionWinogradBridge.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4A224A1227D0C56E000A9260 /* ConvolutionWinogradBridge.cpp */; }; - 4A5BEC6026AAB3B30032F6BD /* CommonCompute.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4A5BEC5E26AAB3B20032F6BD /* CommonCompute.hpp */; }; - 4A5BEC6126AAB3B30032F6BD /* MemoryFormater.h in Headers */ = {isa = PBXBuildFile; fileRef = 4A5BEC5F26AAB3B20032F6BD /* MemoryFormater.h */; }; 4A5BEC6426AAB4B30032F6BD /* ModuleTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4A5BEC6326AAB4B30032F6BD /* ModuleTest.cpp */; }; 4AF4FB24269ED235005BA97B /* SparseConvInt8TiledExecutor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4AF4FB20269ED234005BA97B /* SparseConvInt8TiledExecutor.cpp */; }; 4AF4FB26269ED235005BA97B /* SparseConvInt8TiledExecutor.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 4AF4FB22269ED234005BA97B /* SparseConvInt8TiledExecutor.hpp */; }; @@ -732,8 +730,6 @@ 950B28FA2A0C9AC20002F454 /* CPUScaleInt8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 950B28F92A0C9AC20002F454 /* CPUScaleInt8.cpp */; }; 950B28FE2A0C9B310002F454 /* MNNScaleAndAddBiasInt8.S in Sources */ = {isa = PBXBuildFile; fileRef = 950B28FD2A0C9B310002F454 /* MNNScaleAndAddBiasInt8.S */; }; 950B29002A0C9B4D0002F454 /* MNNScaleAndAddBiasInt8.S in Sources */ = {isa = PBXBuildFile; fileRef = 950B28FF2A0C9B4D0002F454 /* MNNScaleAndAddBiasInt8.S */; }; - 952298AF2B4D38CB0043978B /* ConvolutionHybrid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 952298AD2B4D38CB0043978B /* ConvolutionHybrid.cpp */; }; - 952298B02B4D38CB0043978B /* ConvolutionHybrid.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 952298AE2B4D38CB0043978B /* ConvolutionHybrid.hpp */; }; 952298B22B4D39050043978B /* MetalLoop.mm in Sources */ = {isa = PBXBuildFile; fileRef = 952298B12B4D39050043978B /* MetalLoop.mm */; }; 952298B42B4D39260043978B /* MetalArgMax.mm in Sources */ = {isa = PBXBuildFile; fileRef = 952298B32B4D39250043978B /* MetalArgMax.mm */; }; 
952298B72B4D4CC80043978B /* CoreMLLayerNorm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 952298B52B4D4CC80043978B /* CoreMLLayerNorm.cpp */; }; @@ -807,8 +803,6 @@ CEE9B95B2A3AA4D4006438F2 /* MNNBilinearLineC8.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9572A3AA4D4006438F2 /* MNNBilinearLineC8.S */; }; CEE9B95C2A3AA4D4006438F2 /* MNNBilinearSampleC8.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9582A3AA4D4006438F2 /* MNNBilinearSampleC8.S */; }; CEE9B95D2A3AA4D4006438F2 /* MNNCubicSampleC16.S in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B9592A3AA4D4006438F2 /* MNNCubicSampleC16.S */; }; - CEE9B9602A3AA4EF006438F2 /* CPUSoftMaxInt8.hpp in Headers */ = {isa = PBXBuildFile; fileRef = CEE9B95E2A3AA4EF006438F2 /* CPUSoftMaxInt8.hpp */; }; - CEE9B9612A3AA4EF006438F2 /* CPUSoftMaxInt8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = CEE9B95F2A3AA4EF006438F2 /* CPUSoftMaxInt8.cpp */; }; EB45C774244D7C4F00E28F44 /* MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S in Sources */ = {isa = PBXBuildFile; fileRef = EB45C773244D7C4F00E28F44 /* MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S */; }; EB45C776244D7C6600E28F44 /* MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S in Sources */ = {isa = PBXBuildFile; fileRef = EB45C775244D7C6600E28F44 /* MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S */; }; EB8D2ABE246A4975009948D1 /* Arm82OpRegister.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB8D2ABD246A4975009948D1 /* Arm82OpRegister.cpp */; }; @@ -1098,8 +1092,6 @@ 4A224A1027D0C56E000A9260 /* ConvolutionWinogradBridge.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = ConvolutionWinogradBridge.hpp; sourceTree = ""; }; 4A224A1127D0C56E000A9260 /* ConvolutionWinogradImpl.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = ConvolutionWinogradImpl.hpp; sourceTree = ""; }; 4A224A1227D0C56E000A9260 /* ConvolutionWinogradBridge.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvolutionWinogradBridge.cpp; sourceTree = ""; }; - 4A5BEC5E26AAB3B20032F6BD /* CommonCompute.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CommonCompute.hpp; sourceTree = ""; }; - 4A5BEC5F26AAB3B20032F6BD /* MemoryFormater.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MemoryFormater.h; sourceTree = ""; }; 4A5BEC6326AAB4B30032F6BD /* ModuleTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ModuleTest.cpp; sourceTree = ""; }; 4AF4FB20269ED234005BA97B /* SparseConvInt8TiledExecutor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SparseConvInt8TiledExecutor.cpp; sourceTree = ""; }; 4AF4FB22269ED234005BA97B /* SparseConvInt8TiledExecutor.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = SparseConvInt8TiledExecutor.hpp; sourceTree = ""; }; @@ -1572,8 +1564,6 @@ 950B28FB2A0C9AD30002F454 /* CPUScaleInt8.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CPUScaleInt8.hpp; sourceTree = ""; }; 950B28FD2A0C9B310002F454 /* MNNScaleAndAddBiasInt8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNScaleAndAddBiasInt8.S; sourceTree = ""; }; 950B28FF2A0C9B4D0002F454 /* MNNScaleAndAddBiasInt8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; 
path = MNNScaleAndAddBiasInt8.S; sourceTree = ""; }; - 952298AD2B4D38CB0043978B /* ConvolutionHybrid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ConvolutionHybrid.cpp; path = compute/ConvolutionHybrid.cpp; sourceTree = ""; }; - 952298AE2B4D38CB0043978B /* ConvolutionHybrid.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = ConvolutionHybrid.hpp; path = compute/ConvolutionHybrid.hpp; sourceTree = ""; }; 952298B12B4D39050043978B /* MetalLoop.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MetalLoop.mm; sourceTree = ""; }; 952298B32B4D39250043978B /* MetalArgMax.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MetalArgMax.mm; sourceTree = ""; }; 952298B52B4D4CC80043978B /* CoreMLLayerNorm.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CoreMLLayerNorm.cpp; sourceTree = ""; }; @@ -1607,7 +1597,6 @@ C4D4823C27BA2BB40021C2B9 /* CPUDet.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPUDet.cpp; sourceTree = ""; }; C4D4823D27BA2BB40021C2B9 /* CPUDet.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CPUDet.hpp; sourceTree = ""; }; C4D4824227BA67DE0021C2B9 /* GeometryDet.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = GeometryDet.cpp; sourceTree = ""; }; - C4DBB34F27041F9C00ADB16E /* WinogradInt8Helper.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = WinogradInt8Helper.hpp; sourceTree = ""; }; C4EF5FB22657A9E70094235C /* ConvInt8TiledExecutor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvInt8TiledExecutor.cpp; sourceTree = ""; }; C4EF5FB32657A9E70094235C /* ConvInt8TiledExecutor.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = ConvInt8TiledExecutor.hpp; sourceTree = ""; }; C4F906AF276886040026B847 /* GeometryTopK.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = GeometryTopK.cpp; sourceTree = ""; }; @@ -1650,8 +1639,6 @@ CEE9B9572A3AA4D4006438F2 /* MNNBilinearLineC8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearLineC8.S; sourceTree = ""; }; CEE9B9582A3AA4D4006438F2 /* MNNBilinearSampleC8.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNBilinearSampleC8.S; sourceTree = ""; }; CEE9B9592A3AA4D4006438F2 /* MNNCubicSampleC16.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNCubicSampleC16.S; sourceTree = ""; }; - CEE9B95E2A3AA4EF006438F2 /* CPUSoftMaxInt8.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CPUSoftMaxInt8.hpp; sourceTree = ""; }; - CEE9B95F2A3AA4EF006438F2 /* CPUSoftMaxInt8.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPUSoftMaxInt8.cpp; sourceTree = ""; }; EB45C773244D7C4F00E28F44 /* MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S; sourceTree = ""; }; EB45C775244D7C6600E28F44 /* MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S */ = 
{isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S; sourceTree = ""; }; EB8D2ABD246A4975009948D1 /* Arm82OpRegister.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Arm82OpRegister.cpp; path = ../arm82/Arm82OpRegister.cpp; sourceTree = ""; }; @@ -1866,10 +1853,8 @@ 488873A8215B639D0079B12E /* source */ = { isa = PBXGroup; children = ( - CE482EF5288536DA007CD935 /* internal */, 4DF87C482887D3560003E2D4 /* calib3d */, 4D4CF4612760946500A36D9F /* imgproc */, - 4A5BEC6226AAB3D70032F6BD /* common */, 4D9A931B26255BDA00F9B43C /* coreml */, 6A131E3C2582331C002EC3D6 /* plugin */, 489D7A152550FDC800AD896A /* metal */, @@ -1934,12 +1919,8 @@ CEE4566A2BC0E23D00F062C1 /* CPUExternalConst.cpp */, 95278CE62B9F0999009E9B29 /* CPUDynamicQuant.cpp */, 95278CE52B9F0999009E9B29 /* CPUDynamicQuant.hpp */, - 952298AD2B4D38CB0043978B /* ConvolutionHybrid.cpp */, - 952298AE2B4D38CB0043978B /* ConvolutionHybrid.hpp */, CE8049A92B31C65B009B422C /* CPULayerNorm.hpp */, 958375342A496E5C007C0A3E /* MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3.S */, - CEE9B95F2A3AA4EF006438F2 /* CPUSoftMaxInt8.cpp */, - CEE9B95E2A3AA4EF006438F2 /* CPUSoftMaxInt8.hpp */, CE9AFED428E54E3300566949 /* CPUInterp3D.cpp */, CE9AFED528E54E3300566949 /* CPUInterp3D.hpp */, 4DCF538B2892B16300B5B393 /* CPUHistogram.cpp */, @@ -2236,16 +2217,6 @@ path = ../../../test/speed; sourceTree = ""; }; - 4A5BEC6226AAB3D70032F6BD /* common */ = { - isa = PBXGroup; - children = ( - C4DBB34F27041F9C00ADB16E /* WinogradInt8Helper.hpp */, - 4A5BEC5F26AAB3B20032F6BD /* MemoryFormater.h */, - 4A5BEC5E26AAB3B20032F6BD /* CommonCompute.hpp */, - ); - path = common; - sourceTree = ""; - }; 4D4CF4612760946500A36D9F /* imgproc */ = { isa = PBXGroup; children = ( @@ -2913,19 +2884,16 @@ CEA82BDC2A15F8AD002CBC95 /* IdstConvolutionInt8.hpp in Headers */, 4DE4E82C275E307B0016A916 /* cv in Headers */, 1F501F842397BA5B004E8721 /* ImageProcess.hpp in Headers */, - CECF8C5D299CACFD00D3875B /* Log.hpp in Headers */, 1F501F822397BA5B004E8721 /* Interpreter.hpp in Headers */, C4F906B327688C3A0026B847 /* NMSModule.hpp in Headers */, 1F501F882397BA5B004E8721 /* Tensor.hpp in Headers */, 1F501F872397BA5B004E8721 /* Matrix.h in Headers */, CE8049AC2B31C65B009B422C /* CPULayerNorm.hpp in Headers */, - CECF8C5A299CACFD00D3875B /* WorkerThread.hpp in Headers */, 48C84B85250F711700EE7666 /* IfModule.hpp in Headers */, 4D9A937326255BDA00F9B43C /* CoreMLUnary.hpp in Headers */, 48C84B98250F71E900EE7666 /* CPUSoftmax.hpp in Headers */, 4882C8B8241A22B800DAC168 /* OpCommonUtils.hpp in Headers */, 48608B54250632EC00CB1D71 /* GeometryComputer.hpp in Headers */, - CECF8C7A299CAD9400D3875B /* sha1.h in Headers */, 4894C6EC27016F7200D8BE79 /* CPUResizeCache.hpp in Headers */, 92FF04A623AA0BFB00AC97F6 /* FileLoader.hpp in Headers */, 48F34733273A7C8400C45394 /* ImageProcessFunction.hpp in Headers */, @@ -2935,12 +2903,10 @@ 482BFBCF28351BA1009210E4 /* AllShader.hpp in Headers */, 4896D36A25FE2A3D00717702 /* Arm82Unary.hpp in Headers */, 1F501F862397BA5B004E8721 /* Rect.h in Headers */, - CEE9B9602A3AA4EF006438F2 /* CPUSoftMaxInt8.hpp in Headers */, 1F501F8B2397BA5B004E8721 /* MNNSharedContext.h in Headers */, 48925F352744AC0700919B37 /* CPUROIAlign.hpp in Headers */, 92FF029623AA0B5A00AC97F6 /* CPUCast.hpp in Headers */, 4D9A937826255BDA00F9B43C /* CoreMLBinary.hpp in Headers */, - CECF8C85299CAD9400D3875B /* log_util.h in Headers */, 4D6D7FD52656896600F80814 /* 
DenseConvolutionTiledExecutor.hpp in Headers */, 4D9A936626255BDA00F9B43C /* CoreMLExecutor.h in Headers */, 92FF027A23AA0B5A00AC97F6 /* CPUPool.hpp in Headers */, @@ -2949,7 +2915,6 @@ 1F501F802397BA5B004E8721 /* MNNDefine.h in Headers */, 19D0FE76285C66F200B74B1A /* MetalLayerNorm.hpp in Headers */, 489D7A682550FDC800AD896A /* MetalReduction.hpp in Headers */, - CECF8C86299CAD9400D3875B /* sds.h in Headers */, 1F501F7F2397BA5B004E8721 /* HalideRuntime.h in Headers */, 92FF029E23AA0B5A00AC97F6 /* CPUDeconvolutionDepthwise.hpp in Headers */, 4D9A935B26255BDA00F9B43C /* NeuralNetwork.pb-c.h in Headers */, @@ -2970,10 +2935,8 @@ 481C2DEE25FE2CD6001ED6DF /* Arm82Functions.hpp in Headers */, 4894C6EA27016F7200D8BE79 /* UnaryUtils.hpp in Headers */, EBD4842A2485FF650083CE95 /* Arm82Interp.hpp in Headers */, - CECF8C81299CAD9400D3875B /* log_util_imp.h in Headers */, 92FF037623AA0B5A00AC97F6 /* CPUBinary.hpp in Headers */, 4D9A935826255BDA00F9B43C /* FeatureTypes.pb-c.h in Headers */, - CECF8C7C299CAD9400D3875B /* hmac-sha.h in Headers */, 48608B53250632EC00CB1D71 /* GeometryComputerUtils.hpp in Headers */, 950B28F529F629A90002F454 /* CPUBinaryInt8.hpp in Headers */, 489D7A732550FDC800AD896A /* MetalBackend.hpp in Headers */, @@ -2996,7 +2959,6 @@ 4DF87C522887D3F20003E2D4 /* CPUSvd.hpp in Headers */, 48747D4B245D9D24000B9709 /* RuntimeFactory.hpp in Headers */, 92FF03B323AA0B5A00AC97F6 /* ConvolutionDepthwise3x3.hpp in Headers */, - CECF8C77299CAD9400D3875B /* log_builder.h in Headers */, 4D9A937226255BDA00F9B43C /* CoreMLConvolution.hpp in Headers */, 92FF038B23AA0B5A00AC97F6 /* CPUUnravelIndex.hpp in Headers */, 4AF4FB26269ED235005BA97B /* SparseConvInt8TiledExecutor.hpp in Headers */, @@ -3008,7 +2970,6 @@ 92FF028C23AA0B5A00AC97F6 /* CPUReduction.hpp in Headers */, 92FF03B923AA0B5A00AC97F6 /* ConvOpt.h in Headers */, 92FF04AB23AA0BFB00AC97F6 /* Pipeline.hpp in Headers */, - 952298B02B4D38CB0043978B /* ConvolutionHybrid.hpp in Headers */, 489D7A6E2550FDC800AD896A /* MetalROIPooling.hpp in Headers */, 4882C8B9241A22B800DAC168 /* ConvolutionCommon.hpp in Headers */, 92FF03AE23AA0B5A00AC97F6 /* ConvolutionIntFactory.hpp in Headers */, @@ -3035,7 +2996,6 @@ 92FF03CA23AA0B5A00AC97F6 /* CPUConvolutionDepthwise.hpp in Headers */, 92FF04A923AA0BFB00AC97F6 /* Schedule.hpp in Headers */, 489D7A9F2550FDC900AD896A /* MetalConvolutionCommon.hpp in Headers */, - CECF8C80299CAD9400D3875B /* lz4.h in Headers */, 92FF028623AA0B5A00AC97F6 /* CPUDeconvolution.hpp in Headers */, 489D7A722550FDC800AD896A /* MetalReLU6.hpp in Headers */, 92FF04B523AA0BFB00AC97F6 /* TensorUtils.hpp in Headers */, @@ -3056,7 +3016,6 @@ 4A224A1427D0C56E000A9260 /* ConvolutionWinogradBridge.hpp in Headers */, 4D9A935926255BDA00F9B43C /* DataStructures.pb-c.h in Headers */, 489D7A972550FDC900AD896A /* MetalConvolutionDepthwise.hpp in Headers */, - 4A5BEC6126AAB3B30032F6BD /* MemoryFormater.h in Headers */, 489D7AB42550FDC900AD896A /* MetalBinary.hpp in Headers */, 92FF04AF23AA0BFB00AC97F6 /* Macro.h in Headers */, 4D9A936C26255BDA00F9B43C /* CoreMLRaster.hpp in Headers */, @@ -3088,24 +3047,20 @@ 92FF03A623AA0B5A00AC97F6 /* ConvolutionTiledExecutor.hpp in Headers */, 92FF036523AA0B5A00AC97F6 /* CPUResize.hpp in Headers */, 92FF04B423AA0BFB00AC97F6 /* MNNMemoryUtils.h in Headers */, - CECF8C88299CAD9400D3875B /* log_api.h in Headers */, 4A224A0D27D0C2D9000A9260 /* ConvolutionPackWinograd.hpp in Headers */, 4A224A0E27D0C2D9000A9260 /* ConvolutionPackFreeWinograd.hpp in Headers */, 4D9A937426255BDA00F9B43C /* CoreMLReduction.hpp in Headers 
*/, 48C84B8B250F711700EE7666 /* PipelineModule.hpp in Headers */, F41497D7278D8A21004A363A /* RuntimeAttr.hpp in Headers */, - CECF8C5B299CACFD00D3875B /* LogHelper.hpp in Headers */, 92FF04C123AA0BFB00AC97F6 /* Backend.hpp in Headers */, 482BFBCD28351BA1009210E4 /* ShaderMap.hpp in Headers */, 489D7A812550FDC900AD896A /* MetalPooling.hpp in Headers */, - CECF8C7F299CAD9400D3875B /* md5.h in Headers */, 92FF02A623AA0B5A00AC97F6 /* CPUQuantizedMaxPool.hpp in Headers */, 92FF028023AA0B5A00AC97F6 /* CPUFloatToInt8.hpp in Headers */, 92FF028723AA0B5A00AC97F6 /* CPUFixedPoint.hpp in Headers */, C43C8227251894F400A0FF84 /* Vec.hpp in Headers */, 4819FB1D24C138DF0050BD09 /* GeometryConvUtils.hpp in Headers */, 489D7A952550FDC900AD896A /* MetalMatMul.hpp in Headers */, - CECF8C83299CAD9400D3875B /* log_define.h in Headers */, C48CAE2628900C4A00271A6D /* ConvInt8Winograd.hpp in Headers */, 48F34730273A7C7300C45394 /* CPUImageProcess.hpp in Headers */, 489D7A702550FDC800AD896A /* MetalRaster.hpp in Headers */, @@ -3120,7 +3075,6 @@ 92FF038C23AA0B5A00AC97F6 /* CPUEltwise.hpp in Headers */, 92FF028823AA0B5A00AC97F6 /* CPUDequantize.hpp in Headers */, 481C2DF125FE2CD6001ED6DF /* Arm82OptFunc.hpp in Headers */, - 4A5BEC6026AAB3B30032F6BD /* CommonCompute.hpp in Headers */, C43C8225251894F400A0FF84 /* WingoradGenerater.hpp in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -3290,7 +3244,6 @@ 48FA474623AA127B00172C3B /* NeuralNetWorkOp.cpp in Sources */, 4D9A936E26255BDA00F9B43C /* CoreMLArgMax.cpp in Sources */, 92FF02F423AA0B5A00AC97F6 /* MNNUInt8ToInt16WithOffsetC4Common.S in Sources */, - CEE9B9612A3AA4EF006438F2 /* CPUSoftMaxInt8.cpp in Sources */, 482BFBCE28351BA1009210E4 /* ShaderMap.cpp in Sources */, 92FF038623AA0B5A00AC97F6 /* CPULinSpace.cpp in Sources */, 4819FB2D24C1396A0050BD09 /* GeometryConv2D.cpp in Sources */, @@ -3328,7 +3281,6 @@ 489D7A8A2550FDC900AD896A /* MetalConvolutionDepthwise.mm in Sources */, 48123003269EA83400EB7ABA /* ShapeUnique.cpp in Sources */, 92FF037D23AA0B5A00AC97F6 /* CPURelu.cpp in Sources */, - CECF8C5E299CACFD00D3875B /* WorkerThread.cpp in Sources */, 489D7A842550FDC900AD896A /* MetalBinary.mm in Sources */, 48747D6B245D9E33000B9709 /* GeometryFill.cpp in Sources */, 4819FB1F24C138DF0050BD09 /* GeometryConvUtils.cpp in Sources */, @@ -3428,7 +3380,6 @@ 48F34734273A7C8400C45394 /* ImageProcessFunction.cpp in Sources */, 6A131E4025823349002EC3D6 /* PluginKernel.cpp in Sources */, 48958781268EBA6F00EA01A7 /* CPUSegmentMean.cpp in Sources */, - CECF8C7B299CAD9400D3875B /* sha1.c in Sources */, 4D9A937026255BDA00F9B43C /* CoreMLUnary.cpp in Sources */, 92FF04A823AA0BFB00AC97F6 /* AutoTime.cpp in Sources */, 92FF04AE23AA0BFB00AC97F6 /* Backend.cpp in Sources */, @@ -3483,7 +3434,6 @@ 92FF03CE23AA0B5A00AC97F6 /* CPUOPRegister.cpp in Sources */, 92FF02B323AA0B5A00AC97F6 /* CPUInstanceNorm.cpp in Sources */, 4819FB2C24C1396A0050BD09 /* GeometryPoolGrad.cpp in Sources */, - CECF8C7E299CAD9400D3875B /* log_builder.cpp in Sources */, 92FF042223AA0B7100AC97F6 /* ShapeConcat.cpp in Sources */, 4D6D7FD12656891400F80814 /* MNNPackedSparseMatMulEpx4.S in Sources */, 4D5662CC299B76ED0031C1A1 /* MNNMaxPoolInt8.S in Sources */, @@ -3491,7 +3441,6 @@ 4844603D2726558B00F7EABA /* MNNConvWinoSourceTransformUnit6x6FP16.S in Sources */, 92FF044A23AA0B7100AC97F6 /* ShapeConvolution.cpp in Sources */, 11A01A0D258785FB00745FA7 /* MNNVectorTop1Int32.S in Sources */, - 952298AF2B4D38CB0043978B /* ConvolutionHybrid.cpp in Sources */, 92FF026A23AA0B5A00AC97F6 /* 
CPUNonMaxSuppressionV2.cpp in Sources */, 92FF045123AA0B7100AC97F6 /* ShapeArgMax.cpp in Sources */, 48F9E54E2493A0A800E46522 /* MNNPackC4ForMatMul_A.S in Sources */, @@ -3563,7 +3512,6 @@ 4D759B2C25FF89EE0037B0B6 /* GeometryShape.cpp in Sources */, 11A01A07258785EA00745FA7 /* MNNVectorTop1Float.S in Sources */, 48747D6E245D9E33000B9709 /* GeometrySlice.cpp in Sources */, - CECF8C7D299CAD9400D3875B /* md5.c in Sources */, 92FF041923AA0B7100AC97F6 /* ShapeQuantizedMaxPool.cpp in Sources */, 92FF038A23AA0B5A00AC97F6 /* CPURange.cpp in Sources */, CE125CC92A52BF6B003698C9 /* MNNBilinearLineC8.S in Sources */, @@ -3621,10 +3569,8 @@ 92FF042E23AA0B7100AC97F6 /* ShapeProposal.cpp in Sources */, 92FF025923AA0B5A00AC97F6 /* CPUPoolInt8.cpp in Sources */, 92FF045B23AA0B7100AC97F6 /* ShapeShape.cpp in Sources */, - CECF8C87299CAD9400D3875B /* sds.c in Sources */, 9560EAD62BDE426A00C8D0B6 /* GeometryLayernorm.cpp in Sources */, 4D6D7FD72656896D00F80814 /* SparseConvolutionTiledExecutor.cpp in Sources */, - CECF8C82299CAD9400D3875B /* log_api.cpp in Sources */, 92FF03A823AA0B5A00AC97F6 /* WinogradOptFunction.cpp in Sources */, 950B28E229F627E00002F454 /* MNNBinarySubInt8.S in Sources */, 950B28F029F627F70002F454 /* MNNBinarySubInt8.S in Sources */, @@ -3634,7 +3580,6 @@ 4D9A936026255BDA00F9B43C /* Model.pb-c.c in Sources */, CE9AFED628E54E3300566949 /* CPUInterp3D.cpp in Sources */, C4F906B427688C3A0026B847 /* NMSModule.cpp in Sources */, - CECF8C64299CAD8400D3875B /* LogHelper.mm in Sources */, 48FA474523AA127B00172C3B /* Executor.cpp in Sources */, 92FF02EA23AA0B5A00AC97F6 /* MNNGemmInt8AddBiasScale_16x4_Unit.S in Sources */, 48A8A61A21D101DE00C2B9A7 /* Matrix_CV.cpp in Sources */, @@ -3660,7 +3605,6 @@ 92FF027F23AA0B5A00AC97F6 /* CPUDeconvolutionDepthwise.cpp in Sources */, EBECA3A724643D5D0062C7A3 /* MNNQuantizeFP16_UNIT4.S in Sources */, 92FF04A423AA0BFB00AC97F6 /* Interpreter.cpp in Sources */, - CECF8C5C299CACFD00D3875B /* Log.cpp in Sources */, 92FF045623AA0B7100AC97F6 /* ShapeReshape.cpp in Sources */, 92FF032523AA0B5A00AC97F6 /* MNNConvDwF23SourceTransUnit.S in Sources */, 92FF044423AA0B7100AC97F6 /* ShapeLSTM.cpp in Sources */, @@ -3697,7 +3641,6 @@ 92FF02B623AA0B5A00AC97F6 /* CPUUnary.cpp in Sources */, 92FF032723AA0B5A00AC97F6 /* MNNDeconvRunForUnitDepthWise.S in Sources */, CE7DC00028E2DE6B00797689 /* ShapeConvTranspose3D.cpp in Sources */, - CECF8C78299CAD9400D3875B /* log_util_imp.cpp in Sources */, 92FF02CA23AA0B5A00AC97F6 /* MNNUnPackC4.S in Sources */, 952298B22B4D39050043978B /* MetalLoop.mm in Sources */, 48925F372744AC2A00919B37 /* ShapeROIAlign.cpp in Sources */, @@ -3723,13 +3666,11 @@ 92FF02FF23AA0B5A00AC97F6 /* MNNFloat2Int8.S in Sources */, 4D9A937926255BDA00F9B43C /* CoreMLRaster.cpp in Sources */, 48417FF224D13BF50056D9A7 /* GeometrySelect.cpp in Sources */, - CECF8C84299CAD9400D3875B /* lz4.c in Sources */, 489D7A7E2550FDC900AD896A /* MNNMetalContext.mm in Sources */, 92FF033423AA0B5A00AC97F6 /* MNNUInt8ToInt16WithOffsetC4Common.S in Sources */, 92FF036B23AA0B5A00AC97F6 /* CPUResize.cpp in Sources */, 92FF02C723AA0B5A00AC97F6 /* MNNCopyC4WithStride.S in Sources */, 92FF030923AA0B5A00AC97F6 /* MNNNV21ToBGRUnit.S in Sources */, - CECF8C79299CAD9400D3875B /* hmac-sha.cpp in Sources */, 92FF032623AA0B5A00AC97F6 /* MNNWinogradMatrixProductLeft.S in Sources */, 92FF04C023AA0BFB00AC97F6 /* Tensor.cpp in Sources */, CEE9B95B2A3AA4D4006438F2 /* MNNBilinearLineC8.S in Sources */, @@ -4127,7 +4068,7 @@ CODE_SIGN_STYLE = Automatic; DEAD_CODE_STRIPPING = YES; DEFINES_MODULE = YES; - 
DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; @@ -4202,7 +4143,7 @@ IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; - PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.abcde3; + PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.abcde3vj; PRODUCT_NAME = "$(TARGET_NAME)"; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -4214,7 +4155,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_LAUNCHIMAGE_NAME = LaunchImage; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; GCC_ENABLE_CPP_EXCEPTIONS = NO; GCC_ENABLE_CPP_RTTI = NO; HEADER_SEARCH_PATHS = ( @@ -4229,7 +4170,7 @@ IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; - PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.abcde3; + PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.abcde3vj; PRODUCT_NAME = "$(TARGET_NAME)"; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -4245,7 +4186,7 @@ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; GENERATE_INFOPLIST_FILE = YES; INFOPLIST_FILE = demo/Info.plist; INFOPLIST_KEY_NSCameraUsageDescription = "use camera to capture photo for demo"; @@ -4278,7 +4219,7 @@ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = Q48UX93J22; + DEVELOPMENT_TEAM = 6G7464HHUS; GENERATE_INFOPLIST_FILE = YES; INFOPLIST_FILE = demo/Info.plist; INFOPLIST_KEY_NSCameraUsageDescription = "use camera to capture photo for demo"; @@ -4343,4 +4284,3 @@ }; rootObject = 0F1465AE1FA18D1000F9860A /* Project object */; } - diff --git a/project/ios/Playground/AppDelegate.mm b/project/ios/Playground/AppDelegate.mm index f01ffb6ef..d073b12a8 100644 --- a/project/ios/Playground/AppDelegate.mm +++ b/project/ios/Playground/AppDelegate.mm @@ -12,35 +12,33 @@ #include #import #import "benchmark.h" - +#define TEST_WORKMODE 0 @implementation AppDelegate - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { -//#define UNITTEST -//#ifdef UNITTEST -// // unittest -// { -// MNN::BackendConfig config; -// // If want to test metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL -// MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1); -// int precisionInTestUtil = -// getTestPrecision(MNN_FORWARD_CPU, config.precision, MNN::Express::Executor::getGlobalExecutor()->getCurrentRuntimeStatus(MNN::STATUS_SUPPORT_FP16)); -// MNNTestSuite::runAll(precisionInTestUtil); -// } -//#endif -//#ifdef BENCHMARK -// // benchmark -// { -// auto bundle = CFBundleGetMainBundle(); -// auto url = CFBundleCopyBundleURL(bundle); -// auto string = CFURLCopyFileSystemPath(url, kCFURLPOSIXPathStyle); -// CFRelease(url); -// auto cstring = CFStringGetCStringPtr(string, kCFStringEncodingUTF8); -// auto res = std::string(cstring) + "/models"; -// CFRelease(string); -// iosBenchAll(res.c_str()); -// } -//#endif +#if TEST_WORKMODE==0 + // unittest + { + MNN::BackendConfig config; + // If want to test metal, change MNN_FORWARD_CPU to MNN_FORWARD_METAL + MNN::Express::Executor::getGlobalExecutor()->setGlobalExecutorConfig(MNN_FORWARD_CPU, config, 1); + 
MNNTestSuite::runAll(2); + } +#endif +#if TEST_WORKMODE==1 + // benchmark + { + auto bundle = CFBundleGetMainBundle(); + auto url = CFBundleCopyBundleURL(bundle); + auto string = CFURLCopyFileSystemPath(url, kCFURLPOSIXPathStyle); + CFRelease(url); + auto cstring = CFStringGetCStringPtr(string, kCFStringEncodingUTF8); + auto res = std::string(cstring) + "/models"; + CFRelease(string); + iosBenchAll(res.c_str()); + } +#endif +#if TEST_WORKMODE==2 auto bundle = CFBundleGetMainBundle(); auto url = CFBundleCopyBundleURL(bundle); auto string = CFURLCopyFileSystemPath(url, kCFURLPOSIXPathStyle); @@ -48,11 +46,10 @@ - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:( auto cstring = CFStringGetCStringPtr(string, kCFStringEncodingUTF8); auto res = std::string(cstring) + "/models/mobilenet_v2_auth.mnn"; - MNN::Interpreter* interpreter = MNN::Interpreter::createFromFile(res.c_str()); MNN::ScheduleConfig config; interpreter->createSession(config); - +#endif return YES; } diff --git a/pymnn/pip_package/pyproject.toml b/pymnn/pip_package/pyproject.toml index 25fc9d331..c178a4ebc 100644 --- a/pymnn/pip_package/pyproject.toml +++ b/pymnn/pip_package/pyproject.toml @@ -16,7 +16,7 @@ test-skip = [ ] test-requires = [ "opencv-python==4.6.0.66", - "numpy==1.13.3", + "numpy", "torch" ] test-command = [ diff --git a/pymnn/src/llm.h b/pymnn/src/llm.h index 8e9fffcfd..3ade7a17f 100644 --- a/pymnn/src/llm.h +++ b/pymnn/src/llm.h @@ -1,8 +1,8 @@ -#include "llm.hpp" +#include "llm/llm.hpp" typedef struct { PyObject_HEAD - Llm* llm; + MNN::Transformer::Llm* llm; } LLM; static PyObject* PyMNNLLM_new(struct _typeobject *type, PyObject *args, PyObject *kwds) { @@ -38,7 +38,7 @@ static PyObject* PyMNNLLM_response(LLM *self, PyObject *args) { if (!PyArg_ParseTuple(args, "s|p", &query, &stream)) { Py_RETURN_NONE; } - LlmStreamBuffer buffer(nullptr); + MNN::Transformer::LlmStreamBuffer buffer(nullptr); std::ostream null_os(&buffer); auto res = self->llm->response(query, stream ? 
&std::cout : &null_os); return string2Object(res); @@ -104,10 +104,10 @@ static PyObject* PyMNNLLM_create(PyObject *self, PyObject *args) { if (!llm) { return NULL; } - llm->llm = Llm::createLLM(path); + llm->llm = MNN::Transformer::Llm::createLLM(path); return (PyObject*)llm; } static PyMethodDef PyMNNLLM_static_methods[] = { {"create", PyMNNLLM_create, METH_VARARGS} -}; \ No newline at end of file +}; diff --git a/source/backend/arm82/Arm82Functions.cpp b/source/backend/arm82/Arm82Functions.cpp index 435d369d4..19038ec94 100644 --- a/source/backend/arm82/Arm82Functions.cpp +++ b/source/backend/arm82/Arm82Functions.cpp @@ -45,14 +45,12 @@ void MNNAbsMaxFP16(const float* source, float* absmax, size_t src_depth_quad, si void MNNQuantScaleFP16(float* sum, float* absmax, float* quant_scale, float* dequant_scale, size_t thread, size_t batch); void MNNDynamicQuantFP16(const float* src, int8_t* dst, const float* scale, size_t src_depth_quad, size_t realSize, int pack); void MNNQuantSumFP16(float* sum, const float* dequant_scale, size_t thread, size_t batch); -#if defined(__aarch64__) -void MNNGemmHybridInt8FP16_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt4FP16_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt4FP16_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt8FP16_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); #endif +#if defined(__aarch64__) +void CountMinMaxValue_FP16(float* source, float* minVal, float* maxVal, size_t sizeQuad); +void MNNSumByAxisLForMatmul_A_ARM86(float* dest, int8_t* source, const float* dequantScale, ssize_t realDstCount, SumByAxisParams sumParams); +void MNNSumByAxisLForMatmul_A_ARM82(float* dest, int8_t* source, const float* dequantScale, ssize_t realDstCount, SumByAxisParams sumParams); #endif - void MNNConvDwF23MulTransUnitFP16(FLOAT16 **cacheLine, const FLOAT16 *weight, FLOAT16 *dest, size_t ow); void MNNConvDwF23SourceTransUnitFP16(const FLOAT16 *source, FLOAT16 *dest, size_t unit); @@ -82,6 +80,32 @@ static void MNNMatrixSubFP16(FLOAT16* C, const FLOAT16* A, const FLOAT16* B, siz } } } +#if defined(__aarch64__) +static void ARM82CountMinMaxValue(float* source, float* minVal, float* maxVal, size_t size) { + if (size % 8 == 0) { + CountMinMaxValue_FP16(source, minVal, maxVal, size / 8); + } else { + auto remain = size - 8 * (size / 8); + auto max_ = ((__fp16*)source)[0]; + auto min_ = max_; + if (size >= 8) { + CountMinMaxValue_FP16(source, minVal, maxVal, size / 8); + max_ = ((__fp16*)maxVal)[0]; + min_ = ((__fp16*)minVal)[0]; + } + if (remain > 0) { + int16_t tmp[8] = {0}; + auto srcRemain = reinterpret_cast(source) + 8 * (size / 8) * 2; + ::memcpy(tmp, srcRemain, remain * 2); + CountMinMaxValue_FP16((float*)tmp, (float*)tmp, (float*)((uint8_t*)tmp + 2), 1); + max_ = ALIMAX(((__fp16*)tmp)[1], max_); + min_ = ALIMIN(((__fp16*)tmp)[0], min_); + } + reinterpret_cast<__fp16*>(minVal)[0] = min_; + reinterpret_cast<__fp16*>(maxVal)[0] = max_; + } +} +#endif static void Arm82MNNPackForMatMul_B(float* destC, const float* sourceC, size_t h, size_t l, bool transpose) { auto dest = (int16_t*)destC; 
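ARM82CountMinMaxValue above walks the buffer in 8-wide FP16 chunks through CountMinMaxValue_FP16 and finishes any tail via a zero-padded temporary, so the padded zero lanes take part in that final comparison. Below is a scalar reference of the intended scan, useful for cross-checking the NEON path in a unit test; a sketch only, assuming __fp16 is available as in the surrounding file, with an illustrative helper name:

#include <stddef.h>

// Scalar min/max scan over an fp16 buffer (reference for the vectorized kernel).
static void countMinMaxValueReference(const __fp16* source, __fp16* minVal, __fp16* maxVal, size_t size) {
    __fp16 minV = source[0];
    __fp16 maxV = source[0];
    for (size_t i = 1; i < size; ++i) {
        if (source[i] < minV) minV = source[i];
        if (source[i] > maxV) maxV = source[i];
    }
    *minVal = minV;
    *maxVal = maxV;
}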
@@ -686,6 +710,9 @@ bool Arm82Functions::init() { FUNC_PTR_ASSIGN(gInstance->MNNMatrixSub, MNNMatrixSubFP16); FUNC_PTR_ASSIGN(gInstance->MNNMatrixAdd, MNNMatrixAddFP16); FUNC_PTR_ASSIGN(gInstance->MNNStrassenMergeCFunction, ARM82StrassenMerge); +#ifdef MNN_LOW_MEMORY + FUNC_PTR_ASSIGN(gInstance->MNNDynamicUpdateConvBiasScale, origin->MNNDynamicUpdateConvBiasScale); +#endif gInstance->penalty = 2.0f; FUNC_PTR_ASSIGN(gInstance->MNNScaleAndAddBias, MNNScaleAndAddBiasFP16); FUNC_PTR_ASSIGN(gInstance->MNNGridSampleComputeCord, MNNGridSampleComputeCordFP16); @@ -702,28 +729,30 @@ bool Arm82Functions::init() { // MatMul FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMul, MNNPackedMatMulFP16); FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMulRemain, MNNPackedMatMulRemainFP16); +#if defined(__aarch64__) #ifdef MNN_LOW_MEMORY + // Weight Dequant Gemm Kernels FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMul_int4, MNNPackedMatMulFP16_int4); FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMulRemain_int4, MNNPackedMatMulRemainFP16_int4); FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMul_int8, MNNPackedMatMulFP16_int8); FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMulRemain_int8, MNNPackedMatMulRemainFP16_int8); + // Dynamic Qaunt Helper Functions FUNC_PTR_ASSIGN(gInstance->MNNAbsMax, MNNAbsMaxFP16); FUNC_PTR_ASSIGN(gInstance->MNNQuantScale, MNNQuantScaleFP16); FUNC_PTR_ASSIGN(gInstance->MNNDynamicQuant, MNNDynamicQuantFP16); FUNC_PTR_ASSIGN(gInstance->MNNQuantSum, MNNQuantSumFP16); + FUNC_PTR_ASSIGN(gInstance->MNNCountMaxMinValue, ARM82CountMinMaxValue); + // Dynamic Quant Gemm Kernels. gInstance->supportFp16arith = origin->supportFp16arith; gInstance->supportSDot = origin->supportSDot; gInstance->supportI8mm = origin->supportI8mm; - #if defined(__aarch64__) +#endif if (gInstance->supportSDot) { - gInstance->MNNGemmHybridInt8 = MNNGemmHybridInt8FP16_sdot; - gInstance->MNNGemmHybridInt4 = MNNGemmHybridInt4FP16_sdot; + FUNC_PTR_ASSIGN(gInstance->MNNSumByAxisLForMatmul_A, MNNSumByAxisLForMatmul_A_ARM82); } if (gInstance->supportI8mm) { - gInstance->MNNGemmHybridInt8 = MNNGemmHybridInt8FP16_smmla; - gInstance->MNNGemmHybridInt4 = MNNGemmHybridInt4FP16_smmla; + FUNC_PTR_ASSIGN(gInstance->MNNSumByAxisLForMatmul_A, MNNSumByAxisLForMatmul_A_ARM86); } - #endif #endif FUNC_PTR_ASSIGN(gInstance->MNNPackC4ForMatMul_A, Arm82MNNPackForMatMul_A); FUNC_PTR_ASSIGN(gInstance->MNNGetMatMulPackMode, Arm82MNNGetMatMulPackMode); diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNCountMinMax_ARM82.S b/source/backend/arm82/asm/arm64/low_memory/MNNCountMinMax_ARM82.S new file mode 100644 index 000000000..680e6f2ac --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNCountMinMax_ARM82.S @@ -0,0 +1,278 @@ +// +// MNNAbsMaxFP16.S +// MNN +// +// Created by MNN on 2023/10/31. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" +.text +.align 5 + +.macro MaxMin_4 s0, s1, s2, s3, z0, z1, z2, z3 // z0,z1:max z2,z3:min + fmax \z0\().8h, \s0\().8h, \s1\().8h + fmax \z1\().8h, \s2\().8h, \s3\().8h + fmin \z2\().8h, \s0\().8h, \s1\().8h + fmin \z3\().8h, \s2\().8h, \s3\().8h + + fmax \z0\().8h, \z0\().8h, \z1\().8h + fmin \z2\().8h, \z2\().8h, \z3\().8h +.endm + +.macro Max_6 s0, s1, s2, s3, s4, s5, z0 + fmax \s0\().8h, \s0\().8h, \s4\().8h + fmax \s1\().8h, \s1\().8h, \s5\().8h + fmax \s2\().8h, \s2\().8h, \s3\().8h + + fmax \s0\().8h, \s0\().8h, \s1\().8h + fmax \z0\().8h, \z0\().8h, \s2\().8h + + fmax \z0\().8h, \z0\().8h, \s0\().8h +.endm + +.macro Min_6 s0, s1, s2, s3, s4, s5, z0 + fmin \s0\().8h, \s0\().8h, \s4\().8h + fmin \s1\().8h, \s1\().8h, \s5\().8h + fmin \s2\().8h, \s2\().8h, \s3\().8h + + fmin \s0\().8h, \s0\().8h, \s1\().8h + fmin \z0\().8h, \z0\().8h, \s2\().8h + + fmin \z0\().8h, \z0\().8h, \s0\().8h +.endm + +.macro Max_5 s0, s1, s2, s3, s4, z0 + fmax \s0\().8h, \s0\().8h, \s3\().8h + fmax \s1\().8h, \s1\().8h, \s4\().8h + fmax \z0\().8h, \s2\().8h, \z0\().8h + + fmax \s0\().8h, \s0\().8h, \s1\().8h + fmax \z0\().8h, \z0\().8h, \s0\().8h + +.endm + +.macro Min_5 s0, s1, s2, s3, s4, z0 + fmin \s0\().8h, \s0\().8h, \s3\().8h + fmin \s1\().8h, \s1\().8h, \s4\().8h + fmin \z0\().8h, \s2\().8h, \z0\().8h + + fmin \s0\().8h, \s0\().8h, \s1\().8h + fmin \z0\().8h, \z0\().8h, \s0\().8h +.endm + +.macro Max_4 s0, s1, s2, s3, z0 + fmax \s0\().8h, \s0\().8h, \s2\().8h + fmax \s1\().8h, \s1\().8h, \s3\().8h + fmax \z0\().8h, \s0\().8h, \z0\().8h + fmax \z0\().8h, \z0\().8h, \s1\().8h + +.endm + +.macro Min_4 s0, s1, s2, s3, z0 + fmin \s0\().8h, \s0\().8h, \s2\().8h + fmin \s1\().8h, \s1\().8h, \s3\().8h + fmin \z0\().8h, \s0\().8h, \z0\().8h + fmin \z0\().8h, \z0\().8h, \s1\().8h +.endm + +.macro Max_3 s0, s1, s2, z0 + fmax \s0\().8h, \s0\().8h, \s2\().8h + fmax \z0\().8h, \s1\().8h, \z0\().8h + fmax \z0\().8h, \s0\().8h, \z0\().8h + +.endm + +.macro Min_3 s0, s1, s2, z0 + fmin \s0\().8h, \s0\().8h, \s2\().8h + fmin \z0\().8h, \s1\().8h, \z0\().8h + fmin \z0\().8h, \s0\().8h, \z0\().8h +.endm + +.macro Reduce_Max_Min s0, s1 + // 8->4 + fmaxp \s0\().8h, \s0\().8h, \s0\().8h + fminp \s1\().8h, \s1\().8h, \s1\().8h + // 4->2 + fmaxp \s0\().8h, \s0\().8h, \s0\().8h + fminp \s1\().8h, \s1\().8h, \s1\().8h + // 2->1 + fmaxp \s0\().8h, \s0\().8h, \s0\().8h + fminp \s1\().8h, \s1\().8h, \s1\().8h +.endm + + +//void CountMinMaxValue_FP16(float* source, float* minVal, float* maxVal, size_t sizeQuad) +asm_function CountMinMaxValue_FP16 + +// x0: source, x1:minVal, x2:maxVal, x3:size +stp d14, d15, [sp, #(-16 * 4)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] + +Start: +ld1 {v31.8h}, [x0], #16 +sub x3, x3, #1 +mov v30.8h, v31.8h // v30:min v31:max + + +TILE_24: +cmp x3, #24 +blt TILE_20 + +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 +ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + +MaxMin_4 v0, v1, v2, v3, v24, v25, v26, v27 // v24:max, v26:min +MaxMin_4 v4, v5, v6, v7, v28, v29, v0, v1 // v28:max, v0:min +MaxMin_4 v8, v9, v10, v11, v2, v3, v25, v27 // v2:max, v25:min +MaxMin_4 v12, v13, v14, v15, v4, v5, v6, v7 // v4:max, v6:min +MaxMin_4 v16, v17, v18, v19, v1, v3, v10, v27 // v1:max, v10:min +MaxMin_4 v20, v21, v22, v23, v12, v13, v14, v15 // v12:max, v14:min + +Max_6 v1, v2, v4, v12, v24, v28, v31 +Min_6 v0, v6, v10, v14, v26, v25, v30 + +sub x3, x3, #24 +b TILE_24 + +TILE_20: +cmp x3, #20 +blt TILE_16 + +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 + +MaxMin_4 v0, v1, v2, v3, v24, v25, v26, v27 // v24:max, v26:min +MaxMin_4 v4, v5, v6, v7, v20, v21, v22, v23 // v20:max, v22:min +MaxMin_4 v8, v9, v10, v11, v0, v1, v2, v3 // v0:max, v2:min +MaxMin_4 v12, v13, v14, v15, v4, v5, v6, v7 // v4:max, v6:min +MaxMin_4 v16, v17, v18, v19, v25, v27, v21, v23 // v25:max, v21:min + +Max_5 v0, v4, v20, v25, v24, v31 +Min_5 v2, v6, v21, v22, v26, v30 + +sub x3, x3, #20 +b TILE_20 + +TILE_16: +cmp x3, #16 +blt TILE_12 + +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 + +MaxMin_4 v0, v1, v2, v3, v24, v25, v26, v27 // v24:max, v26:min +MaxMin_4 v4, v5, v6, v7, v20, v21, v22, v23 // v20:max, v22:min +MaxMin_4 v8, v9, v10, v11, v16, v17, v18, v19 // v16:max, v18:min +MaxMin_4 v12, v13, v14, v15, v0, v1, v2, v3 // v0:max, v2:min + +Max_4 v0, v16, v20, v24, v31 +Min_4 v2, v18, v22, v26, v30 + +sub x3, x3, #16 +b TILE_16 + +TILE_12: +cmp x3, #12 +blt TILE_8 + +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 + +MaxMin_4 v0, v1, v2, v3, v24, v25, v26, v27 // v24:max, v26:min +MaxMin_4 v4, v5, v6, v7, v20, v21, v22, v23 // v20:max, v22:min +MaxMin_4 v8, v9, v10, v11, v16, v17, v18, v19 // v16:max, v18:min + +Max_3 v16, v20, v24, v31 +Min_3 v18, v22, v26, v30 + +sub x3, x3, #12 +b TILE_12 + +TILE_8: +cmp x3, #8 +blt TILE_4 + +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 + +MaxMin_4 v0, v1, v2, v3, v24, v25, v26, v27 // v24:max, v26:min +MaxMin_4 v4, v5, v6, v7, v20, v21, v22, v23 // v20:max, v22:min + +fmax v24.8h, v24.8h, v20.8h +fmin v26.8h, v26.8h, v22.8h +fmax v31.8h, v31.8h, v24.8h +fmin v30.8h, v30.8h, v26.8h + +sub x3, x3, #8 +b TILE_8 + +TILE_4: +cmp x3, #4 +blt TILE_2 + +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 + +MaxMin_4 v0, v1, v2, v3, v24, v25, v26, v27 // v24:max, v26:min + +fmax v31.8h, v31.8h, v24.8h +fmin v30.8h, v30.8h, v26.8h + +sub x3, x3, #4 +b TILE_4 + +TILE_2: +cmp x3, #2 +blt TILE_1 + +ld1 {v0.8h, v1.8h}, [x0], #32 + +fmax v2.8h, v0.8h, v1.8h +fmin v3.8h, v0.8h, v1.8h + +fmax v31.8h, v31.8h, v2.8h +fmin v30.8h, 
v30.8h, v3.8h + +sub x3, x3, #2 +b TILE_2 + +TILE_1: +cmp x3, #1 +blt End + +ld1 {v0.8h}, [x0], #16 + +fmax v31.8h, v31.8h, v0.8h +fmin v30.8h, v30.8h, v0.8h + +sub x3, x3, #1 +b TILE_1 + +End: +Reduce_Max_Min v31, v30 +//fcvtl v30.4s, v30.4h +//fcvtl v31.4s, v31.4h +st1 {v30.h}[0], [x1] +st1 {v31.h}[1], [x2] + +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 4) +ret + +#endif diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuanInput_ARM82.S b/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuanInput_ARM82.S new file mode 100644 index 000000000..22919922f --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuanInput_ARM82.S @@ -0,0 +1,268 @@ +// +// DynamicQuanInput_ARM82.S +// MNN +// +// Created by MNN on 2019/01/22. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SCALE_TO_FLOAT_8 s0, s1, s2, s3, s4, s5, s6, s7, z0 + fmul \s0\().8h, \s0\().8h, \z0\().8h + fmul \s1\().8h, \s1\().8h, \z0\().8h + fmul \s2\().8h, \s2\().8h, \z0\().8h + fmul \s3\().8h, \s3\().8h, \z0\().8h + fmul \s4\().8h, \s4\().8h, \z0\().8h + fmul \s5\().8h, \s5\().8h, \z0\().8h + fmul \s6\().8h, \s6\().8h, \z0\().8h + fmul \s7\().8h, \s7\().8h, \z0\().8h +.endm + +.macro SCALE_TO_FLOAT_4 s0, s1, s2, s3, z0 + fmul \s0\().8h, \s0\().8h, \z0\().8h + fmul \s1\().8h, \s1\().8h, \z0\().8h + fmul \s2\().8h, \s2\().8h, \z0\().8h + fmul \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro ADD_ZEROPOINT_8 s0, s1, s2, s3, s4, s5, s6, s7, z0 + fadd \s0\().8h, \s0\().8h, \z0\().8h + fadd \s1\().8h, \s1\().8h, \z0\().8h + fadd \s2\().8h, \s2\().8h, \z0\().8h + fadd \s3\().8h, \s3\().8h, \z0\().8h + fadd \s4\().8h, \s4\().8h, \z0\().8h + fadd \s5\().8h, \s5\().8h, \z0\().8h + fadd \s6\().8h, \s6\().8h, \z0\().8h + fadd \s7\().8h, \s7\().8h, \z0\().8h +.endm + +.macro ADD_ZEROPOINT_4 s0, s1, s2, s3, z0 + fadd \s0\().8h, \s0\().8h, \z0\().8h + fadd \s1\().8h, \s1\().8h, \z0\().8h + fadd \s2\().8h, \s2\().8h, \z0\().8h + fadd \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro FLOAT_TO_INT_8 s0, s1, s2, s3, s4, s5, s6, s7 + fcvtas \s0\().8h, \s0\().8h + fcvtas \s1\().8h, \s1\().8h + fcvtas \s2\().8h, \s2\().8h + fcvtas \s3\().8h, \s3\().8h + fcvtas \s4\().8h, \s4\().8h + fcvtas \s5\().8h, \s5\().8h + fcvtas \s6\().8h, \s6\().8h + fcvtas \s7\().8h, \s7\().8h +.endm + +.macro FLOAT_TO_INT_4 s0, s1, s2, s3 + fcvtas \s0\().8h, \s0\().8h + fcvtas \s1\().8h, \s1\().8h + fcvtas \s2\().8h, \s2\().8h + fcvtas \s3\().8h, \s3\().8h +.endm + +.macro INT16_TO_INT8_8 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3 + sqxtn \d0\().8b, \s0\().8h + sqxtn2 \d0\().16b, \s1\().8h + sqxtn \d1\().8b, \s2\().8h + sqxtn2 \d1\().16b, \s3\().8h + sqxtn \d2\().8b, \s4\().8h + sqxtn2 \d2\().16b, \s5\().8h + sqxtn \d3\().8b, \s6\().8h + sqxtn2 \d3\().16b, \s7\().8h +.endm + +.macro INT16_TO_INT8_4 s0, s1, s2, s3, d0, d1 + sqxtn \d0\().8b, \s0\().8h + sqxtn2 \d0\().16b, \s1\().8h + sqxtn \d1\().8b, \s2\().8h + sqxtn2 \d1\().16b, \s3\().8h +.endm + + +/* +Note: Only used in dynamic quant,so do not need compare min max! + */ +asm_function DynamicQuanInput_ARM82 +//void DynamicQuanInput_ARM82(const float* src, int8_t* dst, size_t sizeQuad, float* scale, size_t aMin, size_t aMax, size_t zeroPoint); +//x0:src, x1:dst, x2:sizeQuad, x3:scale, x4:aMin, x5:aMax, x6:zeroPoint +stp d14, d15, [sp, #-64]! 
+stp d12, d13, [sp, #16] +stp d10, d11, [sp, #32] +stp d8, d9, [sp, #48] + +ld1 {v29.s}[0], [x3] // Load scale +// copy zero point +dup v30.4s, w6 +fcvtn v31.4h, v29.4s +scvtf v30.4s, v30.4s + +dup v31.8h, v31.h[0] +fcvtn v30.4h, v30.4s +dup v30.8h, v30.h[0] + +FL28: +cmp x2, #28 +blt FL24 + +FLLoop28: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 +ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 +ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +SCALE_TO_FLOAT_8 v16, v17, v18, v19, v20, v21, v22, v23, v31 +SCALE_TO_FLOAT_4 v24, v25, v26, v27, v31 +sub x2, x2, #28 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 +ADD_ZEROPOINT_8 v16, v17, v18, v19, v20, v21, v22, v23, v30 +ADD_ZEROPOINT_4 v24, v25, v26, v27, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 v8, v9, v10, v11, v12, v13, v14, v15 +FLOAT_TO_INT_8 v16, v17, v18, v19, v20, v21, v22, v23 +FLOAT_TO_INT_4 v24, v25, v26, v27 +cmp x2, #28 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v28, v29, v0, v1 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v2, v3, v4, v5 +st1 {v28.16b, v29.16b}, [x1], #32 +INT16_TO_INT8_8 v16, v17, v18, v19, v20, v21, v22, v23, v6, v7, v8, v9 +st1 {v0.16b, v1.16b}, [x1], #32 +INT16_TO_INT8_4 v24, v25, v26, v27, v10, v11 + +st1 {v2.16b, v3.16b, v4.16b, v5.16b}, [x1], #64 +st1 {v6.16b, v7.16b, v8.16b, v9.16b}, [x1], #64 +st1 {v10.16b, v11.16b}, [x1], #32 + +bge FLLoop28 + +FL24: +cmp x2, #24 +blt FL16 + +FLLoop24: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 +ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +SCALE_TO_FLOAT_8 v16, v17, v18, v19, v20, v21, v22, v23, v31 +sub x2, x2, #24 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 +ADD_ZEROPOINT_8 v16, v17, v18, v19, v20, v21, v22, v23, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 v8, v9, v10, v11, v12, v13, v14, v15 +FLOAT_TO_INT_8 v16, v17, v18, v19, v20, v21, v22, v23 +cmp x2, #24 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3 +INT16_TO_INT8_8 v16, v17, v18, v19, v20, v21, v22, v23, v4, v5, v6, v7 + +st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [x1], #64 +st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x1], #64 +st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #64 + +bge FLLoop24 + +FL16: +cmp x2, #16 +blt FL8 + +FLLoop16: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +sub x2, x2, #16 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 
v8, v9, v10, v11, v12, v13, v14, v15 +cmp x2, #16 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3 + +st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [x1], #64 +st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x1], #64 + +bge FLLoop16 + +FL8: +cmp x2, #8 +blt FL4 + +FLLoop8: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +sub x2, x2, #8 +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +cmp x2, #8 +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +st1 {v24.16b, v25.16b, v26.16b, v27.16b}, [x1], #64 + +bge FLLoop8 + +FL4: +cmp x2, #4 +blt FL1 + +FLLoop4: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +sub x2, x2, #4 +SCALE_TO_FLOAT_4 v0, v1, v2, v3, v31 +ADD_ZEROPOINT_4 v0, v1, v2, v3, v30 +cmp x2, #4 +FLOAT_TO_INT_4 v0, v1, v2, v3 +INT16_TO_INT8_4 v0, v1, v2, v3, v24, v25 +st1 {v24.16b, v25.16b}, [x1], #32 + +bge FLLoop4 + +FL1: +cmp x2, #0 +beq FLEnd + +FLLoop1: +ld1 {v0.8h}, [x0], #16 +fmul v0.8h, v0.8h, v31.8h +fadd v0.8h, v0.8h, v30.8h + +fcvtas v0.8h, v0.8h +sqxtn v0.8b, v0.8h + +st1 {v0.d}[0], [x1], #8 + +subs x2, x2, #1 +bne FLLoop1 + +FLEnd: +ldp d8, d9, [sp, #48] +ldp d10, d11, [sp, #32] +ldp d12, d13, [sp, #16] +ldp d14, d15, [sp], #64 +ret +#endif diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantAndReorder_ARM82.S b/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantAndReorder_ARM82.S new file mode 100644 index 000000000..44e3568f1 --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantAndReorder_ARM82.S @@ -0,0 +1,433 @@ +// +// DynamicQuanInput_ARM82.S +// MNN +// +// Created by MNN on 2019/01/22. 
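Every unrolled tile in DynamicQuanInput_ARM82 above applies the same per-element transform: multiply by the broadcast FP16 scale, add the zero point, round to nearest with fcvtas (ties away from zero), then saturate-narrow to int8 with sqxtn. A scalar sketch of that transform under the same rounding rules; the helper name is illustrative and not part of the patch:

#include <math.h>
#include <stdint.h>

// Scalar equivalent of SCALE_TO_FLOAT + ADD_ZEROPOINT + FLOAT_TO_INT + INT16_TO_INT8.
static inline int8_t quantizeOne(float x, float scale, float zeroPoint) {
    float v = x * scale + zeroPoint;
    long r = lroundf(v);          // fcvtas: round to nearest, ties away from zero
    if (r > 127)  r = 127;        // sqxtn: saturate to the int8 range
    if (r < -128) r = -128;
    return (int8_t)r;
}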
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SCALE_TO_FLOAT_8 s0, s1, s2, s3, s4, s5, s6, s7, z0 + fmul \s0\().8h, \s0\().8h, \z0\().8h + fmul \s1\().8h, \s1\().8h, \z0\().8h + fmul \s2\().8h, \s2\().8h, \z0\().8h + fmul \s3\().8h, \s3\().8h, \z0\().8h + fmul \s4\().8h, \s4\().8h, \z0\().8h + fmul \s5\().8h, \s5\().8h, \z0\().8h + fmul \s6\().8h, \s6\().8h, \z0\().8h + fmul \s7\().8h, \s7\().8h, \z0\().8h +.endm + +.macro SCALE_TO_FLOAT_4 s0, s1, s2, s3, z0 + fmul \s0\().8h, \s0\().8h, \z0\().8h + fmul \s1\().8h, \s1\().8h, \z0\().8h + fmul \s2\().8h, \s2\().8h, \z0\().8h + fmul \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro ADD_ZEROPOINT_8 s0, s1, s2, s3, s4, s5, s6, s7, z0 + fadd \s0\().8h, \s0\().8h, \z0\().8h + fadd \s1\().8h, \s1\().8h, \z0\().8h + fadd \s2\().8h, \s2\().8h, \z0\().8h + fadd \s3\().8h, \s3\().8h, \z0\().8h + fadd \s4\().8h, \s4\().8h, \z0\().8h + fadd \s5\().8h, \s5\().8h, \z0\().8h + fadd \s6\().8h, \s6\().8h, \z0\().8h + fadd \s7\().8h, \s7\().8h, \z0\().8h +.endm + +.macro ADD_ZEROPOINT_4 s0, s1, s2, s3, z0 + fadd \s0\().8h, \s0\().8h, \z0\().8h + fadd \s1\().8h, \s1\().8h, \z0\().8h + fadd \s2\().8h, \s2\().8h, \z0\().8h + fadd \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro FLOAT_TO_INT_8 s0, s1, s2, s3, s4, s5, s6, s7 + fcvtas \s0\().8h, \s0\().8h + fcvtas \s1\().8h, \s1\().8h + fcvtas \s2\().8h, \s2\().8h + fcvtas \s3\().8h, \s3\().8h + fcvtas \s4\().8h, \s4\().8h + fcvtas \s5\().8h, \s5\().8h + fcvtas \s6\().8h, \s6\().8h + fcvtas \s7\().8h, \s7\().8h +.endm + +.macro FLOAT_TO_INT_4 s0, s1, s2, s3 + fcvtas \s0\().8h, \s0\().8h + fcvtas \s1\().8h, \s1\().8h + fcvtas \s2\().8h, \s2\().8h + fcvtas \s3\().8h, \s3\().8h +.endm + +.macro INT16_TO_INT8_8 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3 + sqxtn \d0\().8b, \s0\().8h + sqxtn2 \d0\().16b, \s1\().8h + sqxtn \d1\().8b, \s2\().8h + sqxtn2 \d1\().16b, \s3\().8h + sqxtn \d2\().8b, \s4\().8h + sqxtn2 \d2\().16b, \s5\().8h + sqxtn \d3\().8b, \s6\().8h + sqxtn2 \d3\().16b, \s7\().8h +.endm + +.macro INT16_TO_INT8_4 s0, s1, s2, s3, d0, d1 + sqxtn \d0\().8b, \s0\().8h + sqxtn2 \d0\().16b, \s1\().8h + sqxtn \d1\().8b, \s2\().8h + sqxtn2 \d1\().16b, \s3\().8h +.endm + + +/* +Note: Only used in dynamic quant,so do not need compare min max! +1. Quant Float16 to Int8; +2. Pack data from C8 to C4 for Im2Col fixed unit=4 + */ +asm_function DynamicQuanInputAndReorder_ARM82 +//void DynamicQuanInputAndReorder_ARM82(const float* src, int8_t* dst, size_t planeSize, float* scale, size_t aMin, size_t aMax, size_t zeroPoint, size_t ocQuad, size_t offset); +//x0:src, x1:dst, x2:planeSize, x3:scale, x4:aMin, x5:aMax, x6:zeroPoint, x7:ocQuad, x8:offset +ldr x8, [sp, #0] // plane*4 +stp d14, d15, [sp, #-64]! 
+stp d12, d13, [sp, #16] +stp d10, d11, [sp, #32] +stp d8, d9, [sp, #48] + +ld1 {v29.s}[0], [x3] // Load scale +// copy zero point +dup v30.4s, w6 +fcvtn v31.4h, v29.4s +scvtf v30.4s, v30.4s + +add x13, x8, x8 + +dup v31.8h, v31.h[0] +fcvtn v30.4h, v30.4s +dup v30.8h, v30.h[0] + +mov x9, x1 // first N*4 +add x10, x1, x8 // seconde N*4 +mov x14, x2 // Reserve planeSize + +Outter_Channel_Loop: +cmp x7, #1 +blt End + +mov x11, x9 // flag address +mov x12, x10 + +FL28: // N loop +cmp x2, #28 +blt FL20 + +FLLoop28: // N=28 + +ChannleLoop_28: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 +ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 +ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +SCALE_TO_FLOAT_8 v16, v17, v18, v19, v20, v21, v22, v23, v31 +SCALE_TO_FLOAT_4 v24, v25, v26, v27, v31 +sub x2, x2, #28 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 +ADD_ZEROPOINT_8 v16, v17, v18, v19, v20, v21, v22, v23, v30 +ADD_ZEROPOINT_4 v24, v25, v26, v27, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 v8, v9, v10, v11, v12, v13, v14, v15 +FLOAT_TO_INT_8 v16, v17, v18, v19, v20, v21, v22, v23 +FLOAT_TO_INT_4 v24, v25, v26, v27 +cmp x2, #28 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v28, v29, v0, v1 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v2, v3, v4, v5 +//st1 {v28.16b, v29.16b}, [x1], #32 +INT16_TO_INT8_8 v16, v17, v18, v19, v20, v21, v22, v23, v6, v7, v8, v9 +//st1 {v0.16b, v1.16b}, [x1], #32 +INT16_TO_INT8_4 v24, v25, v26, v27, v10, v11 + +// Reorder c8->c4, 0,..27 means plane index +uzp1 v12.4s, v28.4s, v29.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp1 v13.4s, v0.4s, v1.4s // 4 4 5 5 x 6 6 7 7 -> 4 5 6 7 +uzp1 v14.4s, v2.4s, v3.4s // 8 8 9 9 x 10 10 11 11 -> 8 9 10 11 +uzp1 v15.4s, v4.4s, v5.4s // 12 12 13 13 x 14 14 15 15 -> 12 13 14 15 +uzp1 v16.4s, v6.4s, v7.4s // 16 16 17 17 x 18 18 19 19 -> 16 17 18 19 +uzp1 v17.4s, v8.4s, v9.4s // 20 20 21 21 x 22 22 23 23 -> 20 21 22 23 +uzp1 v18.4s, v10.4s, v11.4s // 24 24 25 25 x 26 26 27 27 -> 24 25 26 27 +uzp2 v19.4s, v28.4s, v29.4s +uzp2 v20.4s, v0.4s, v1.4s +uzp2 v21.4s, v2.4s, v3.4s +uzp2 v22.4s, v4.4s, v5.4s +uzp2 v23.4s, v6.4s, v7.4s +uzp2 v24.4s, v8.4s, v9.4s +uzp2 v25.4s, v10.4s, v11.4s + +st1 {v12.16b, v13.16b, v14.16b, v15.16b}, [x11], #64 +st1 {v16.16b, v17.16b, v18.16b}, [x11], #48 +st1 {v19.16b, v20.16b, v21.16b, v22.16b}, [x12], #64 +st1 {v23.16b, v24.16b, v25.16b}, [x12], #48 + +bge FLLoop28 + +FL24: +cmp x2, #24 +blt FL20 + +FLLoop24: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 +ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +SCALE_TO_FLOAT_8 v16, v17, v18, v19, v20, v21, v22, v23, v31 +sub x2, x2, #24 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 +ADD_ZEROPOINT_8 v16, v17, v18, v19, v20, v21, v22, v23, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 v8, v9, 
v10, v11, v12, v13, v14, v15 +FLOAT_TO_INT_8 v16, v17, v18, v19, v20, v21, v22, v23 +cmp x2, #24 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v28, v29, v0, v1 +INT16_TO_INT8_8 v16, v17, v18, v19, v20, v21, v22, v23, v2, v3, v4, v5 + +// Reorder c8->c4 +uzp1 v6.4s, v24.4s, v25.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp1 v7.4s, v26.4s, v27.4s // 4 4 5 5 x 6 6 7 7 -> 4 5 6 7 +uzp1 v8.4s, v28.4s, v29.4s // 8 8 9 9 x 10 10 11 11 -> 8 9 10 11 +uzp1 v9.4s, v0.4s, v1.4s // 12 12 13 13 x 14 14 15 15 -> 12 13 14 15 +uzp1 v10.4s, v2.4s, v3.4s // 16 16 17 17 x 18 18 19 19 -> 16 17 18 19 +uzp1 v11.4s, v4.4s, v5.4s // 20 20 21 21 x 22 22 23 23 -> 20 21 22 23 +uzp2 v12.4s, v24.4s, v25.4s +uzp2 v13.4s, v26.4s, v27.4s +uzp2 v14.4s, v28.4s, v29.4s +uzp2 v15.4s, v0.4s, v1.4s +uzp2 v16.4s, v2.4s, v3.4s +uzp2 v17.4s, v4.4s, v5.4s + +st1 {v6.16b, v7.16b, v8.16b, v9.16b}, [x11], #64 +st1 {v10.16b, v11.16b}, [x11], #32 +st1 {v12.16b, v13.16b, v14.16b, v15.16b}, [x12], #64 +st1 {v16.16b, v17.16b}, [x12], #32 + +bge FLLoop24 + +FL20: +cmp x2, #20 +blt FL12 + +FLLoop20: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 +ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +SCALE_TO_FLOAT_4 v16, v17, v18, v19, v31 +sub x2, x2, #20 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 +ADD_ZEROPOINT_4 v16, v17, v18, v19, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 v8, v9, v10, v11, v12, v13, v14, v15 +FLOAT_TO_INT_4 v16, v17, v18, v19 +cmp x2, #20 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v21, v22, v23, v28 +INT16_TO_INT8_4 v16, v17, v18, v19, v29, v20 + +// Reorder c8->c4 +uzp1 v0.4s, v24.4s, v25.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp1 v1.4s, v26.4s, v27.4s // 4 4 5 5 x 6 6 7 7 -> 4 5 6 7 +uzp1 v2.4s, v21.4s, v22.4s // 8 8 9 9 x 10 10 11 11 -> 8 9 10 11 +uzp1 v3.4s, v23.4s, v28.4s // 12 12 13 13 x 14 14 15 15 -> 12 13 14 15 +uzp1 v4.4s, v29.4s, v20.4s // 16 16 17 17 x 18 18 19 19 -> 16 17 18 19 +uzp2 v5.4s, v24.4s, v25.4s +uzp2 v6.4s, v26.4s, v27.4s +uzp2 v7.4s, v21.4s, v22.4s +uzp2 v8.4s, v23.4s, v28.4s +uzp2 v9.4s, v29.4s, v20.4s + +st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x11], #64 +st1 {v4.16b}, [x11], #16 +st1 {v5.16b, v6.16b, v7.16b, v8.16b}, [x12], #64 +st1 {v9.16b}, [x12], #16 + +bge FLLoop20 + +FL16: +cmp x2, #16 +blt FL12 + +FLLoop16: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 +ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_8 v8, v9, v10, v11, v12, v13, v14, v15, v31 +sub x2, x2, #16 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_8 v8, v9, v10, v11, v12, v13, v14, v15, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_8 v8, v9, v10, v11, v12, v13, v14, v15 +cmp x2, #16 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +INT16_TO_INT8_8 v8, v9, v10, v11, v12, v13, v14, v15, v20, v21, v22, v23 + +// Reorder c8->c4 +uzp1 v16.4s, v24.4s, v25.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp1 v17.4s, v26.4s, v27.4s // 4 4 5 5 x 
6 6 7 7 -> 4 5 6 7 +uzp1 v18.4s, v20.4s, v21.4s // 8 8 9 9 x 10 10 11 11 -> 8 9 10 11 +uzp1 v19.4s, v22.4s, v23.4s // 12 12 13 13 x 14 14 15 15 -> 12 13 14 15 + +uzp2 v0.4s, v24.4s, v25.4s +uzp2 v1.4s, v26.4s, v27.4s +uzp2 v2.4s, v20.4s, v21.4s +uzp2 v3.4s, v22.4s, v23.4s + +st1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x11], #64 +st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x12], #64 + +bge FLLoop16 + +FL12: +cmp x2, #12 +blt FL8 + +FLLoop12: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +ld1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], #64 + +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +SCALE_TO_FLOAT_4 v8, v9, v10, v11, v31 +sub x2, x2, #12 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +ADD_ZEROPOINT_4 v8, v9, v10, v11, v30 + +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +FLOAT_TO_INT_4 v8, v9, v10, v11 +cmp x2, #12 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 +INT16_TO_INT8_4 v8, v9, v10, v11, v20, v21 + +// Reorder c8->c4 +uzp1 v12.4s, v24.4s, v25.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp2 v16.4s, v24.4s, v25.4s +uzp1 v13.4s, v26.4s, v27.4s // 4 4 5 5 x 6 6 7 7 -> 4 5 6 7 +uzp2 v17.4s, v26.4s, v27.4s +uzp1 v14.4s, v20.4s, v21.4s // 8 8 9 9 x 10 10 11 11 -> 8 9 10 11 +uzp2 v18.4s, v20.4s, v21.4s + +st1 {v12.16b, v13.16b, v14.16b}, [x11], #48 +st1 {v16.16b, v17.16b, v18.16b}, [x12], #48 + +bge FLLoop12 + +FL8: +cmp x2, #8 +blt FL4 + +FLLoop8: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 +sub x2, x2, #8 +SCALE_TO_FLOAT_8 v0, v1, v2, v3, v4, v5, v6, v7, v31 +ADD_ZEROPOINT_8 v0, v1, v2, v3, v4, v5, v6, v7, v30 +cmp x2, #8 +FLOAT_TO_INT_8 v0, v1, v2, v3, v4, v5, v6, v7 +INT16_TO_INT8_8 v0, v1, v2, v3, v4, v5, v6, v7, v24, v25, v26, v27 + +// Reorder c8->c4 +uzp1 v12.4s, v24.4s, v25.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp2 v19.4s, v24.4s, v25.4s +uzp1 v13.4s, v26.4s, v27.4s // 4 4 5 5 x 6 6 7 7 -> 4 5 6 7 +uzp2 v20.4s, v26.4s, v27.4s + +st1 {v12.16b, v13.16b}, [x11], #32 +st1 {v19.16b, v20.16b}, [x12], #32 + +bge FLLoop8 + +FL4: +cmp x2, #4 +blt FL1 + +FLLoop4: +ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 +sub x2, x2, #4 +SCALE_TO_FLOAT_4 v0, v1, v2, v3, v31 +ADD_ZEROPOINT_4 v0, v1, v2, v3, v30 +cmp x2, #4 +FLOAT_TO_INT_4 v0, v1, v2, v3 +INT16_TO_INT8_4 v0, v1, v2, v3, v24, v25 + +// Reorder c8->c4 +uzp1 v12.4s, v24.4s, v25.4s // 0 0 1 1 x 2 2 3 3 -> 0 1 2 3 +uzp2 v19.4s, v24.4s, v25.4s + +st1 {v12.16b}, [x11], #16 +st1 {v19.16b}, [x12], #16 +//st1 {v24.16b, v25.16b}, [x1], #32 + +bge FLLoop4 + +FL1: +cmp x2, #0 +ble FLEnd + +FLLoop1: +ld1 {v0.8h}, [x0], #16 +fmul v0.8h, v0.8h, v31.8h +fadd v0.8h, v0.8h, v30.8h +sub x2, x2, #1 + +fcvtas v0.8h, v0.8h +sqxtn v0.8b, v0.8h + +cmp x2, #1 +st1 {v0.s}[0], [x11], #4 +st1 {v0.s}[1], [x12], #4 + +bge FLLoop1 + +FLEnd: +sub x7, x7, #1 +add x9, x9, x13 +add x10, x10, x13 +mov x2, x14 +b Outter_Channel_Loop + +End: +ldp d8, d9, [sp, #48] +ldp d10, d11, [sp, #32] +ldp d12, d13, [sp, #16] +ldp d14, d15, [sp], #64 +ret +#endif diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantFP16.S b/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantFP16.S index 689194c9e..01455850d 100644 --- a/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantFP16.S +++ b/source/backend/arm82/asm/arm64/low_memory/MNNDynamicQuantFP16.S @@ -19,39 +19,39 @@ fcvtas \z3\().8h, \z3\().8h .endm -//void MNNDynamicQuantFP16(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize) +//void 
MNNDynamicQuantFP16(const float* src, int8_t* dst, const float* scale, size_t src_depth_quad, size_t realSize, int pack) asm_function MNNDynamicQuantFP16 -// x0: src, x1:dst, x2:scale, x3:sum, x4:src_depth_quad, x5:realSize +// Feature: quant and reorder C8->C4 + +// x0: src, x1:dst, x2:scale, x3:src_depth_quad, x4:realSize stp d14, d15, [sp, #(-16 * 4)]! stp d12, d13, [sp, #(16 * 1)] stp d10, d11, [sp, #(16 * 2)] stp d8, d9, [sp, #(16 * 3)] Start: -lsl x6, x5, #3 // dst_step = batch * unit * sizeof(int8_t) = batch * 8 = batch << 3 -lsl x7, x6, #1 // src_step = dst_step * 2 (float16_t) = dst_step << 1 - -movi v29.16b, #1 +lsl x6, x4, #3 // dst_step = batch * (2*unit) * sizeof(int8_t) = batch * 8 = batch << 3 +lsl x7, x4, #4 // src_step = batch * pack * sizeof(float16) = batch * 8 * 2 = batch << 4 +lsl x8, x4, #2 // 4 * plane +add x11, x1, x8 // second N*4 TILE_12: -cmp x5, #12 +cmp x4, #12 blt TILE_10 mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad +mov x15, x11 // second dst +mov x12, x3 // src_depth_quad sub x13, x7, #128 // src_step - 64 -sub x14, x6, #64 // dst_step - 64 - -// quant_scale: v12, v13 -ld1 {v12.8h}, [x2], #16 -ld1 {v13.d}[0], [x2], #8 -movi v23.4s, #0 -movi v24.4s, #0 -movi v25.4s, #0 -movi v26.4s, #0 -movi v27.4s, #0 -movi v28.4s, #0 + +// quant_scale: v12, v13, v14 +// ld1 {v12.8h}, [x2], #16 +// ld1 {v13.d}[0], [x2], #8 +ld1 {v12.4s, v13.4s, v14.4s}, [x2], #48 +fcvtn v12.4h, v12.4s +fcvtn2 v12.8h, v13.4s +fcvtn v13.4h, v14.4s LoopSz_12: ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x9], #64 @@ -91,47 +91,47 @@ sqxtn2 v4.16b, v9.8h sqxtn v5.8b, v10.8h sqxtn2 v5.16b, v11.8h -.inst 0x4e9d9417 // sdot v23.4s, v0.16b, v29.16b -.inst 0x4e9d9438 // sdot v24.4s, v1.16b, v29.16b -.inst 0x4e9d9459 // sdot v25.4s, v2.16b, v29.16b -.inst 0x4e9d947a // sdot v26.4s, v3.16b, v29.16b -.inst 0x4e9d949b // sdot v27.4s, v4.16b, v29.16b -.inst 0x4e9d94bc // sdot v28.4s, v5.16b, v29.16b +uzp1 v6.4s, v0.4s, v1.4s +uzp1 v7.4s, v2.4s, v3.4s +uzp1 v8.4s, v4.4s, v5.4s +uzp2 v9.4s, v0.4s, v1.4s +uzp2 v10.4s, v2.4s, v3.4s +uzp2 v11.4s, v4.4s, v5.4s + +st1 {v6.16b, v7.16b, v8.16b}, [x10], x6 +st1 {v9.16b, v10.16b, v11.16b}, [x15], x6 -st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x10], #64 -st1 {v4.16b, v5.16b}, [x10], x14 +//st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x10], #64 +//st1 {v4.16b, v5.16b}, [x10], x14 subs x12, x12, #1 bne LoopSz_12 -addp v12.4s, v23.4s, v24.4s -addp v13.4s, v25.4s, v26.4s -addp v14.4s, v27.4s, v28.4s -st1 {v12.4s, v13.4s, v14.4s}, [x3], #48 - Tile12End: -sub x5, x5, #12 // batch -= 12 +sub x4, x4, #12 // batch -= 12 add x0, x0, #192 // src += 12 * 8 * sizeof(float16_t) -add x1, x1, #96 // dst += 12 * 8 * sizeof(int8_t) +add x1, x1, #48 // dst += 12 * 4 * sizeof(int8_t) +add x11, x11, #48 b TILE_12 TILE_10: -cmp x5, #10 +cmp x4, #10 blt TILE_8 mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad -sub x13, x7, #128 // src_step - 64 -sub x14, x6, #64 // dst_step - 64 +mov x15, x11 // second dst +mov x12, x3 // src_depth_quad +sub x13, x7, #128 // src_step - 128 +sub x14, x6, #32 // dst_step - 32 // quant_scale: v10, v11 -ld1 {v10.8h}, [x2], #16 -ld1 {v11.s}[0], [x2], #4 -movi v24.4s, #0 -movi v25.4s, #0 -movi v26.4s, #0 -movi v27.4s, #0 -movi v28.4s, #0 +//ld1 {v10.8h}, [x2], #16 +//ld1 {v11.s}[0], [x2], #4 +ld1 {v12.4s, v13.4s}, [x2], #32 +ld1 {v14.d}[0], [x2], #8 +fcvtn v10.4h, v12.4s +fcvtn2 v10.8h, v13.4s +fcvtn v11.4h, v14.4s LoopSz_10: ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x9], #64 @@ -168,45 +168,45 @@ sqxtn2 v3.16b, v7.8h sqxtn v4.8b, v8.8h sqxtn2 
v4.16b, v9.8h -.inst 0x4e9d9418 // sdot v24.4s, v0.16b, v29.16b -.inst 0x4e9d9439 // sdot v25.4s, v1.16b, v29.16b -.inst 0x4e9d945a // sdot v26.4s, v2.16b, v29.16b -.inst 0x4e9d947b // sdot v27.4s, v3.16b, v29.16b -.inst 0x4e9d949c // sdot v28.4s, v4.16b, v29.16b +uzp1 v6.4s, v0.4s, v1.4s // 0 1 2 3 +uzp1 v7.4s, v2.4s, v3.4s // 4 5 6 7 +uzp1 v8.4s, v4.4s, v4.4s // 8 9 8 9 +uzp2 v12.4s, v0.4s, v1.4s +uzp2 v13.4s, v2.4s, v3.4s +uzp2 v14.4s, v4.4s, v4.4s +st1 {v6.16b, v7.16b}, [x10], #32 +st1 {v8.d}[0], [x10], x14 +st1 {v12.16b, v13.16b}, [x15], #32 +st1 {v14.d}[0], [x15], x14 -st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x10], #64 -st1 {v4.16b}, [x10], x14 +// st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x10], #64 +// st1 {v4.16b}, [x10], x14 subs x12, x12, #1 bne LoopSz_10 -addp v13.4s, v24.4s, v25.4s -addp v14.4s, v26.4s, v27.4s -addp v15.4s, v28.4s, v28.4s -st1 {v13.4s, v14.4s}, [x3], #32 -st1 {v15.d}[0], [x3], #8 - Tile10End: -sub x5, x5, #10 // batch -= 10 +sub x4, x4, #10 // batch -= 10 add x0, x0, #160 // src += 10 * 8 * sizeof(float16_t) -add x1, x1, #80 // dst += 10 * 8 * sizeof(int8_t) +add x1, x1, #40 // dst += 10 * 4 * sizeof(int8_t) +add x11, x11, #40 b TILE_10 TILE_8: -cmp x5, #8 +cmp x4, #8 blt TILE_1 sub x8, x7, #64 // src_step - 64 mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad +mov x15, x11 // second dst +mov x12, x3 // src_depth_quad // quant_scale: v8 -ld1 {v8.8h}, [x2], #16 -movi v25.4s, #0 -movi v26.4s, #0 -movi v27.4s, #0 -movi v28.4s, #0 +//ld1 {v8.8h}, [x2], #16 +ld1 {v12.4s, v13.4s}, [x2], #32 +fcvtn v8.4h, v12.4s +fcvtn2 v8.8h, v13.4s LoopSz_8: ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x9], #64 @@ -236,37 +236,36 @@ sqxtn2 v11.16b, v5.8h sqxtn v12.8b, v6.8h sqxtn2 v12.16b, v7.8h -.inst 0x4e9d9539 // sdot v25.4s, v9.16b, v29.16b -.inst 0x4e9d955a // sdot v26.4s, v10.16b, v29.16b -.inst 0x4e9d957b // sdot v27.4s, v11.16b, v29.16b -.inst 0x4e9d959c // sdot v28.4s, v12.16b, v29.16b - -st1 {v9.16b, v10.16b, v11.16b, v12.16b}, [x10], x6 +uzp1 v6.4s, v9.4s, v10.4s // 0 1 2 3 first +uzp1 v7.4s, v11.4s, v12.4s // 4 5 6 7 +uzp2 v14.4s, v9.4s, v10.4s // 0 1 2 3 second +uzp2 v15.4s, v11.4s, v12.4s // 4 5 6 7 +st1 {v6.16b, v7.16b}, [x10], x6 +st1 {v14.16b, v15.16b}, [x15], x6 +//st1 {v9.16b, v10.16b, v11.16b, v12.16b}, [x10], x6 subs x12, x12, #1 bne LoopSz_8 -addp v14.4s, v25.4s, v26.4s -addp v15.4s, v27.4s, v28.4s -st1 {v14.4s, v15.4s}, [x3], #32 - Tile8End: -sub x5, x5, #8 // batch -= 8 +sub x4, x4, #8 // batch -= 8 add x0, x0, #128 // src += 8 * 8 * sizeof(float16_t) -add x1, x1, #64 // dst += 8 * 8 * sizeof(int8_t) +add x1, x1, #32 // dst += 8 * 4 * sizeof(int8_t) +add x11, x11, #32 b TILE_8 TILE_4: -cmp x5, #4 +cmp x4, #4 blt TILE_2 mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad +mov x15, x11 // second dst +mov x12, x3 // src_depth_quad // quant_scale: v8 -ld1 {v8.d}[0], [x2], #8 -movi v27.4s, #0 -movi v28.4s, #0 +//ld1 {v8.d}[0], [x2], #8 +ld1 {v12.4s}, [x2], #16 +fcvtn v8.4h, v12.4s LoopSz_4: ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x9], x7 @@ -286,34 +285,35 @@ sqxtn2 v4.16b, v1.8h sqxtn v5.8b, v2.8h sqxtn2 v5.16b, v3.8h -.inst 0x4e9d949b // sdot v27.4s, v4.16b, v29.16b -.inst 0x4e9d94bc // sdot v28.4s, v5.16b, v29.16b - -st1 {v4.16b, v5.16b}, [x10], x6 +uzp1 v6.4s, v4.4s, v5.4s // 0 1 2 3 first +uzp2 v14.4s, v4.4s, v5.4s // 0 1 2 3 second +st1 {v6.16b}, [x10], x6 +st1 {v14.16b}, [x15], x6 +//st1 {v4.16b, v5.16b}, [x10], x6 subs x12, x12, #1 bne LoopSz_4 -addp v26.4s, v27.4s, v28.4s -st1 {v26.4s}, [x3], #16 - Tile4End: -sub x5, x5, #4 // batch -= 4 
+sub x4, x4, #4 // batch -= 4 add x0, x0, #64 // src += 4 * 8 * sizeof(float16_t) -add x1, x1, #32 // dst += 4 * 8 * sizeof(int8_t) +add x1, x1, #16 // dst += 4 * 4 * sizeof(int8_t) +add x11, x11, #16 b TILE_4 TILE_2: -cmp x5, #2 +cmp x4, #2 blt TILE_1 mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad +mov x15, x11 // second dst +mov x12, x3 // src_depth_quad // quant_scale: v8 -ld1 {v8.s}[0], [x2], #4 -movi v28.4s, #0 +//ld1 {v8.s}[0], [x2], #4 +ld1 {v12.d}[0], [x2], #8 +fcvtn v8.4h, v12.4s LoopSz_2: ld1 {v0.8h, v1.8h}, [x9], x7 @@ -329,33 +329,34 @@ fcvtas v1.8h, v1.8h // y = (int8_t)x sqxtn v2.8b, v0.8h sqxtn2 v2.16b, v1.8h -.inst 0x4e9d945c // sdot v28.4s, v2.16b, v29.16b -st1 {v2.16b}, [x10], x6 +st1 {v2.d}[0], [x10], x6 +st1 {v2.d}[1], [x15], x6 +//st1 {v2.16b}, [x10], x6 subs x12, x12, #1 bne LoopSz_2 -addp v27.4s, v28.4s, v28.4s -st1 {v27.d}[0], [x3], #8 - Tile2End: -sub x5, x5, #2 // batch -= 2 +sub x4, x4, #2 // batch -= 2 add x0, x0, #32 // src += 2 * 8 * sizeof(float16_t) -add x1, x1, #16 // dst += 2 * 8 * sizeof(int8_t) +add x1, x1, #8 // dst += 2 * 4 * sizeof(int8_t) +add x11, x11, #8 b TILE_2 TILE_1: -cmp x5, #1 +cmp x4, #1 blt End mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad +mov x15, x11 // second dst +mov x12, x3 // src_depth_quad // quant_scale: v8 -ld1 {v8.h}[0], [x2], #2 -movi v28.4s, #0 +//ld1 {v8.h}[0], [x2], #2 +ld1 {v12.s}[0], [x2], #4 +fcvtn v8.4h, v12.4s LoopSz_1: ld1 {v0.8h}, [x9], x7 @@ -366,20 +367,18 @@ fmul v0.8h, v0.8h, v8.h[0] fcvtas v0.8h, v0.8h // y = (int8_t)x sqxtn v0.8b, v0.8h -.inst 0x4e9d941c // sdot v28.4s, v0.16b, v29.16b -st1 {v0.8b}, [x10], x6 +st1 {v0.s}[0], [x10], x6 +st1 {v0.s}[1], [x15], x6 subs x12, x12, #1 bne LoopSz_1 -addp v27.4s, v28.4s, v28.4s -st1 {v27.s}[0], [x3], #4 - Tile1End: -sub x5, x5, #1 // batch -= 1 +sub x4, x4, #1 // batch -= 1 add x0, x0, #16 // src += 1 * 8 * sizeof(float16_t) -add x1, x1, #8 // dst += 1 * 8 * sizeof(int8_t) +add x1, x1, #4 // dst += 1 * 4 * sizeof(int8_t) +add x11, x11, #4 b TILE_1 @@ -390,4 +389,4 @@ ldp d12, d13, [sp, #(16 * 1)] ldp d14, d15, [sp], #(16 * 4) ret -#endif \ No newline at end of file +#endif diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_sdot.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_sdot.S deleted file mode 100644 index 9620c93eb..000000000 --- a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_sdot.S +++ /dev/null @@ -1,314 +0,0 @@ -// -// MNNGemmHybridInt4FP16_sdot.S -// MNN -// -// Created by MNN on 2023/11/09. 
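For orientation only (not part of the patch): the reworked dynamic-quant kernel above no longer emits the per-batch sums and writes its output as two C4 planes rather than one C8 plane. A rough scalar C++ sketch of that behaviour follows; every name in it is invented for illustration and is not an MNN API.

// Hypothetical scalar model of "quant and reorder C8 -> C4"; names are illustrative only.
#include <algorithm>
#include <cmath>
#include <cstdint>

void quantReorderC8toC4(const float* src, int8_t* dstLow, int8_t* dstHigh,
                        const float* scalePerBatch, int srcDepthQuad, int batch) {
    for (int z = 0; z < srcDepthQuad; ++z) {
        for (int n = 0; n < batch; ++n) {
            const float* in = src + (z * batch + n) * 8;
            const float s   = scalePerBatch[n];
            for (int c = 0; c < 8; ++c) {
                // fcvtas-style round to nearest, then sqxtn-style saturation to int8
                long v = std::lround(in[c] * s);
                v = std::min<long>(127, std::max<long>(-128, v));
                int8_t* plane = (c < 4) ? dstLow : dstHigh;   // split C8 into two C4 planes
                plane[(z * batch + n) * 4 + (c & 3)] = (int8_t)v;
            }
        }
    }
}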
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s, idx0, idx1, alpha0, alpha1 - fmul \d0\().4s, \d0\().4s, \s\().s[\idx0] - fmul \d1\().4s, \d1\().4s, \s\().s[\idx0] - fmul \d2\().4s, \d2\().4s, \s\().s[\idx1] - fmul \d3\().4s, \d3\().4s, \s\().s[\idx1] - fmul \d0\().4s, \d0\().4s, \alpha0\().4s - fmul \d1\().4s, \d1\().4s, \alpha1\().4s - fmul \d2\().4s, \d2\().4s, \alpha0\().4s - fmul \d3\().4s, \d3\().4s, \alpha1\().4s -.endm - -.macro Float32ToHalf s0, s1, s2, s3, d0, d1 - fcvtn \d0\().4h, \s0\().4s - fcvtn2 \d0\().8h, \s1\().4s - fcvtn \d1\().4h, \s2\().4s - fcvtn2 \d1\().8h, \s3\().4s -.endm - -.macro Dequant c0, z0, b0, s0, idx - fmla \c0\().8h, \z0\().8h, \s0\().h[\idx] - fadd \c0\().8h, \c0\().8h, \b0\().8h -.endm - -asm_function MNNGemmHybridInt4FP16_sdot - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt4_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! -stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] -ldr x14, [x7, #40] -Start: -lsl x13, x3, #5 // x13 = src_depth_quad * UNIT * UNIT_SRC / 2(int4) = src_depth_quad * 32 = src_depth_quad << 5 -ld1 {v6.16b, v7.16b}, [x14] -// mask -movi v14.16b, #15 -TILE_4: - cmp x6, #4 - blt TILE_1 - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - dup v24.4s, wzr - dup v25.4s, wzr - dup v26.4s, wzr - dup v27.4s, wzr - dup v28.4s, wzr - dup v29.4s, wzr - dup v30.4s, wzr - dup v31.4s, wzr -LoopSz_TILE_4: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v4.16b, v5.16b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v14.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v14.16b - - mov v10.d[0], v4.d[1] - mov v10.d[1], v4.d[0] - mov v11.d[1], v5.d[0] - mov v11.d[0], v5.d[1] - .inst 0x4e809490 // sdot v16.4s, v4.16b, v0.16b // (0,0)x2 (1,1)x2 - .inst 0x4e809558 // sdot v24.4s, v10.16b, v0.16b // (1,0)x2 (0,1)x2 - .inst 0x4e819491 // sdot v17.4s, v4.16b, v1.16b // (0,2) (1,3) - .inst 0x4e819559 // sdot v25.4s, v10.16b, 
v1.16b // (1,2) (0,3) - .inst 0x4e829492 // sdot v18.4s, v4.16b, v2.16b - .inst 0x4e82955a // sdot v26.4s, v10.16b, v2.16b - .inst 0x4e839493 // sdot v19.4s, v4.16b, v3.16b - .inst 0x4e83955b // sdot v27.4s, v10.16b, v3.16b - .inst 0x4e8094b4 // sdot v20.4s, v5.16b, v0.16b - .inst 0x4e80957c // sdot v28.4s, v11.16b, v0.16b - .inst 0x4e8194b5 // sdot v21.4s, v5.16b, v1.16b - .inst 0x4e81957d // sdot v29.4s, v11.16b, v1.16b - .inst 0x4e8294b6 // sdot v22.4s, v5.16b, v2.16b - .inst 0x4e82957e // sdot v30.4s, v11.16b, v2.16b - .inst 0x4e8394b7 // sdot v23.4s, v5.16b, v3.16b - .inst 0x4e83957f // sdot v31.4s, v11.16b, v3.16b - - subs x26, x26, #1 - bne LoopSz_TILE_4 - - addp v16.4s, v16.4s, v24.4s // (batch,oc)(0,0)(1,1)(1,0)(0,1) - addp v17.4s, v17.4s, v25.4s // (0,2)(1,3)(1,2)(0,3) - addp v18.4s, v18.4s, v26.4s // (0,4)(1,5)(1,4)(0,5) - addp v19.4s, v19.4s, v27.4s // (0,6)(1,7)(1,6)(0,7) - addp v20.4s, v20.4s, v28.4s - addp v21.4s, v21.4s, v29.4s - addp v22.4s, v22.4s, v30.4s - addp v23.4s, v23.4s, v31.4s - tbl v24.16b, {v16.16b, v17.16b}, v6.16b // batch=0,oc=0-3 - tbl v25.16b, {v16.16b, v17.16b}, v7.16b // batch=1,oc=0-3 - tbl v26.16b, {v18.16b, v19.16b}, v6.16b // batch=0,oc=4-7 - tbl v27.16b, {v18.16b, v19.16b}, v7.16b // batch=1,oc=4-7 - tbl v28.16b, {v20.16b, v21.16b}, v6.16b - tbl v29.16b, {v20.16b, v21.16b}, v7.16b - tbl v30.16b, {v22.16b, v23.16b}, v6.16b - tbl v31.16b, {v22.16b, v23.16b}, v7.16b - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v24, v25, v26, v27 - Int32ToFloat v28, v29, v30, v31 - // using float scale dequant for precison - ld1 {v1.d}[0], [x23] // scales 4 batch - ld1 {v2.8h}, [x19], #16 // alpha - - fcvtl v3.4s, v2.4h // oc:0-3 - fcvtl2 v4.4s, v2.8h // oc:4-7 - fcvtl v5.4s, v1.4h // scales: 4 batch - - MulScale v24, v26, v25, v27, v5, 0, 1, v3, v4 - MulScale v28, v30, v29, v31, v5, 2, 3, v3, v4 - Float32ToHalf v24, v26, v25, v27, v10, v11 - Float32ToHalf v28, v30, v29, v31, v12, v13 -Tile4Dequant: - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.d}[0], [x22] // sums - // sum + (zero * sumx) + bias - Dequant v10, v1, v2, v3, 0 - Dequant v11, v1, v2, v3, 1 - Dequant v12, v1, v2, v3, 2 - Dequant v13, v1, v2, v3, 3 - st1 {v10.8h, v11.8h, v12.8h, v13.8h}, [x28], x4 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 8 * sizeof(float16_t) - add x1, x1, #32 // src += 4 * 8 * sizeof(int8_t) - add x11, x11, #8 // sum += 4 * sizeof(float16_t) - add x12, x12, #8 // scale += 4 * sizeof(float16_t) - b TILE_4 - -TILE_1: - cmp x6, #1 - blt End - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - movi v24.4s, #0 - movi v25.4s, #0 - movi v26.4s, #0 - movi v27.4s, #0 - movi v10.4s, #0 - movi v11.4s, #0 - movi v12.4s, #0 - movi v13.4s, #0 -LoopSz_TILE_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [2] : v16-v19 - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v4.8b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v14.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v14.16b - - mov v29.d[0], v4.d[1] - mov v29.d[1], v4.d[0] - - .inst 0x4e809498 // sdot v24.4s, v4.16b, v0.16b // (0,0)x2 
(1,1)x2 - .inst 0x4e8097b9 // sdot v25.4s, v29.16b, v0.16b // (1,0)x2 (0,1)x2 - .inst 0x4e81949a // sdot v26.4s, v4.16b, v1.16b // (0,2)x2 (1,3)x2 - .inst 0x4e8197bb // sdot v27.4s, v29.16b, v1.16b // (1,2)x2 (0,3)x2 - .inst 0x4e82948a // sdot v10.4s, v4.16b, v2.16b // (0,4)x2 (1,5)x2 - .inst 0x4e8297ab // sdot v11.4s, v29.16b, v2.16b // (1,4)x2 (0,5)x2 - .inst 0x4e83948c // sdot v12.4s, v4.16b, v3.16b // (0,6)x2 (1,7)x2 - .inst 0x4e8397ad // sdot v13.4s, v29.16b, v3.16b // (1,6)x2 (0,7)x2 - - subs x26, x26, #1 - bne LoopSz_TILE_1 - addp v16.4s, v24.4s, v25.4s - addp v17.4s, v26.4s, v27.4s - addp v18.4s, v10.4s, v11.4s - addp v19.4s, v12.4s, v13.4s - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - tbl v24.16b, {v16.16b, v17.16b}, v6.16b - tbl v20.16b, {v18.16b, v19.16b}, v6.16b - - scvtf v24.4s, v24.4s - scvtf v20.4s, v20.4s - // using float scale dequant for precison - ld1 {v4.h}[0], [x23] // scales - ld1 {v0.8h}, [x19], #16 // alpha - fcvtl v5.4s, v4.4h - fcvtl v22.4s, v0.4h - fcvtl2 v21.4s, v0.8h - fmul v24.4s, v24.4s, v5.s[0] - fmul v20.4s, v20.4s, v5.s[0] - fmul v24.4s, v24.4s, v22.4s - fmul v20.4s, v20.4s, v21.4s - fcvtn v17.4h, v24.4s - fcvtn2 v17.8h, v20.4s -Tile1Dequant: - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.h}[0], [x22] // sums - // sum + (zero * sumx) + bias - fadd v2.8h, v2.8h, v17.8h - fmla v2.8h, v1.8h, v3.h[0] - st1 {v2.8h}, [x28], x4 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 8 * sizeof(float16_t) - add x1, x1, #8 // dst += 1 * 8 * sizeof(int8_t) - add x11, x11, #2 // sum += 1 * sizeof(float16_t) - add x12, x12, #2 // scale += 1 * sizeof(float16_t) - b TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_smmla.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_smmla.S deleted file mode 100644 index 62a3053ee..000000000 --- a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt4FP16_smmla.S +++ /dev/null @@ -1,506 +0,0 @@ -// -// MNNGemmHybridInt4_smmla.S -// MNN -// -// Created by MNN on 2023/10/30. 
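The hybrid int4 kernels removed here (both the sdot and the smmla variants) unpack two 4-bit weights per byte with a ushr #4 / and 0x0F pair and dequantize the int32 accumulators as acc * scale * alpha + zero * sums + bias, per the "sum + (zero * sumx) + bias" comments. A hedged C++ sketch of that epilogue, with invented names, for readers who do not follow the assembly:

#include <cstdint>

// Illustrative only: unpack two int4 weights from one byte, mirroring the ushr #4 / and 0x0F pair.
inline void unpackInt4(uint8_t packed, int8_t& high, int8_t& low) {
    high = (int8_t)(packed >> 4);   // high nibble
    low  = (int8_t)(packed & 0x0F); // low nibble
}

// Illustrative per-channel epilogue of the removed hybrid kernels.
inline float hybridDequant(int32_t acc, float inputScale, float alpha,
                           float zero, float inputSum, float bias) {
    return (float)acc * inputScale * alpha + zero * inputSum + bias;
}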
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro Float32ToHalf s0, s1, s2, s3, d0, d1 - fcvtn \d0\().4h, \s0\().4s - fcvtn2 \d0\().8h, \s1\().4s - fcvtn \d1\().4h, \s2\().4s - fcvtn2 \d1\().8h, \s3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s, idx0, idx1, alpha0, alpha1 - fmul \d0\().4s, \d0\().4s, \s\().s[\idx0] - fmul \d1\().4s, \d1\().4s, \s\().s[\idx0] - fmul \d2\().4s, \d2\().4s, \s\().s[\idx1] - fmul \d3\().4s, \d3\().4s, \s\().s[\idx1] - fmul \d0\().4s, \d0\().4s, \alpha0\().4s - fmul \d1\().4s, \d1\().4s, \alpha1\().4s - fmul \d2\().4s, \d2\().4s, \alpha0\().4s - fmul \d3\().4s, \d3\().4s, \alpha1\().4s -.endm - -.macro MulScale_New d0, d1, d2, d3, s, a1, a2, a3, a4 - fmul \d0\().4s, \d0\().4s, \s\().4s - fmul \d1\().4s, \d1\().4s, \s\().4s - fmul \d2\().4s, \d2\().4s, \s\().4s - fmul \d3\().4s, \d3\().4s, \s\().4s - fmul \d0\().4s, \d0\().4s, \a1\().4s - fmul \d1\().4s, \d1\().4s, \a2\().4s - fmul \d2\().4s, \d2\().4s, \a3\().4s - fmul \d3\().4s, \d3\().4s, \a4\().4s -.endm - -.macro Dequant c0, z0, b0, s0, idx - fmla \c0\().8h, \z0\().8h, \s0\().h[\idx] - fadd \c0\().8h, \c0\().8h, \b0\().8h -.endm - -asm_function MNNGemmHybridInt4FP16_smmla - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt4_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! 
-stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #5 // x13 = src_depth_quad * UNIT * UNIT_SRC / 2(int4) = src_depth_quad * 32 = src_depth_quad << 5 -// mask -movi v10.16b, #15 -// offset -movi v11.16b, #8 -TILE_8: - cmp x6, #8 - blt TILE_4 - //mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - sub x14, x4, #64 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_8: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - dup v24.4s, wzr - dup v25.4s, wzr - dup v26.4s, wzr - dup v27.4s, wzr - dup v28.4s, wzr - dup v29.4s, wzr - dup v30.4s, wzr - dup v31.4s, wzr - - ld1 {v14.8h}, [x23] // scales - ld1 {v15.8h}, [x19], #16 // alpha - -LoopSz_TILE_8: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-31 - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v10.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v10.16b - - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b // batch=0,1, oc=0,1 - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b // batch=0,1, oc=2,3 - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b // batch=0,1, oc=4,5 - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b // batch=0,1, oc=6,7 - .inst 0x4e80a4b4 // smmla v20.4s, v5.16b, v0.16b // batch=2,3, oc=0,1 - .inst 0x4e81a4b5 // smmla v21.4s, v5.16b, v1.16b // batch=2,3, oc=2,3 - .inst 0x4e82a4b6 // smmla v22.4s, v5.16b, v2.16b // batch=2,3, oc=4,5 - .inst 0x4e83a4b7 // smmla v23.4s, v5.16b, v3.16b // batch=2,3, oc=6,7 - - .inst 0x4e80a4d8 // smmla v24.4s, v6.16b, v0.16b // batch=4,5, oc=0,1 - .inst 0x4e81a4d9 // smmla v25.4s, v6.16b, v1.16b // batch=4,5, oc=2,3 - .inst 0x4e82a4da // smmla v26.4s, v6.16b, v2.16b // batch=4,5, oc=4,5 - .inst 0x4e83a4db // smmla v27.4s, v6.16b, v3.16b // batch=4,5, oc=6,7 - .inst 0x4e80a4fc // smmla v28.4s, v7.16b, v0.16b // batch=6,7, oc=0,1 - .inst 0x4e81a4fd // smmla v29.4s, v7.16b, v1.16b // batch=6,7, oc=2,3 - .inst 0x4e82a4fe // smmla v30.4s, v7.16b, v2.16b // batch=6,7, oc=4,5 - .inst 0x4e83a4ff // smmla v31.4s, v7.16b, v3.16b // batch=6,7, oc=6,7 - subs x26, x26, #1 - bne LoopSz_TILE_8 - -LoopSzEnd_TILE_8: - add x7, x7, x13 - fcvtl v8.4s, v15.4h // oc:0-3 - fcvtl2 v9.4s, v15.8h // oc:4-7 - fcvtl v12.4s, v14.4h // scales: batch 0,1,2,3 - fcvtl2 v13.4s, v14.8h // scales: batch 4,5,6,7 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - Int32ToFloat v24, v25, v26, v27 - Int32ToFloat v28, v29, v30, v31 - - zip1 v0.4s, v12.4s, v12.4s // scales: batch 0,0,1,1 - zip2 v1.4s, v12.4s, v12.4s // scales: batch 2,2,3,3 - zip1 v2.4s, v13.4s, v13.4s // scales: batch 4,4,5,5 - zip2 v3.4s, v13.4s, v13.4s // scales: batch 6,6,7,7 - trn1 v4.2d, v8.2d, v8.2d // alpha: oc 0,1,0,1 - trn2 v5.2d, v8.2d, 
v8.2d // alpha: oc 2,3,2,3 - trn1 v6.2d, v9.2d, v9.2d // alpha: oc 4,5,4,5 - trn2 v7.2d, v9.2d, v9.2d // alpha: oc 6,7,6,7 - - MulScale_New v16, v17, v18, v19, v0, v4, v5, v6, v7 - MulScale_New v20, v21, v22, v23, v1, v4, v5, v6, v7 - MulScale_New v24, v25, v26, v27, v2, v4, v5, v6, v7 - MulScale_New v28, v29, v30, v31, v3, v4, v5, v6, v7 - Float32ToHalf v16, v17, v18, v19, v0, v1 // (batch,oc) v12:(0,0)(0,1)(1,0)(1,1)(0,2)(0,3)(1,3)(1,2) - Float32ToHalf v20, v21, v22, v23, v12, v13 // batch=2,3 v14:(2,0)(2,1)(3,0)(3,1)(2,2)(2,3)(3,3)(3,2) - Float32ToHalf v24, v25, v26, v27, v14, v15 // batch=4,5 - Float32ToHalf v28, v29, v30, v31, v8, v9 // batch=6,7 - - uzp1 v4.4s, v0.4s, v1.4s - uzp2 v5.4s, v0.4s, v1.4s - uzp1 v6.4s, v12.4s, v13.4s - uzp2 v7.4s, v12.4s, v13.4s - uzp1 v0.4s, v14.4s, v15.4s - uzp2 v1.4s, v14.4s, v15.4s - uzp1 v2.4s, v8.4s, v9.4s - uzp2 v3.4s, v8.4s, v9.4s -Tile8Dequant: - ld1 {v16.8h}, [x20], #16 // zero - ld1 {v17.8h}, [x21], #16 // bias - ld1 {v12.8h}, [x22] // sums - // sum + (zero * sumx) + bias - Dequant v4, v16, v17, v12, 0 - Dequant v5, v16, v17, v12, 1 - Dequant v6, v16, v17, v12, 2 - Dequant v7, v16, v17, v12, 3 - - Dequant v0, v16, v17, v12, 4 - Dequant v1, v16, v17, v12, 5 - Dequant v2, v16, v17, v12, 6 - Dequant v3, v16, v17, v12, 7 - st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x28], #64 - st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_8 -Tile8End: - sub x6, x6, #8 // bach -= 8 - add x0, x0, #128 // dst += 8 * 8 * sizeof(float16_t) - add x1, x1, #64 // src += 8 * 8 * sizeof(int8_t) - add x11, x11, #16 // sum += 8 * sizeof(float16_t) - add x12, x12, #16 // scale += 8 * sizeof(float16_t) - b TILE_8 - -TILE_4: - cmp x6, #4 - blt TILE_2 - mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - ld1 {v14.d}[0], [x23] // scales - ld1 {v15.8h}, [x19], #16 // alpha -LoopSz_TILE_4: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v4.16b, v5.16b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v10.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v10.16b - - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - .inst 0x4e80a4b4 // smmla v20.4s, v5.16b, v0.16b - .inst 0x4e81a4b5 // smmla v21.4s, v5.16b, v1.16b - .inst 0x4e82a4b6 // smmla v22.4s, v5.16b, v2.16b - .inst 0x4e83a4b7 // smmla v23.4s, v5.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - add x7, x7, x13 - fcvtl v8.4s, v15.4h // oc:0-3 - fcvtl2 v9.4s, v15.8h // oc:4-7 - fcvtl v12.4s, v14.4h // scales: batch 0,1,2,3 - - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - - zip1 v0.4s, v12.4s, v12.4s // scales: batch 0,0,1,1 - zip2 v1.4s, v12.4s, v12.4s // scales: batch 2,2,3,3 - trn1 v4.2d, v8.2d, v8.2d // alpha: oc 0,1,0,1 - trn2 v5.2d, v8.2d, 
v8.2d // alpha: oc 2,3,2,3 - trn1 v6.2d, v9.2d, v9.2d // alpha: oc 4,5,4,5 - trn2 v7.2d, v9.2d, v9.2d // alpha: oc 6,7,6,7 - - MulScale_New v16, v17, v18, v19, v0, v4, v5, v6, v7 - MulScale_New v20, v21, v22, v23, v1, v4, v5, v6, v7 - Float32ToHalf v16, v17, v18, v19, v0, v1 // (batch,oc) v12:(0,0)(0,1)(1,0)(1,1)(0,2)(0,3)(1,3)(1,2) - Float32ToHalf v20, v21, v22, v23, v12, v13 // batch=2,3 v14:(2,0)(2,1)(3,0)(3,1)(2,2)(2,3)(3,3)(3,2) - - uzp1 v4.4s, v0.4s, v1.4s - uzp2 v5.4s, v0.4s, v1.4s - uzp1 v6.4s, v12.4s, v13.4s - uzp2 v7.4s, v12.4s, v13.4s -Tile4Dequant: - ld1 {v16.8h}, [x20], #16 // zero - ld1 {v17.8h}, [x21], #16 // bias - ld1 {v12.d}[0], [x22] // sums - // sum + (zero * sumx) + bias - Dequant v4, v16, v17, v12, 0 - Dequant v5, v16, v17, v12, 1 - Dequant v6, v16, v17, v12, 2 - Dequant v7, v16, v17, v12, 3 - st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 8 * sizeof(float16_t) - add x1, x1, #32 // src += 4 * 8 * sizeof(int8_t) - add x11, x11, #8 // sum += 4 * sizeof(float16_t) - add x12, x12, #8 // scale += 4 * sizeof(float16_t) - b TILE_4 - -TILE_2: - cmp x6, #2 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_2: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - ld1 {v14.s}[0], [x23] // scales - ld1 {v15.8h}, [x19], #16 // alpha -LoopSz_TILE_2: - // src : 1 x [2 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [4] : v16-19 - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v4.16b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v10.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v10.16b - - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_2 - -LoopSzEnd_TILE_2: - add x7, x7, x13 - fcvtl v8.4s, v15.4h // oc:0-3 - fcvtl2 v9.4s, v15.8h // oc:4-7 - fcvtl v12.4s, v14.4h // scales: batch 0,1 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - zip1 v0.4s, v12.4s, v12.4s // scales: batch 0,0,1,1 - trn1 v4.2d, v8.2d, v8.2d // alpha: oc 0,1,0,1 - trn2 v5.2d, v8.2d, v8.2d // alpha: oc 2,3,2,3 - trn1 v6.2d, v9.2d, v9.2d // alpha: oc 4,5,4,5 - trn2 v7.2d, v9.2d, v9.2d // alpha: oc 6,7,6,7 - MulScale_New v16, v17, v18, v19, v0, v4, v5, v6, v7 - Float32ToHalf v16, v17, v18, v19, v0, v1 // (batch,oc) v12:(0,0)(0,1)(1,0)(1,1)(0,2)(0,3)(1,3)(1,2) - - uzp1 v4.4s, v0.4s, v1.4s - uzp2 v5.4s, v0.4s, v1.4s - -Tile2Dequant: - ld1 {v16.8h}, [x20], #16 // zero - ld1 {v17.8h}, [x21], #16 // bias - ld1 {v12.s}[0], [x22] // sums - // sum + (zero * sumx) + bias - Dequant v4, v16, v17, v12, 0 - Dequant v5, v16, v17, v12, 1 - st1 {v4.8h, v5.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_2 -Tile2End: - sub x6, x6, #2 // batch -= 2 - add x0, x0, #32 // dst += 2 * 8 * sizeof(float16_t) - add x1, x1, #16 // dst += 2 * 8 * sizeof(int8_t) - add x11, x11, #4 // sum += 2 * sizeof(float16_t) - add x12, x12, #4 // scale += 2 * sizeof(float16_t) - b TILE_2 - - -TILE_1: - cmp x6, #1 - blt End - mov 
x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - ld1 {v14.d}[0], [x23] // scales - ld1 {v15.8h}, [x19], #16 // alpha -LoopSz_TILE_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [2] : v16-v19 - prfm pldl1keep, [x25, #64] // 预取下一次权重数据 - prfm pldl1keep, [x24, x15] // 预取下一次源数据 - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v4.8b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v10.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v10.16b - - .inst 0x4e84a410 // smmla v16.4s, v0.16b, v4.16b - .inst 0x4e84a431 // smmla v17.4s, v1.16b, v4.16b - .inst 0x4e84a452 // smmla v18.4s, v2.16b, v4.16b - .inst 0x4e84a473 // smmla v19.4s, v3.16b, v4.16b - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - uzp1 v20.4s, v16.4s, v17.4s - uzp1 v21.4s, v18.4s, v19.4s - scvtf v20.4s, v20.4s - scvtf v21.4s, v21.4s - // using float scale dequant for precison - fcvtl v28.4s, v15.4h // oc:0-3 - fcvtl2 v29.4s, v15.8h // oc:4-7 - fcvtl v12.4s, v14.4h // scales: batch 0 - - fmul v20.4s, v20.4s, v12.s[0] - fmul v21.4s, v21.4s, v12.s[0] - fmul v20.4s, v20.4s, v28.4s - fmul v21.4s, v21.4s, v29.4s - fcvtn v17.4h, v20.4s - fcvtn2 v17.8h, v21.4s -Tile1Dequant: - - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.h}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fadd v2.8h, v2.8h, v17.8h - fmla v2.8h, v1.8h, v3.h[0] - st1 {v2.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 8 * sizeof(float16_t) - add x1, x1, #8 // dst += 1 * 8 * sizeof(int8_t) - add x11, x11, #2 // sum += 1 * sizeof(float16_t) - add x12, x12, #2 // scale += 1 * sizeof(float16_t) - b TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_sdot.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_sdot.S deleted file mode 100644 index d675b79e8..000000000 --- a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_sdot.S +++ /dev/null @@ -1,303 +0,0 @@ -// -// MNNGemmHybridInt8_sdot.S -// MNN -// -// Created by MNN on 2023/11/09. 
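As a reading aid for the sdot-based kernels in this hunk (again illustrative, not MNN code): each vector SDOT accumulates four independent dot products over groups of four int8 elements, which is why the kernels follow up with addp/tbl shuffles to regroup lanes into (batch, oc) order.

#include <cstdint>

// Scalar model of one vector SDOT step: lane i accumulates a 4-element int8 dot product.
void sdotStep(int32_t acc[4], const int8_t a[16], const int8_t b[16]) {
    for (int lane = 0; lane < 4; ++lane) {
        for (int k = 0; k < 4; ++k) {
            acc[lane] += (int32_t)a[4 * lane + k] * (int32_t)b[4 * lane + k];
        }
    }
}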
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s, idx0, idx1, alpha0, alpha1 - fmul \d0\().4s, \d0\().4s, \s\().s[\idx0] - fmul \d1\().4s, \d1\().4s, \s\().s[\idx0] - fmul \d2\().4s, \d2\().4s, \s\().s[\idx1] - fmul \d3\().4s, \d3\().4s, \s\().s[\idx1] - fmul \d0\().4s, \d0\().4s, \alpha0\().4s - fmul \d1\().4s, \d1\().4s, \alpha1\().4s - fmul \d2\().4s, \d2\().4s, \alpha0\().4s - fmul \d3\().4s, \d3\().4s, \alpha1\().4s -.endm - -.macro Float32ToHalf s0, s1, s2, s3, d0, d1 - fcvtn \d0\().4h, \s0\().4s - fcvtn2 \d0\().8h, \s1\().4s - fcvtn \d1\().4h, \s2\().4s - fcvtn2 \d1\().8h, \s3\().4s -.endm - -.macro Dequant c0, z0, b0, s0, idx - fmla \c0\().8h, \z0\().8h, \s0\().h[\idx] - fadd \c0\().8h, \c0\().8h, \b0\().8h -.endm - -asm_function MNNGemmHybridInt8FP16_sdot - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt8_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! -stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] -ldr x14, [x7, #40] -Start: -lsl x13, x3, #6 // x13 = src_depth_quad * UNIT * UNIT_SRC / 1(int8) = src_depth_quad * 64 = src_depth_quad << 6 -ld1 {v6.16b, v7.16b}, [x14] -TILE_4: - cmp x6, #4 - blt TILE_1 - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - dup v24.4s, wzr - dup v25.4s, wzr - dup v26.4s, wzr - dup v27.4s, wzr - dup v28.4s, wzr - dup v29.4s, wzr - dup v30.4s, wzr - dup v31.4s, wzr -LoopSz_TILE_4: - // v0: oc=0,1 - // v1: oc=2,3 - // v2: oc=4,5 - // v3: oc=6,7 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight, oc=0-7 - // v4:n=0,1, v5:n=2,3 - // v10:n=1,0, v11:n=3,2 - ld1 {v4.16b, v5.16b}, [x24], x15 // src batch=0,1,2,3 - mov v10.d[0], v4.d[1] // v10:n=1,0 - mov v10.d[1], v4.d[0] - mov v11.d[1], v5.d[0] - mov v11.d[0], v5.d[1] - .inst 0x4e809490 // sdot v16.4s, v4.16b, v0.16b // (0,0)x2 (1,1)x2 - .inst 0x4e809558 // sdot v24.4s, v10.16b, v0.16b // (1,0)x2 (0,1)x2 - .inst 0x4e819491 // sdot v17.4s, v4.16b, v1.16b // (0,2) (1,3) - .inst 0x4e819559 // sdot v25.4s, v10.16b, v1.16b // (1,2) (0,3) - .inst 0x4e829492 // sdot v18.4s, v4.16b, v2.16b - .inst 0x4e82955a // sdot v26.4s, 
v10.16b, v2.16b - .inst 0x4e839493 // sdot v19.4s, v4.16b, v3.16b - .inst 0x4e83955b // sdot v27.4s, v10.16b, v3.16b - .inst 0x4e8094b4 // sdot v20.4s, v5.16b, v0.16b - .inst 0x4e80957c // sdot v28.4s, v11.16b, v0.16b - .inst 0x4e8194b5 // sdot v21.4s, v5.16b, v1.16b - .inst 0x4e81957d // sdot v29.4s, v11.16b, v1.16b - .inst 0x4e8294b6 // sdot v22.4s, v5.16b, v2.16b - .inst 0x4e82957e // sdot v30.4s, v11.16b, v2.16b - .inst 0x4e8394b7 // sdot v23.4s, v5.16b, v3.16b - .inst 0x4e83957f // sdot v31.4s, v11.16b, v3.16b - - subs x26, x26, #1 - bne LoopSz_TILE_4 - - addp v16.4s, v16.4s, v24.4s // (batch,oc)(0,0)(1,1)(1,0)(0,1) - addp v17.4s, v17.4s, v25.4s // (0,2)(1,3)(1,2)(0,3) - addp v18.4s, v18.4s, v26.4s // (0,4)(1,5)(1,4)(0,5) - addp v19.4s, v19.4s, v27.4s // (0,6)(1,7)(1,6)(0,7) - addp v20.4s, v20.4s, v28.4s - addp v21.4s, v21.4s, v29.4s - addp v22.4s, v22.4s, v30.4s - addp v23.4s, v23.4s, v31.4s - tbl v24.16b, {v16.16b, v17.16b}, v6.16b // batch=0,oc=0-3 - tbl v25.16b, {v16.16b, v17.16b}, v7.16b // batch=1,oc=0-3 - tbl v26.16b, {v18.16b, v19.16b}, v6.16b // batch=0,oc=4-7 - tbl v27.16b, {v18.16b, v19.16b}, v7.16b // batch=1,oc=4-7 - tbl v28.16b, {v20.16b, v21.16b}, v6.16b - tbl v29.16b, {v20.16b, v21.16b}, v7.16b - tbl v30.16b, {v22.16b, v23.16b}, v6.16b - tbl v31.16b, {v22.16b, v23.16b}, v7.16b - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v24, v25, v26, v27 - Int32ToFloat v28, v29, v30, v31 - // using float scale dequant for precison - ld1 {v1.d}[0], [x23] // scales 4 batch - ld1 {v2.8h}, [x19], #16 // alpha - - fcvtl v3.4s, v2.4h // oc:0-3 - fcvtl2 v4.4s, v2.8h // oc:4-7 - fcvtl v5.4s, v1.4h // scales: 4 batch - - MulScale v24, v26, v25, v27, v5, 0, 1, v3, v4 - MulScale v28, v30, v29, v31, v5, 2, 3, v3, v4 - Float32ToHalf v24, v26, v25, v27, v12, v13 - Float32ToHalf v28, v30, v29, v31, v14, v15 -Tile4Dequant: - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.d}[0], [x22] // sums - // sum + (zero * sumx) + bias - Dequant v12, v1, v2, v3, 0 - Dequant v13, v1, v2, v3, 1 - Dequant v14, v1, v2, v3, 2 - Dequant v15, v1, v2, v3, 3 - st1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x28], x4 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 8 * sizeof(float16_t) - add x1, x1, #32 // src += 4 * 8 * sizeof(int8_t) - add x11, x11, #8 // sum += 4 * sizeof(float16_t) - add x12, x12, #8 // scale += 4 * sizeof(float16_t) - b TILE_4 - -TILE_1: - cmp x6, #1 - blt End - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - movi v24.4s, #0 - movi v25.4s, #0 - movi v26.4s, #0 - movi v27.4s, #0 - movi v10.4s, #0 - movi v11.4s, #0 - movi v12.4s, #0 - movi v13.4s, #0 -LoopSz_TILE_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [2] : v16-v19 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.8b}, [x24], x15 // src - mov v29.d[0], v4.d[1] - mov v29.d[1], v4.d[0] - - .inst 0x4e809498 // sdot v24.4s, v4.16b, v0.16b // (0,0)x2 (1,1)x2 - .inst 0x4e8097b9 // sdot v25.4s, v29.16b, v0.16b // (1,0)x2 (0,1)x2 - .inst 0x4e81949a // sdot v26.4s, v4.16b, v1.16b // (0,2)x2 (1,3)x2 - .inst 0x4e8197bb // sdot v27.4s, v29.16b, v1.16b // (1,2)x2 (0,3)x2 - .inst 0x4e82948a // 
sdot v10.4s, v4.16b, v2.16b // (0,4)x2 (1,5)x2 - .inst 0x4e8297ab // sdot v11.4s, v29.16b, v2.16b // (1,4)x2 (0,5)x2 - .inst 0x4e83948c // sdot v12.4s, v4.16b, v3.16b // (0,6)x2 (1,7)x2 - .inst 0x4e8397ad // sdot v13.4s, v29.16b, v3.16b // (1,6)x2 (0,7)x2 - - subs x26, x26, #1 - bne LoopSz_TILE_1 - addp v16.4s, v24.4s, v25.4s - addp v17.4s, v26.4s, v27.4s - addp v18.4s, v10.4s, v11.4s - addp v19.4s, v12.4s, v13.4s - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - tbl v15.16b, {v16.16b, v17.16b}, v6.16b - tbl v20.16b, {v18.16b, v19.16b}, v6.16b - - scvtf v15.4s, v15.4s - scvtf v20.4s, v20.4s - // using float scale dequant for precison - ld1 {v4.h}[0], [x23] // scales - ld1 {v0.8h}, [x19], #16 // alpha - fcvtl v5.4s, v4.4h - fcvtl v22.4s, v0.4h - fcvtl2 v21.4s, v0.8h - fmul v15.4s, v15.4s, v5.s[0] - fmul v20.4s, v20.4s, v5.s[0] - fmul v15.4s, v15.4s, v22.4s - fmul v20.4s, v20.4s, v21.4s - fcvtn v17.4h, v15.4s - fcvtn2 v17.8h, v20.4s -Tile1Dequant: - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.h}[0], [x22] // sums - // sum + (zero * sumx) + bias - fadd v2.8h, v2.8h, v17.8h - fmla v2.8h, v1.8h, v3.h[0] - st1 {v2.8h}, [x28], x4 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 8 * sizeof(float16_t) - add x1, x1, #8 // dst += 1 * 8 * sizeof(int8_t) - add x11, x11, #2 // sum += 1 * sizeof(float16_t) - add x12, x12, #2 // scale += 1 * sizeof(float16_t) - b TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_smmla.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_smmla.S deleted file mode 100644 index 5f339725c..000000000 --- a/source/backend/arm82/asm/arm64/low_memory/MNNGemmHybridInt8FP16_smmla.S +++ /dev/null @@ -1,566 +0,0 @@ -// -// MNNGemmHybridInt8_smmla.S -// MNN -// -// Created by MNN on 2023/11/09. 
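Similarly, for the smmla-based kernels below: each SMMLA accumulates a 2x2 block of int32 results from a 2x8 int8 activation tile and a 2x8 int8 weight tile, which is what the later trn1/trn2/uzp shuffles undo to recover (batch, oc) order. A scalar model, for illustration only:

#include <cstdint>

// Scalar model of one SMMLA step: acc (2x2 int32) += a (2x8 int8) * transpose(b) (8x2 int8).
void smmlaStep(int32_t acc[2][2], const int8_t a[2][8], const int8_t b[2][8]) {
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j) {
            for (int k = 0; k < 8; ++k) {
                acc[i][j] += (int32_t)a[i][k] * (int32_t)b[j][k];
            }
        }
    }
}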
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s, idx0, idx1, alpha0, alpha1 - fmul \d0\().4s, \d0\().4s, \s\().s[\idx0] - fmul \d1\().4s, \d1\().4s, \s\().s[\idx0] - fmul \d2\().4s, \d2\().4s, \s\().s[\idx1] - fmul \d3\().4s, \d3\().4s, \s\().s[\idx1] - fmul \d0\().4s, \d0\().4s, \alpha0\().4s - fmul \d1\().4s, \d1\().4s, \alpha1\().4s - fmul \d2\().4s, \d2\().4s, \alpha0\().4s - fmul \d3\().4s, \d3\().4s, \alpha1\().4s -.endm - -.macro Float32ToHalf s0, s1, s2, s3, d0, d1 - fcvtn \d0\().4h, \s0\().4s - fcvtn2 \d0\().8h, \s1\().4s - fcvtn \d1\().4h, \s2\().4s - fcvtn2 \d1\().8h, \s3\().4s -.endm - -.macro Dequant c0, z0, b0, s0, idx - fmla \c0\().8h, \z0\().8h, \s0\().h[\idx] - fadd \c0\().8h, \c0\().8h, \b0\().8h -.endm - -asm_function MNNGemmHybridInt8FP16_smmla - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt8_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! -stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #6 // x13 = src_depth_quad * UNIT * UNIT_SRC / 1(int8) = src_depth_quad * 64 = src_depth_quad << 6 -cmp x6, #1 -beq TILE_EQ_1 - -TILE_8: - cmp x6, #8 - blt TILE_4 - //mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - sub x14, x4, #64 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_8: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - dup v24.4s, wzr - dup v25.4s, wzr - dup v26.4s, wzr - dup v27.4s, wzr - dup v28.4s, wzr - dup v29.4s, wzr - dup v30.4s, wzr - dup v31.4s, wzr -LoopSz_TILE_8: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b // batch=0,1, oc=0,1 - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b // batch=0,1, oc=2,3 - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b // batch=0,1, oc=4,5 - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b // batch=0,1, oc=6,7 - .inst 0x4e80a4b4 // smmla v20.4s, v5.16b, v0.16b // batch=2,3, oc=0,1 - .inst 0x4e81a4b5 // smmla v21.4s, v5.16b, v1.16b // batch=2,3, oc=2,3 - .inst 0x4e82a4b6 // smmla 
v22.4s, v5.16b, v2.16b // batch=2,3, oc=4,5 - .inst 0x4e83a4b7 // smmla v23.4s, v5.16b, v3.16b // batch=2,3, oc=6,7 - - .inst 0x4e80a4d8 // smmla v24.4s, v6.16b, v0.16b // batch=4,5, oc=0,1 - .inst 0x4e81a4d9 // smmla v25.4s, v6.16b, v1.16b // batch=4,5, oc=2,3 - .inst 0x4e82a4da // smmla v26.4s, v6.16b, v2.16b // batch=4,5, oc=4,5 - .inst 0x4e83a4db // smmla v27.4s, v6.16b, v3.16b // batch=4,5, oc=6,7 - .inst 0x4e80a4fc // smmla v28.4s, v7.16b, v0.16b // batch=6,7, oc=0,1 - .inst 0x4e81a4fd // smmla v29.4s, v7.16b, v1.16b // batch=6,7, oc=2,3 - .inst 0x4e82a4fe // smmla v30.4s, v7.16b, v2.16b // batch=6,7, oc=4,5 - .inst 0x4e83a4ff // smmla v31.4s, v7.16b, v3.16b // batch=6,7, oc=6,7 - subs x26, x26, #1 - bne LoopSz_TILE_8 - -LoopSzEnd_TILE_8: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - Int32ToFloat v24, v25, v26, v27 - Int32ToFloat v28, v29, v30, v31 - // using float scale dequant for precison - trn1 v8.2d, v16.2d, v17.2d // batch=0,oc:0-3 - trn1 v9.2d, v18.2d, v19.2d // batch=0,oc:4-7 - trn2 v10.2d, v16.2d, v17.2d // batch=1,oc:0-3 - trn2 v11.2d, v18.2d, v19.2d // batch=1,oc:4-7 - trn1 v12.2d, v20.2d, v21.2d // batch=2,oc:0-3 - trn1 v13.2d, v22.2d, v23.2d // batch=2,oc:4-7 - trn2 v14.2d, v20.2d, v21.2d // batch=3,oc:0-3 - trn2 v15.2d, v22.2d, v23.2d // batch=3,oc:4-7 - - trn1 v0.2d, v24.2d, v25.2d // batch=4,oc:0-3 - trn1 v1.2d, v26.2d, v27.2d // batch=4,oc:4-7 - trn2 v2.2d, v24.2d, v25.2d // batch=5,oc:0-3 - trn2 v3.2d, v26.2d, v27.2d // batch=5,oc:4-7 - trn1 v4.2d, v28.2d, v29.2d // batch=6,oc:0-3 - trn1 v5.2d, v30.2d, v31.2d // batch=6,oc:4-7 - trn2 v6.2d, v28.2d, v29.2d // batch=7,oc:0-3 - trn2 v7.2d, v30.2d, v31.2d // batch=7,oc:4-7 - - ld1 {v16.8h}, [x23] // scales - ld1 {v17.8h}, [x19], #16 // alpha - fcvtl v18.4s, v17.4h // oc:0-3 - fcvtl2 v19.4s, v17.8h // oc:4-7 - fcvtl v28.4s, v16.4h // scales: batch 0,1,2,3 - fcvtl2 v29.4s, v16.8h // scales: batch 4,5,6,7 - - MulScale v8, v9, v10, v11, v28, 0, 1, v18, v19 - MulScale v12, v13, v14, v15, v28, 2, 3, v18, v19 - Float32ToHalf v8, v9, v10, v11, v20, v21 // batch=0,1 - Float32ToHalf v12, v13, v14, v15, v22, v23 // batch=2,3 - - MulScale v0, v1, v2, v3, v29, 0, 1, v18, v19 - MulScale v4, v5, v6, v7, v29, 2, 3, v18, v19 - Float32ToHalf v0, v1, v2, v3, v24, v25 // batch=4,5 - Float32ToHalf v4, v5, v6, v7, v26, v27 // batch=6,7 - -Tile8Dequant: - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.8h}, [x22] // sums - // sum + (zero * sumx) + bias - Dequant v20, v1, v2, v3, 0 - Dequant v21, v1, v2, v3, 1 - Dequant v22, v1, v2, v3, 2 - Dequant v23, v1, v2, v3, 3 - - Dequant v24, v1, v2, v3, 4 - Dequant v25, v1, v2, v3, 5 - Dequant v26, v1, v2, v3, 6 - Dequant v27, v1, v2, v3, 7 - st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x28], #64 - st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_8 -Tile8End: - sub x6, x6, #8 // bach -= 8 - add x0, x0, #128 // dst += 8 * 8 * sizeof(float16_t) - add x1, x1, #64 // src += 8 * 8 * sizeof(int8_t) - add x11, x11, #16 // sum += 8 * sizeof(float16_t) - add x12, x12, #16 // scale += 8 * sizeof(float16_t) - b TILE_8 - -TILE_4: - cmp x6, #4 - blt TILE_2 - mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov 
x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr -LoopSz_TILE_4: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.16b, v5.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b // batch=0,1, oc=0,1 - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b // batch=0,1, oc=2,3 - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b // batch=0,1, oc=4,5 - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b // batch=0,1, oc=6,7 - .inst 0x4e80a4b4 // smmla v20.4s, v5.16b, v0.16b // batch=2,3, oc=0,1 - .inst 0x4e81a4b5 // smmla v21.4s, v5.16b, v1.16b // batch=2,3, oc=2,3 - .inst 0x4e82a4b6 // smmla v22.4s, v5.16b, v2.16b // batch=2,3, oc=4,5 - .inst 0x4e83a4b7 // smmla v23.4s, v5.16b, v3.16b // batch=2,3, oc=6,7 - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v4.d}[0], [x23] // scales - ld1 {v31.8h}, [x19], #16 // alpha - fcvtl v29.4s, v31.4h // oc:0-3 - fcvtl2 v30.4s, v31.8h // oc:4-7 - trn1 v24.2d, v16.2d, v17.2d // batch=0,oc:0-3 - trn1 v25.2d, v18.2d, v19.2d // batch=0,oc:4-7 - trn2 v26.2d, v16.2d, v17.2d // batch=1,oc:0-3 - trn2 v27.2d, v18.2d, v19.2d // batch=1,oc:4-7 - trn1 v28.2d, v20.2d, v21.2d // batch=2,oc:0-3 - trn1 v6.2d, v22.2d, v23.2d // batch=2,oc:4-7 - trn2 v7.2d, v20.2d, v21.2d // batch=3,oc:0-3 - trn2 v8.2d, v22.2d, v23.2d // batch=3,oc:4-7 - - fcvtl v5.4s, v4.4h // scales: 4 batch - - MulScale v24, v25, v26, v27, v5, 0, 1, v29, v30 - MulScale v28, v6, v7, v8, v5, 2, 3, v29, v30 - Float32ToHalf v24, v25, v26, v27, v12, v13 - Float32ToHalf v28, v6, v7, v8, v14, v15 -Tile4Dequant: - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.d}[0], [x22] // sums - // sum + (zero * sumx) + bias - Dequant v12, v1, v2, v3, 0 - Dequant v13, v1, v2, v3, 1 - Dequant v14, v1, v2, v3, 2 - Dequant v15, v1, v2, v3, 3 - st1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 8 * sizeof(float16_t) - add x1, x1, #32 // src += 4 * 8 * sizeof(int8_t) - add x11, x11, #8 // sum += 4 * sizeof(float16_t) - add x12, x12, #8 // scale += 4 * sizeof(float16_t) - b TILE_4 - -TILE_2: - cmp x6, #2 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_2: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr -LoopSz_TILE_2: - // src : 1 x [2 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [4] : v16-19 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_2 - -LoopSzEnd_TILE_2: - add x7, x7, x13 - sub x27, x27, 
#1 - uzp1 v13.2d, v16.2d, v17.2d - uzp1 v14.2d, v18.2d, v19.2d - uzp2 v15.2d, v16.2d, v17.2d - uzp2 v16.2d, v18.2d, v19.2d - Int32ToFloat v13, v14, v15, v16 - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - ld1 {v0.8h}, [x19], #16 // alpha - fcvtl v5.4s, v4.4h - fcvtl v20.4s, v0.4h - fcvtl2 v21.4s, v0.8h - MulScale v13, v14, v15, v16, v5, 0, 1, v20, v21 - fcvtn v11.4h, v13.4s - fcvtn2 v11.8h, v14.4s - fcvtn v12.4h, v15.4s - fcvtn2 v12.8h, v16.4s -Tile2Dequant: - //ld1 {v0.8h}, [x19], #16 // alpha - ld1 {v1.8h}, [x20], #16 // zero - ld1 {v2.8h}, [x21], #16 // bias - ld1 {v3.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - Dequant v11, v1, v2, v3, 0 - Dequant v12, v1, v2, v3, 1 - st1 {v11.8h, v12.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_2 -Tile2End: - sub x6, x6, #2 // batch -= 2 - add x0, x0, #32 // dst += 2 * 8 * sizeof(float16_t) - add x1, x1, #16 // dst += 2 * 8 * sizeof(int8_t) - add x11, x11, #4 // sum += 2 * sizeof(float16_t) - add x12, x12, #4 // scale += 2 * sizeof(float16_t) - b TILE_2 - - -TILE_1: - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - ld1 {v29.8h}, [x20], #16 // zero - ld1 {v30.8h}, [x21], #16 // bias - ld1 {v8.h}[0], [x22] // sums - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - fmla v30.8h, v29.8h, v8.h[0] // bias + zero * sum - -LoopSz_TILE_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [2] : v16-v19 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.8b}, [x24], x15 // src - .inst 0x4e84a410 // smmla v16.4s, v0.16b, v4.16b - .inst 0x4e84a431 // smmla v17.4s, v1.16b, v4.16b - .inst 0x4e84a452 // smmla v18.4s, v2.16b, v4.16b - .inst 0x4e84a473 // smmla v19.4s, v3.16b, v4.16b - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - uzp1 v22.4s, v16.4s, v17.4s - uzp1 v23.4s, v18.4s, v19.4s - scvtf v22.4s, v22.4s - scvtf v23.4s, v23.4s - // using float scale dequant for precison - ld1 {v4.h}[0], [x23] // scales - ld1 {v0.8h}, [x19], #16 // alpha - fcvtl v5.4s, v4.4h - fcvtl v20.4s, v0.4h - fcvtl2 v21.4s, v0.8h - - fmul v22.4s, v22.4s, v5.s[0] - fmul v23.4s, v23.4s, v5.s[0] - fmul v22.4s, v22.4s, v20.4s - fmul v23.4s, v23.4s, v21.4s - fcvtn v17.4h, v22.4s - fcvtn2 v17.8h, v23.4s -Tile1Dequant: - // sum + (zero * sumx) + bias - fadd v30.8h, v30.8h, v17.8h - st1 {v30.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 8 * sizeof(float16_t) - add x1, x1, #8 // dst += 1 * 8 * sizeof(int8_t) - add x11, x11, #2 // sum += 1 * sizeof(float16_t) - add x12, x12, #2 // scale += 1 * sizeof(float16_t) - b TILE_1 -b End -TILE_EQ_1: - - mov x14, x4 // dst_step - lsr x15, x4, #1 // src_step = dst_step / 2 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - ld1 {v29.8h}, [x20], #16 // zero - ld1 {v30.8h}, [x21], #16 // bias - ld1 {v8.h}[0], [x22] // 
sums - // init - dup v14.4s, wzr - dup v15.4s, wzr - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - fmla v30.8h, v29.8h, v8.h[0] // bias + zero * sum - - -L2: -cmp x26, #2 -blt L1 -LoopSz_2: - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x25], #64 - ld1 {v8.16b}, [x24], #16 // src - sub x26, x26, #2 - - .inst 0x4e80a50e // smmla v14.4s, v8.16b, v0.16b // (N=0,OC=0) (N=0,OC=1) () () - .inst 0x4e81a50f // smmla v15.4s, v8.16b, v1.16b // (N=0,OC=2) (N=0,OC=3) () () - .inst 0x4e82a510 // smmla v16.4s, v8.16b, v2.16b // (N=0,OC=4) (N=0,OC=5) () () - .inst 0x4e83a511 // smmla v17.4s, v8.16b, v3.16b // (N=0,OC=6) (N=0,OC=7) () () - .inst 0x4e84a512 // smmla v18.4s, v8.16b, v4.16b - .inst 0x4e85a513 // smmla v19.4s, v8.16b, v5.16b - .inst 0x4e86a514 // smmla v20.4s, v8.16b, v6.16b - .inst 0x4e87a515 // smmla v21.4s, v8.16b, v7.16b - cmp x26, #2 - bge LoopSz_2 -L1: -cmp x26, #1 -blt LoopSzEnd -LoopSz_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.8b}, [x24], x15 // src - .inst 0x4e80a48e // smmla v14.4s, v4.16b, v0.16b - .inst 0x4e81a48f // smmla v15.4s, v4.16b, v1.16b - .inst 0x4e82a490 // smmla v16.4s, v4.16b, v2.16b - .inst 0x4e83a491 // smmla v17.4s, v4.16b, v3.16b - - subs x26, x26, #1 - bne LoopSz_1 - -LoopSzEnd: - add x7, x7, x13 - sub x27, x27, #1 - - trn1 v26.2d, v14.2d, v15.2d - trn1 v27.2d, v16.2d, v17.2d - trn2 v28.2d, v18.2d, v19.2d - trn2 v29.2d, v20.2d, v21.2d - add v26.4s, v26.4s, v28.4s - add v27.4s, v27.4s, v29.4s - scvtf v26.4s, v26.4s - scvtf v27.4s, v27.4s - // using float scale dequant for precison - ld1 {v4.h}[0], [x23] // scales - ld1 {v0.8h}, [x19], #16 // alpha - fcvtl v5.4s, v4.4h - fcvtl v20.4s, v0.4h - fcvtl2 v21.4s, v0.8h - - fmul v26.4s, v26.4s, v5.s[0] - fmul v27.4s, v27.4s, v5.s[0] - fmul v26.4s, v26.4s, v20.4s - fmul v27.4s, v27.4s, v21.4s - fcvtn v17.4h, v26.4s - fcvtn2 v17.8h, v27.4s -Int8ToFP16: - // sum + (zero * sumx) + bias - fadd v30.8h, v30.8h, v17.8h - st1 {v30.8h}, [x28], x14 - cmp x27, #1 - bge LoopDz - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16.S new file mode 100644 index 000000000..143ec060a --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16.S @@ -0,0 +1,665 @@ +// +// MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16.S +// MNN +// +// Created by MNN on 2019/12/17. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#if defined(__aarch64__) +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm + +.macro SET_BIAS d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm +.macro Int32ToFloat z0, z1, z2, z3 + scvtf \z0\().4s, \z0\().4s + scvtf \z1\().4s, \z1\().4s + scvtf \z2\().4s, \z2\().4s + scvtf \z3\().4s, \z3\().4s +.endm +.macro MUL_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm +.macro ReLU_FP16 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().8h, \s0\().8h, \z1\().8h + fmin \s1\().8h, \s1\().8h, \z1\().8h + fmin \s2\().8h, \s2\().8h, \z1\().8h + fmin \s3\().8h, \s3\().8h, \z1\().8h + fmax \s0\().8h, \s0\().8h, \z0\().8h + fmax \s1\().8h, \s1\().8h, \z0\().8h + fmax \s2\().8h, \s2\().8h, \z0\().8h + fmax \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro Float32ToHalf s0, s1, s2, s3, d0, d1 + fcvtn \d0\().4h, \s0\().4s + fcvtn2 \d0\().8h, \s1\().4s + fcvtn \d1\().4h, \s2\().4s + fcvtn2 \d1\().8h, \s3\().4s +.endm + +asm_function MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16 +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; +}; +*/ + +//void MNNGemmInt8AddBiasScale_ARMV82_Unit(int8_t* dst, const int8_t* src, +// const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, +// const QuanPostTreatParameters* parameters, size_t realDstCount); + +//Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step +//x5:dst_depth_quad, x6: parameters, x7: realDstCount + +//Load from x6: x8: scale, x9: bias, x25: xKernelSum, x26: weightQuantBias, x23: fp32minmax, x27: blockNum +ldr x8, [x6, #0] +ldr x9, [x6, #8] +//ldr w12, [x6, #16] + +stp d14, d15, [sp, #(-16 * 10)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +stp x27, x28, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x23, x24, [sp, #(16 * 8)] + +ldr x25, [x6, #40] // xKernelSum +ldr x26, [x6, #48] // weightQuantBias +ldr x23, [x6, #56] // fp32minmax +ldr x27, [x6, #64] // blockNum + +//add x24, x23, #4 + +mov x21, #16 // sizeof(float16_t) * PACK +mul x27, x27, x3 +Start: +lsl x15, x27, #4 // x15 = src_depth_quad * UNIT * SRC_UNIT +mov x22, #48 // src_steps +add x24, x15, x15 +ldr x27, [x6, #80] // extra scale +TILE_12: + cmp x7, #12 + blt TILE_8 + +L8LoopDz_TILE_12: + // ld1 {v0.4s, v1.4s}, [x9], #32 // bias + mov x11, x1 + mov x13, x3 + // Init 0 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + SET_BIAS v24, v25, v26, v27 + SET_BIAS v28, v29, v30, v31 + + mov x28, x2 + L8LoopSz_TILE_12: + ld1 {v3.16b}, [x2], x15 // weight + ld1 {v0.16b, v1.16b, v2.16b}, [x11], #48 // src + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + ld1 {v4.16b}, [x2], #16 + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + .inst 0x4f82e070 // sdot v16.4s, v3.16b, v2.4b[0] + .inst 0x4fa2e071 // sdot v17.4s, v3.16b, v2.4b[1] + .inst 0x4f82e872 // sdot v18.4s, v3.16b, v2.4b[2] + .inst 0x4fa2e873 // sdot v19.4s, v3.16b, v2.4b[3] + .inst 0x4f80e094 // sdot v20.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e095 // sdot v21.4s, v4.16b, v0.4b[1] + .inst 0x4f80e896 // sdot v22.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e897 // sdot v23.4s, v4.16b, v0.4b[3] + sub x2, x2, x15 + .inst 0x4f81e098 // sdot v24.4s, v4.16b, v1.4b[0] + .inst 0x4fa1e099 // sdot v25.4s, v4.16b, v1.4b[1] + .inst 0x4f81e89a // sdot v26.4s, v4.16b, v1.4b[2] + .inst 0x4fa1e89b // sdot v27.4s, v4.16b, v1.4b[3] + subs x13, x13, #1 + .inst 0x4f82e09c // sdot v28.4s, v4.16b, v2.4b[0] + .inst 0x4fa2e09d // sdot v29.4s, v4.16b, v2.4b[1] + .inst 0x4f82e89e // sdot v30.4s, v4.16b, v2.4b[2] + .inst 0x4fa2e89f // sdot v31.4s, v4.16b, v2.4b[3] + bne L8LoopSz_TILE_12 + + L8LoopSzEnd_TILE_12: + //add x2, x2, x15 + //add x24, x15, x15 + add x2, x28, x24 + sub x5, x5, #1 + + L8Tile12Quan: + ld1 {v0.4s, v1.4s}, [x8], #32 // scale + ld1 {v2.4s, v3.4s, v4.4s}, [x25] // x kernel sum + ld1 {v5.4s, v6.4s}, [x26], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + Int32ToFloat v20, v21, v22, v23 + Int32ToFloat v24, v25, v26, v27 + Int32ToFloat v28, v29, v30, v31 + + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v0, v16, v17, v18, v19 + MUL_SCALE v1, v20, v21, v22, v23 + MUL_SCALE v1, v24, v25, v26, v27 + MUL_SCALE v1, v28, v29, v30, v31 + + cbz x27, TILE12_L8_MLA_TERM + ld1 {v0.4s, v1.4s}, [x27], #32 + ld1 {v7.4s}, [x27] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v7, v16, v17, v18, v19 + MUL_EXTRA_SCALE v0, v20, v21, v22, v23 + MUL_EXTRA_SCALE v1, v24, v25, v26, v27 + MUL_EXTRA_SCALE v7, v28, v29, v30, v31 + sub x27, x27, #32 + + TILE12_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v5, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v5, 1 // tile:1, oc:0-3 + 
MLA_WEIGHTZERO v10, v2, v5, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v5, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v5, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v5, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v5, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v5, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v4, v5, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v4, v5, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v4, v5, 2 // tile:10, oc:0-3 + MLA_WEIGHTZERO v19, v4, v5, 3 // tile:11, oc:0-3 + + //ld1r {v0.4s}, [x23] // f32 min + //ld1r {v1.4s}, [x24] // f32 max + MLA_WEIGHTZERO v20, v2, v6, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v21, v2, v6, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v22, v2, v6, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v23, v2, v6, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v24, v3, v6, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v25, v3, v6, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v26, v3, v6, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v27, v3, v6, 3 // tile:7, oc:4-7 + MLA_WEIGHTZERO v28, v4, v6, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v29, v4, v6, 1 // tile:9, oc:4-7 + MLA_WEIGHTZERO v30, v4, v6, 2 // tile:10, oc:4-7 + MLA_WEIGHTZERO v31, v4, v6, 3 // tile:11, oc:4-7 + sub x4, x4, #128 + + cbz x9, TILE12_ADD_DSTV + TILE12_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x9], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + ADD_BIAS_FLOAT v24, v25, v26, v27, v1 + ADD_BIAS_FLOAT v28, v29, v30, v31, v1 + + Float32ToHalf v8, v20, v9, v21, v0, v1 + Float32ToHalf v10, v22, v11, v23, v2, v3 + Float32ToHalf v12, v24, v13, v25, v4, v5 + Float32ToHalf v14, v26, v15, v27, v6, v7 + Float32ToHalf v16, v28, v17, v29, v8, v9 + Float32ToHalf v18, v30, v19, v31, v10, v11 + b TILE12_POST + + TILE12_ADD_DSTV: + Float32ToHalf v8, v20, v9, v21, v0, v1 + Float32ToHalf v10, v22, v11, v23, v2, v3 + Float32ToHalf v12, v24, v13, v25, v4, v5 + Float32ToHalf v14, v26, v15, v27, v6, v7 + Float32ToHalf v16, v28, v17, v29, v8, v9 + Float32ToHalf v18, v30, v19, v31, v10, v11 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 + ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0] + fadd v0.8h, v0.8h, v20.8h + fadd v1.8h, v1.8h, v21.8h + fadd v2.8h, v2.8h, v22.8h + fadd v3.8h, v3.8h, v23.8h + fadd v4.8h, v4.8h, v12.8h + fadd v5.8h, v5.8h, v13.8h + fadd v6.8h, v6.8h, v14.8h + fadd v7.8h, v7.8h, v15.8h + fadd v8.8h, v8.8h, v16.8h + fadd v9.8h, v9.8h, v17.8h + fadd v10.8h, v10.8h, v18.8h + fadd v11.8h, v11.8h, v19.8h + sub x0, x0, #128 + + TILE12_POST: + cbz x23, TILE12_STORE + ld1r {v24.8h}, [x23], #2 // f32 min + ld1r {v25.8h}, [x23] // f32 max + + ReLU_FP16 v0, v1, v2, v3, v24, v25 + ReLU_FP16 v4, v5, v6, v7, v24, v25 + ReLU_FP16 v8, v9, v10, v11, v24, v25 + sub x23, x23, #2 + + TILE12_STORE: + + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 + st1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], x4 + add x4, x4, #128 + L8Tile12LoopCheck: + cmp x5, #1 + bge L8LoopDz_TILE_12 + blt End + +TILE_8: + //ld1r {v26.4s}, [x23] // f32 min + //ld1r {v27.4s}, [x24] // f32 max + cmp x7, #8 + blt TILE_4 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x26 // weightQuantBias + +L8LoopDz_TILE_8: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 + mov x13, x3 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + mov x28, x12 + L8LoopSz_TILE_8: + ld1 {v3.16b}, 
[x12], x15 // weight + ld1 {v0.16b, v1.16b}, [x11], x22 // src + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + ld1 {v4.16b}, [x12], #16 + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + sub x12, x12, x15 + .inst 0x4f80e090 // sdot v16.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e091 // sdot v17.4s, v4.16b, v0.4b[1] + .inst 0x4f80e892 // sdot v18.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e893 // sdot v19.4s, v4.16b, v0.4b[3] + subs x13, x13, #1 + .inst 0x4f81e094 // sdot v20.4s, v4.16b, v1.4b[0] + .inst 0x4fa1e095 // sdot v21.4s, v4.16b, v1.4b[1] + .inst 0x4f81e896 // sdot v22.4s, v4.16b, v1.4b[2] + .inst 0x4fa1e897 // sdot v23.4s, v4.16b, v1.4b[3] + bne L8LoopSz_TILE_8 + + L8LoopSzEnd_TILE_8: + //add x12, x12, x15 + //add x24, x15, x15 + add x12, x28, x24 + sub x14, x14, #1 + + L8Tile8Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s, v3.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + Int32ToFloat v20, v21, v22, v23 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v1, v16, v17, v18, v19 + MUL_SCALE v1, v20, v21, v22, v23 + + cbz x27, TILE8_L8_MLA_TERM + ld1 {v4.4s, v5.4s}, [x27] + MUL_EXTRA_SCALE v4, v8, v9, v10, v11 + MUL_EXTRA_SCALE v5, v12, v13, v14, v15 + MUL_EXTRA_SCALE v4, v16, v17, v18, v19 + MUL_EXTRA_SCALE v5, v20, v21, v22, v23 + + TILE8_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v24, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v24, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v24, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v24, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v17, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v18, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v19, v2, v25, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v20, v3, v25, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v21, v3, v25, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v22, v3, v25, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v23, v3, v25, 3 // tile:7, oc:4-7 + + sub x4, x4, #64 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v1 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + + Float32ToHalf v8, v16, v9, v17, v0, v1 + Float32ToHalf v10, v18, v11, v19, v2, v3 + Float32ToHalf v12, v20, v13, v21, v4, v5 + Float32ToHalf v14, v22, v15, v23, v6, v7 + b TILE8_POST + + TILE8_ADD_DSTV: + Float32ToHalf v8, v16, v9, v17, v0, v1 + Float32ToHalf v10, v18, v11, v19, v2, v3 + Float32ToHalf v12, v20, v13, v21, v4, v5 + Float32ToHalf v14, v22, v15, v23, v6, v7 + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x10], #64 + ld1 {v28.8h, v29.8h, v30.8h, v31.8h}, [x10] + fadd v0.8h, v0.8h, v24.8h + fadd v1.8h, v1.8h, v25.8h + fadd v2.8h, v2.8h, v26.8h + fadd v3.8h, v3.8h, v27.8h + fadd v4.8h, v4.8h, v28.8h + fadd v5.8h, v5.8h, v29.8h + fadd v6.8h, v6.8h, v30.8h + fadd v7.8h, v7.8h, v31.8h + sub x10, x10, #64 
+ + TILE8_POST: + cbz x23, TILE8_STORE + ld1r {v24.8h}, [x23], #2 // f16 min + ld1r {v25.8h}, [x23] // f16 max + ReLU_FP16 v0, v1, v2, v3, v24, v25 + ReLU_FP16 v4, v5, v6, v7, v24, v25 + sub x23, x23, #2 + + TILE8_STORE: + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x10], x4 + + //st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], #64 + //st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 + //st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10], #64 + //st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x10], x4 + add x4, x4, #64 + + L8Tile8LoopCheck: + cmp x14, #1 + bge L8LoopDz_TILE_8 + +cbz x27, Tile8End +add x27, x27, #32 +Tile8End: + sub x7, x7, #8 + add x0, x0, x21, LSL #3 + add x1, x1, #32 + add x25, x25, #32 + +TILE_4: + cmp x7, #4 + blt TILE_1 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 + mov x20, x9 + mov x6, x26 // weightQuantBias +L8LoopDz_TILE_4: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 + mov x13, x3 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + + mov x28, x12 + L8LoopSz_TILE_4: + ld1 {v3.16b}, [x12], x15 // weight + ld1 {v0.16b}, [x11], x22 // src + ld1 {v4.16b}, [x12], #16 // weight + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + subs x13, x13, #1 + sub x12, x12, x15 + .inst 0x4f80e08c // sdot v12.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e08d // sdot v13.4s, v4.16b, v0.4b[1] + .inst 0x4f80e88e // sdot v14.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e88f // sdot v15.4s, v4.16b, v0.4b[3] + bne L8LoopSz_TILE_4 + + L8LoopSzEnd_TILE_4: + //add x12, x12, x15 + //add x24, x15, x15 + add x12, x28, x24 + sub x14, x14, #1 + + L8Tile4Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v1, v12, v13, v14, v15 + + cbz x27, TILE4_L8_MLA_TERM + ld1 {v4.4s}, [x27] + MUL_EXTRA_SCALE v4, v8, v9, v10, v11 + MUL_EXTRA_SCALE v4, v12, v13, v14, v15 + + TILE4_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v13, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v14, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v15, v2, v25, 3 // tile:3, oc:4-7 + + cbz x9, TILE4_ADD_DSTV + TILE4_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v1 + Float32ToHalf v8, v12, v9, v13, v0, v1 + Float32ToHalf v10, v14, v11, v15, v2, v3 + b TILE4_POST + + TILE4_ADD_DSTV: + Float32ToHalf v8, v12, v9, v13, v0, v1 + Float32ToHalf v10, v14, v11, v15, v2, v3 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x10] + fadd v0.8h, v0.8h, v20.8h + fadd v1.8h, v1.8h, v21.8h + fadd v2.8h, v2.8h, v22.8h + fadd v3.8h, v3.8h, v23.8h + + TILE4_POST: + cbz x23, TILE4_STORE + ld1r {v24.8h}, [x23], #2 // f16 min + ld1r {v25.8h}, [x23] // f16 max + sub x23, x23, #2 + ReLU_FP16 v0, v1, v2, v3, v24, v25 + //st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], x4 + //st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 + + + TILE4_STORE: + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], x4 + + L8Tile4LoopCheck: + cmp x14, #1 + bge L8LoopDz_TILE_4 +cbz x27, Tile4End +add x27, x27, #16 
+Tile4End: + sub x7, x7, #4 + add x0, x0, x21, LSL #2 + add x1, x1, #16 + add x25, x25, #16 + +TILE_1: + cbz x7, End + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 + mov x20, x9 + mov x6, x26 // weightQuantBias + +L8LoopDz_TILE_1: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 + mov x13, x3 + + movi v8.16b, #0 + movi v9.16b, #0 + //mov v8.16b, v0.16b + //mov v9.16b, v1.16b + mov x28, x12 + L8LoopSz_TILE_1: + ld1 {v3.16b}, [x12], x15 // weight + ld1 {v0.s}[0], [x11], x22 // src + ld1 {v4.16b}, [x12], #16 // weight + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + subs x13, x13, #1 + sub x12, x12, x15 + .inst 0x4f80e089 // sdot v9.4s, v4.16b, v0.4b[0] + bne L8LoopSz_TILE_1 + + L8LoopSzEnd_TILE_1: + //add x12, x12, x15 + //add x24, x15, x15 + add x12, x28, x24 + sub x14, x14, #1 + + L8Tile1Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.s}[0], [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + scvtf v8.4s, v8.4s + scvtf v9.4s, v9.4s + fmul v8.4s, v8.4s, v0.4s + fmul v9.4s, v9.4s, v1.4s + + cbz x27, TILE1_L8_MLA_TERM + ld1 {v4.s}[0], [x27] + fmul v8.4s, v8.4s, v4.s[0] + fmul v9.4s, v9.4s, v4.s[0] + + TILE1_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v25, 0 // tile:0, oc:4-7 + + cbz x9, TILE1_ADD_DSTV + TILE1_ADD_BIAS: + ld1 {v20.4s, v21.4s}, [x20], #32 + fadd v8.4s, v8.4s, v20.4s + fadd v9.4s, v9.4s, v21.4s + fcvtn v0.4h, v8.4s + fcvtn2 v0.8h, v9.4s + b TILE1_POST + + TILE1_ADD_DSTV: + fcvtn v0.4h, v8.4s + fcvtn2 v0.8h, v9.4s + ld1 {v3.8h}, [x10] + fadd v0.8h, v0.8h, v3.8h + + TILE1_POST: + cbz x23, TILE1_STORE + ld1r {v24.8h}, [x23], #2 // f32 min + ld1r {v25.8h}, [x23] // f32 max + sub x23, x23, #2 + fmax v0.8h, v24.8h, v0.8h + fmin v0.8h, v25.8h, v0.8h + // st1 {v8.4s}, [x10], x4 + // st1 {v9.4s}, [x10], x4 + + //fcvtn v0.4h, v8.4s + //fcvtn2 v0.8h, v9.4s + TILE1_STORE: + st1 {v0.8h}, [x10], x4 + + L8Tile1LoopCheck: + cmp x14, #1 + bge L8LoopDz_TILE_1 +cbz x27, Tile1End +add x27, x27, #4 +Tile1End: + sub x7, x7, #1 + add x0, x0, x21 + add x1, x1, #4 + add x25, x25, #4 + b TILE_1 + +End: +ldp x23, x24, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] +ldp x27, x28, [sp, #(16 * 6)] +ldp x19, x20, [sp, #(16 * 5)] +ldp x21, x22, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 10) +ret + +#endif // __aarch64__ diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16.S new file mode 100644 index 000000000..5d92ae056 --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16.S @@ -0,0 +1,690 @@ +// +// MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16.S +// MNN +// +// Created by MNN on 2019/12/17. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#if defined(__aarch64__) +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm + +.macro SET_BIAS d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm +.macro Int32ToFloat z0, z1, z2, z3 + scvtf \z0\().4s, \z0\().4s + scvtf \z1\().4s, \z1\().4s + scvtf \z2\().4s, \z2\().4s + scvtf \z3\().4s, \z3\().4s +.endm +.macro MUL_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm +.macro ReLU_FP16 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().8h, \s0\().8h, \z1\().8h + fmin \s1\().8h, \s1\().8h, \z1\().8h + fmin \s2\().8h, \s2\().8h, \z1\().8h + fmin \s3\().8h, \s3\().8h, \z1\().8h + fmax \s0\().8h, \s0\().8h, \z0\().8h + fmax \s1\().8h, \s1\().8h, \z0\().8h + fmax \s2\().8h, \s2\().8h, \z0\().8h + fmax \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro Float32ToHalf s0, s1, s2, s3, d0, d1 + fcvtn \d0\().4h, \s0\().4s + fcvtn2 \d0\().8h, \s1\().4s + fcvtn \d1\().4h, \s2\().4s + fcvtn2 \d1\().8h, \s3\().4s +.endm + +asm_function MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16 +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; +}; +*/ + +//void MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16(int8_t* dst, const int8_t* src, +// const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, +// const QuanPostTreatParameters* parameters, size_t realDstCount); + +//Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step +//x5:dst_depth_quad, x6: parameters, x7: realDstCount + +//Load from x6: x8: scale, x9: bias, x25: xKernelSum, x26: weightQuantBias, x23: fp32minmax, x27: blockNum +ldr x8, [x6, #0] +ldr x9, [x6, #8] +//ldr w12, [x6, #16] + +stp d14, d15, [sp, #(-16 * 10)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +stp x27, x28, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x23, x24, [sp, #(16 * 8)] + +//ldr w27, [x6, #20] +ldr x25, [x6, #40] // xKernelSum +ldr x26, [x6, #48] // weightQuantBias +ldr x23, [x6, #56] // fp32minmax +ldr x27, [x6, #64] // blockNum + +mov x21, #16 // sizeof(float16_t) * PACK +mul x27, x27, x3 +Start: +lsl x15, x27, #3 // x15 = src_depth_quad * UNIT * SRC_UNIT * sizeof(int4_t) +mov x22, #48 // src_steps +add x24, x15, x15 +ldr x27, [x6, #80] // extra scale +TILE_12: + cmp x7, #12 + blt TILE_8 + +L8LoopDz_TILE_12: + // ld1 {v0.4s, v1.4s}, [x9], #32 // bias + mov x11, x1 + mov x13, x3 + movi v7.16b, #15 + + // Init 0 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + SET_BIAS v24, v25, v26, v27 + SET_BIAS v28, v29, v30, v31 + + mov x28, x2 + L8LoopSz_TILE_12: + ld1 {v3.d}[0], [x2], x15 // weight + ld1 {v4.d}[0], [x2], #8 + ld1 {v0.16b, v1.16b, v2.16b}, [x11], #48 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + + .inst 0x4f82e070 // sdot v16.4s, v3.16b, v2.4b[0] + .inst 0x4fa2e071 // sdot v17.4s, v3.16b, v2.4b[1] + .inst 0x4f82e872 // sdot v18.4s, v3.16b, v2.4b[2] + .inst 0x4fa2e873 // sdot v19.4s, v3.16b, v2.4b[3] + .inst 0x4f80e094 // sdot v20.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e095 // sdot v21.4s, v4.16b, v0.4b[1] + .inst 0x4f80e896 // sdot v22.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e897 // sdot v23.4s, v4.16b, v0.4b[3] + sub x2, x2, x15 + .inst 0x4f81e098 // sdot v24.4s, v4.16b, v1.4b[0] + .inst 0x4fa1e099 // sdot v25.4s, v4.16b, v1.4b[1] + .inst 0x4f81e89a // sdot v26.4s, v4.16b, v1.4b[2] + .inst 0x4fa1e89b // sdot v27.4s, v4.16b, v1.4b[3] + subs x13, x13, #1 + .inst 0x4f82e09c // sdot v28.4s, v4.16b, v2.4b[0] + .inst 0x4fa2e09d // sdot v29.4s, v4.16b, v2.4b[1] + .inst 0x4f82e89e // sdot v30.4s, v4.16b, v2.4b[2] + .inst 0x4fa2e89f // sdot v31.4s, v4.16b, v2.4b[3] + bne L8LoopSz_TILE_12 + + L8LoopSzEnd_TILE_12: + add x2, x28, x24 + sub x5, x5, #1 + + L8Tile12Quan: + ld1 {v0.4s, v1.4s}, [x8], #32 // scale + ld1 {v2.4s, v3.4s, v4.4s}, [x25] // x kernel sum + ld1 {v5.4s, v6.4s}, [x26], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + Int32ToFloat v20, v21, v22, v23 + Int32ToFloat v24, v25, v26, v27 + Int32ToFloat v28, v29, v30, v31 + + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v0, v16, v17, v18, v19 + MUL_SCALE v1, v20, v21, v22, v23 + MUL_SCALE v1, v24, v25, v26, v27 + MUL_SCALE v1, v28, v29, v30, v31 + + cbz x27, TILE12_L8_MLA_TERM + ld1 {v0.4s, v1.4s}, [x27], #32 + ld1 {v7.4s}, [x27] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v7, v16, v17, v18, v19 + MUL_EXTRA_SCALE v0, v20, v21, v22, v23 + MUL_EXTRA_SCALE v1, 
v24, v25, v26, v27 + MUL_EXTRA_SCALE v7, v28, v29, v30, v31 + sub x27, x27, #32 + + TILE12_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v5, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v5, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v5, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v5, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v5, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v5, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v5, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v5, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v4, v5, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v4, v5, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v4, v5, 2 // tile:10, oc:0-3 + MLA_WEIGHTZERO v19, v4, v5, 3 // tile:11, oc:0-3 + + //ld1r {v0.4s}, [x23] // f32 min + //ld1r {v1.4s}, [x24] // f32 max + MLA_WEIGHTZERO v20, v2, v6, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v21, v2, v6, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v22, v2, v6, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v23, v2, v6, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v24, v3, v6, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v25, v3, v6, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v26, v3, v6, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v27, v3, v6, 3 // tile:7, oc:4-7 + MLA_WEIGHTZERO v28, v4, v6, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v29, v4, v6, 1 // tile:9, oc:4-7 + MLA_WEIGHTZERO v30, v4, v6, 2 // tile:10, oc:4-7 + MLA_WEIGHTZERO v31, v4, v6, 3 // tile:11, oc:4-7 + sub x4, x4, #128 + + cbz x9, TILE12_ADD_DSTV + TILE12_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x9], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + ADD_BIAS_FLOAT v24, v25, v26, v27, v1 + ADD_BIAS_FLOAT v28, v29, v30, v31, v1 + + Float32ToHalf v8, v20, v9, v21, v0, v1 + Float32ToHalf v10, v22, v11, v23, v2, v3 + Float32ToHalf v12, v24, v13, v25, v4, v5 + Float32ToHalf v14, v26, v15, v27, v6, v7 + Float32ToHalf v16, v28, v17, v29, v8, v9 + Float32ToHalf v18, v30, v19, v31, v10, v11 + b TILE12_POST + + TILE12_ADD_DSTV: + Float32ToHalf v8, v20, v9, v21, v0, v1 + Float32ToHalf v10, v22, v11, v23, v2, v3 + Float32ToHalf v12, v24, v13, v25, v4, v5 + Float32ToHalf v14, v26, v15, v27, v6, v7 + Float32ToHalf v16, v28, v17, v29, v8, v9 + Float32ToHalf v18, v30, v19, v31, v10, v11 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + ld1 {v12.8h, v13.8h, v14.8h, v15.8h}, [x0], #64 + ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0] + fadd v0.8h, v0.8h, v20.8h + fadd v1.8h, v1.8h, v21.8h + fadd v2.8h, v2.8h, v22.8h + fadd v3.8h, v3.8h, v23.8h + fadd v4.8h, v4.8h, v12.8h + fadd v5.8h, v5.8h, v13.8h + fadd v6.8h, v6.8h, v14.8h + fadd v7.8h, v7.8h, v15.8h + fadd v8.8h, v8.8h, v16.8h + fadd v9.8h, v9.8h, v17.8h + fadd v10.8h, v10.8h, v18.8h + fadd v11.8h, v11.8h, v19.8h + sub x0, x0, #128 + + TILE12_POST: + cbz x23, TILE12_STORE + ld1r {v24.8h}, [x23], #2 // f32 min + ld1r {v25.8h}, [x23] // f32 max + + ReLU_FP16 v0, v1, v2, v3, v24, v25 + ReLU_FP16 v4, v5, v6, v7, v24, v25 + ReLU_FP16 v8, v9, v10, v11, v24, v25 + sub x23, x23, #2 + + TILE12_STORE: + + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], #64 + st1 {v8.8h, v9.8h, v10.8h, v11.8h}, [x0], x4 + add x4, x4, #128 + L8Tile12LoopCheck: + cmp x5, #1 + bge L8LoopDz_TILE_12 + blt End + +TILE_8: + cmp x7, #8 + blt TILE_4 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x26 // weightQuantBias + +L8LoopDz_TILE_8: + mov x11, x1 + mov x13, x3 + movi v7.16b, #15 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + 
SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + mov x28, x12 + L8LoopSz_TILE_8: + ld1 {v3.d}[0], [x12], x15 // weight + ld1 {v4.d}[0], [x12], #8 + ld1 {v0.16b, v1.16b}, [x11], x22 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + sub x12, x12, x15 + .inst 0x4f80e090 // sdot v16.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e091 // sdot v17.4s, v4.16b, v0.4b[1] + .inst 0x4f80e892 // sdot v18.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e893 // sdot v19.4s, v4.16b, v0.4b[3] + subs x13, x13, #1 + .inst 0x4f81e094 // sdot v20.4s, v4.16b, v1.4b[0] + .inst 0x4fa1e095 // sdot v21.4s, v4.16b, v1.4b[1] + .inst 0x4f81e896 // sdot v22.4s, v4.16b, v1.4b[2] + .inst 0x4fa1e897 // sdot v23.4s, v4.16b, v1.4b[3] + bne L8LoopSz_TILE_8 + + L8LoopSzEnd_TILE_8: + add x12, x28, x24 + sub x14, x14, #1 + + L8Tile8Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s, v3.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + Int32ToFloat v20, v21, v22, v23 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v1, v16, v17, v18, v19 + MUL_SCALE v1, v20, v21, v22, v23 + + cbz x27, TILE8_L8_MLA_TERM + ld1 {v4.4s, v5.4s}, [x27] + MUL_EXTRA_SCALE v4, v8, v9, v10, v11 + MUL_EXTRA_SCALE v5, v12, v13, v14, v15 + MUL_EXTRA_SCALE v4, v16, v17, v18, v19 + MUL_EXTRA_SCALE v5, v20, v21, v22, v23 + + TILE8_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v24, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v24, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v24, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v24, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v17, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v18, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v19, v2, v25, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v20, v3, v25, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v21, v3, v25, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v22, v3, v25, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v23, v3, v25, 3 // tile:7, oc:4-7 + + sub x4, x4, #64 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v1 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + + Float32ToHalf v8, v16, v9, v17, v0, v1 + Float32ToHalf v10, v18, v11, v19, v2, v3 + Float32ToHalf v12, v20, v13, v21, v4, v5 + Float32ToHalf v14, v22, v15, v23, v6, v7 + b TILE8_POST + + TILE8_ADD_DSTV: + Float32ToHalf v8, v16, v9, v17, v0, v1 + Float32ToHalf v10, v18, v11, v19, v2, v3 + Float32ToHalf v12, v20, v13, v21, v4, v5 + Float32ToHalf v14, v22, v15, v23, v6, v7 + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x10], #64 + ld1 {v28.8h, 
v29.8h, v30.8h, v31.8h}, [x10] + fadd v0.8h, v0.8h, v24.8h + fadd v1.8h, v1.8h, v25.8h + fadd v2.8h, v2.8h, v26.8h + fadd v3.8h, v3.8h, v27.8h + fadd v4.8h, v4.8h, v28.8h + fadd v5.8h, v5.8h, v29.8h + fadd v6.8h, v6.8h, v30.8h + fadd v7.8h, v7.8h, v31.8h + sub x10, x10, #64 + + TILE8_POST: + cbz x23, TILE8_STORE + ld1r {v24.8h}, [x23], #2 // f16 min + ld1r {v25.8h}, [x23] // f16 max + ReLU_FP16 v0, v1, v2, v3, v24, v25 + ReLU_FP16 v4, v5, v6, v7, v24, v25 + sub x23, x23, #2 + + TILE8_STORE: + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], #64 + st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x10], x4 + + //st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], #64 + //st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 + //st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10], #64 + //st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x10], x4 + add x4, x4, #64 + + L8Tile8LoopCheck: + cmp x14, #1 + bge L8LoopDz_TILE_8 + +cbz x27, Tile8End +add x27, x27, #32 +Tile8End: + sub x7, x7, #8 + add x0, x0, x21, LSL #3 + add x1, x1, #32 + add x25, x25, #32 + +TILE_4: + movi v7.16b, #15 + cmp x7, #4 + blt TILE_1 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 + mov x20, x9 + mov x6, x26 // weightQuantBias +L8LoopDz_TILE_4: + mov x11, x1 + mov x13, x3 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + + mov x28, x12 + L8LoopSz_TILE_4: + ld1 {v3.d}[0], [x12], x15 // weight + ld1 {v0.16b}, [x11], x22 // src + ld1 {v4.d}[0], [x12], #8 // weight + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + subs x13, x13, #1 + sub x12, x12, x15 + .inst 0x4f80e08c // sdot v12.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e08d // sdot v13.4s, v4.16b, v0.4b[1] + .inst 0x4f80e88e // sdot v14.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e88f // sdot v15.4s, v4.16b, v0.4b[3] + bne L8LoopSz_TILE_4 + + L8LoopSzEnd_TILE_4: + add x12, x28, x24 + sub x14, x14, #1 + + L8Tile4Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v1, v12, v13, v14, v15 + + cbz x27, TILE4_L8_MLA_TERM + ld1 {v4.4s}, [x27] + MUL_EXTRA_SCALE v4, v8, v9, v10, v11 + MUL_EXTRA_SCALE v4, v12, v13, v14, v15 + + TILE4_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v13, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v14, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v15, v2, v25, 3 // tile:3, oc:4-7 + + cbz x9, TILE4_ADD_DSTV + TILE4_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v1 + Float32ToHalf v8, v12, v9, v13, v0, v1 + Float32ToHalf v10, v14, v11, v15, v2, v3 + b TILE4_POST + + TILE4_ADD_DSTV: + Float32ToHalf v8, v12, v9, v13, v0, v1 + Float32ToHalf v10, v14, v11, v15, v2, v3 + ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x10] + fadd v0.8h, v0.8h, v20.8h + fadd v1.8h, v1.8h, v21.8h + fadd v2.8h, v2.8h, v22.8h + fadd v3.8h, v3.8h, v23.8h + + TILE4_POST: + cbz x23, 
TILE4_STORE + ld1r {v24.8h}, [x23], #2 // f16 min + ld1r {v25.8h}, [x23] // f16 max + sub x23, x23, #2 + ReLU_FP16 v0, v1, v2, v3, v24, v25 + //st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], x4 + //st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 + + + TILE4_STORE: + st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], x4 + + L8Tile4LoopCheck: + cmp x14, #1 + bge L8LoopDz_TILE_4 +cbz x27, Tile4End +add x27, x27, #16 +Tile4End: + sub x7, x7, #4 + add x0, x0, x21, LSL #2 + add x1, x1, #16 + add x25, x25, #16 + +TILE_1: + // Already execute: [movi v7.16b, #15] in TILE_4 + cbz x7, End + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 + mov x20, x9 + mov x6, x26 // weightQuantBias + +L8LoopDz_TILE_1: + mov x11, x1 + mov x13, x3 + + movi v8.16b, #0 + movi v9.16b, #0 + mov x28, x12 + L8LoopSz_TILE_1: + ld1 {v3.d}[0], [x12], x15 // weight + ld1 {v0.s}[0], [x11], x22 // src + ld1 {v4.d}[0], [x12], #8 // weight + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + subs x13, x13, #1 + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + sub x12, x12, x15 + + .inst 0x4f80e089 // sdot v9.4s, v4.16b, v0.4b[0] + bne L8LoopSz_TILE_1 + + L8LoopSzEnd_TILE_1: + add x12, x28, x24 + sub x14, x14, #1 + + L8Tile1Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.s}[0], [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + scvtf v8.4s, v8.4s + scvtf v9.4s, v9.4s + fmul v8.4s, v8.4s, v0.4s + fmul v9.4s, v9.4s, v1.4s + + cbz x27, TILE1_L8_MLA_TERM + ld1 {v4.s}[0], [x27] + fmul v8.4s, v8.4s, v4.s[0] + fmul v9.4s, v9.4s, v4.s[0] + + TILE1_L8_MLA_TERM: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v25, 0 // tile:0, oc:4-7 + + cbz x9, TILE1_ADD_DSTV + TILE1_ADD_BIAS: + ld1 {v20.4s, v21.4s}, [x20], #32 + fadd v8.4s, v8.4s, v20.4s + fadd v9.4s, v9.4s, v21.4s + fcvtn v0.4h, v8.4s + fcvtn2 v0.8h, v9.4s + b TILE1_POST + + TILE1_ADD_DSTV: + fcvtn v0.4h, v8.4s + fcvtn2 v0.8h, v9.4s + ld1 {v3.8h}, [x10] + fadd v0.8h, v0.8h, v3.8h + + TILE1_POST: + cbz x23, TILE1_STORE + ld1r {v24.8h}, [x23], #2 // f16 min + ld1r {v25.8h}, [x23] // f16 max + sub x23, x23, #2 + fmax v0.8h, v24.8h, v0.8h + fmin v0.8h, v25.8h, v0.8h + // st1 {v8.4s}, [x10], x4 + // st1 {v9.4s}, [x10], x4 + TILE1_STORE: + st1 {v0.8h}, [x10], x4 + + L8Tile1LoopCheck: + cmp x14, #1 + bge L8LoopDz_TILE_1 +cbz x27, Tile1End +add x27, x27, #4 +Tile1End: + sub x7, x7, #1 + add x0, x0, x21 + add x1, x1, #4 + add x25, x25, #4 + b TILE_1 + +End: +ldp x23, x24, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] +ldp x27, x28, [sp, #(16 * 6)] +ldp x19, x20, [sp, #(16 * 5)] +ldp x21, x22, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 10) +ret + +#endif // __aarch64__ diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16.S new file mode 100644 index 000000000..76c79b42e --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16.S @@ -0,0 +1,855 @@ +// +// MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16.S +// MNN +// +// Created by MNN on 2022/09/26. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#if defined(__aarch64__) +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SET_0_5 d0, d1, d2, d3, d4 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 + movi \d4\().16b, #0 +.endm + +.macro SET_0_4 d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm + +.macro SET_0_2 d0, d1 + movi \d0\().16b, #0 + movi \d1\().16b, #0 +.endm + +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm + +.macro ReLU_FP16 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().8h, \s0\().8h, \z1\().8h + fmin \s1\().8h, \s1\().8h, \z1\().8h + fmin \s2\().8h, \s2\().8h, \z1\().8h + fmin \s3\().8h, \s3\().8h, \z1\().8h + fmax \s0\().8h, \s0\().8h, \z0\().8h + fmax \s1\().8h, \s1\().8h, \z0\().8h + fmax \s2\().8h, \s2\().8h, \z0\().8h + fmax \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro ReLU_FP16_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().8h, \s0\().8h, \z1\().8h + fmin \s1\().8h, \s1\().8h, \z1\().8h + fmax \s0\().8h, \s0\().8h, \z0\().8h + fmax \s1\().8h, \s1\().8h, \z0\().8h +.endm + +.macro SET_BIAS s, d0, d1, d2, d3, d4, idx + dup \d0\().2d, \s\().d[\idx] + dup \d1\().2d, \s\().d[\idx] + dup \d2\().2d, \s\().d[\idx] + dup \d3\().2d, \s\().d[\idx] + dup \d4\().2d, \s\().d[\idx] +.endm +.macro SET_BIAS_4 s, d0, d1, d2, d3, idx + dup \d0\().2d, \s\().d[\idx] + dup \d1\().2d, \s\().d[\idx] + dup \d2\().2d, \s\().d[\idx] + dup \d3\().2d, \s\().d[\idx] +.endm +.macro SET_BIAS_2 s, d0, d1, idx + dup \d0\().2d, \s\().d[\idx] + dup \d1\().2d, \s\().d[\idx] +.endm +.macro Int32ToFloat z0, z1, z2, z3 + scvtf \z0\().4s, \z0\().4s + scvtf \z1\().4s, \z1\().4s + scvtf \z2\().4s, \z2\().4s + scvtf \z3\().4s, \z3\().4s +.endm +.macro MUL_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro Float32ToHalf s0, s1, s2, s3, d0, d1 + fcvtn \d0\().4h, \s0\().4s + fcvtn2 \d0\().8h, \s1\().4s + fcvtn \d1\().4h, \s2\().4s + fcvtn2 \d1\().8h, \s3\().4s +.endm + +asm_function MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16 + +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; +}; +*/ + +//void MNNGemmInt8AddBiasScale_ARMV86_Unit(int8_t* dst, const int8_t* src, +// const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, +// const QuanPostTreatParameters* parameters, size_t realDstCount); + +//Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step +//x5:dst_depth_quad, x6: parameters, x7: realDstCount + +//Load from x7: x8: scale, x9: biasFloat, x27: srcKernelSum, x28: weightQuanBias, x14: fp32minmax +/* For FP16 +UNIT = 8; +SRC_UNIT = 8; +DST_XUNIT = 10; + */ +ldr x8, [x6, #0] +ldr x9, [x6, #8] + +stp d14, d15, [sp, #(-16 * 10)]! +stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +stp x23, x24, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x27, x28, [sp, #(16 * 8)] +// ldr w23, [x6, #24] +ldr x27, [x6, #40] // srcKernelSum +ldr x28, [x6, #48] // weightQuanBias +ldr x23, [x6, #64] // blockNum +ldr x14, [x6, #56] // fp32minmax + +mul x23, x23, x3 // UP_DIV(ic*ky*kx, SRC_UNIT) = blockNum * src_depth_quad_per_block +mov x22, #80 // GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT = 10 * 8 = 80 +mov x21, #16 // sizeof(float16_t) * UNIT + +Start: +lsl x15, x23, #6 // x15 = src_depth_quad * UNIT * UNIT_SRC * sizeof(int8_t) = src_depth_quad * 64 = src_depth_quad << 6 +ldr x23, [x6, #80] // extra scale +TILE_10: + cmp x7, #10 + blt TILE_8 +sub x4, x4, #128 +LoopDz_TILE_10: + //ld1 {v0.4s, v1.4s}, [x9], #32 // bias + mov x11, x1 // src + mov x12, x2 // weight + mov x13, x3 // src_depth_quad + mov x10, x0 // tag dst address + + SET_0_5 v12, v16, v20, v24, v28 // oc:0,1,0,1 + SET_0_5 v13, v17, v21, v25, v29 // oc:2,3,2,3 + SET_0_5 v14, v18, v22, v26, v30 // oc:4,5,4,5 + SET_0_5 v15, v19, v23, v27, v31 // oc:6,7,6,7 + +LoopSz_TILE_10: + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], #64 // src: E0-E9 + ld1 {v7.16b}, [x11], #16 + subs x13, x13, #1 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, 
tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + + .inst 0x4e88a4fc // smmla v28.4s, v7.16b, v8.16b // tile8-oc0, tile8-oc1, tile9-oc0, tile9-oc1 + .inst 0x4e89a4fd // smmla v29.4s, v7.16b, v9.16b // tile8-oc2, tile8-oc3, tile9-oc2, tile9-oc3 + .inst 0x4e8aa4fe // smmla v30.4s, v7.16b, v10.16b // tile8-oc4, tile8-oc5, tile9-oc4, tile9-oc5 + .inst 0x4e8ba4ff // smmla v31.4s, v7.16b, v11.16b // tile8-oc6, tile8-oc7, tile9-oc6, tile9-oc7 + bne LoopSz_TILE_10 +LoopSzEnd_TILE_10: + add x2, x2, x15 // weight += dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT) * sizeof(int8_t); + sub x5, x5, #1 // dz-- + // transpose + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v4.2d, v16.2d, v17.2d + uzp2 v5.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + + uzp1 v8.2d, v20.2d, v21.2d + uzp2 v9.2d, v20.2d, v21.2d + uzp1 v10.2d, v22.2d, v23.2d + uzp2 v11.2d, v22.2d, v23.2d + + uzp1 v12.2d, v24.2d, v25.2d + uzp2 v13.2d, v24.2d, v25.2d + uzp1 v14.2d, v26.2d, v27.2d + uzp2 v15.2d, v26.2d, v27.2d + + uzp1 v16.2d, v28.2d, v29.2d + uzp2 v17.2d, v28.2d, v29.2d + uzp1 v18.2d, v30.2d, v31.2d + uzp2 v19.2d, v30.2d, v31.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + +Tile10Quan: + ld1 {v20.4s, v21.4s}, [x8], #32 // scale + ld1 {v22.4s, v23.4s}, [x27], #32 // x kernel sum + ld1 {v24.d}[0], [x27] + sub x27, x27, #32 + ld1 {v25.4s, v26.4s}, [x28], #32 // weight quan zeropoint + //ld1r {v27.4s}, [x6], #4 // f32 min + //ld1r {v28.4s}, [x6] // f32 max + //sub x6, x6, #4 + MUL_SCALE v20, v0, v1, v4, v5 + MUL_SCALE v21, v2, v3, v6, v7 + MUL_SCALE v20, v8, v9, v12, v13 + MUL_SCALE v21, v10, v11, v14, v15 + fmul v16.4s, v16.4s, v20.4s + fmul v17.4s, v17.4s, v20.4s + fmul v18.4s, v18.4s, v21.4s + fmul v19.4s, v19.4s, v21.4s + + cbz x23, TILE10_MLA + ld1 {v27.4s, v28.4s}, [x23], #32 + ld1 {v29.d}[0], [x23] + MUL_EXTRA_SCALE v27, v0, v1, v4, v5 + MUL_EXTRA_SCALE v28, v8, v9, v12, v13 + MUL_EXTRA_SCALE v27, v2, v3, v6, v7 + MUL_EXTRA_SCALE v28, v10, v11, v14, v15 + fmul v16.4s, v16.4s, v29.s[0] + fmul v17.4s, v17.4s, v29.s[1] + fmul v18.4s, v18.4s, v29.s[0] + fmul v19.4s, v19.4s, v29.s[1] + sub x23, x23, #32 + + TILE10_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v4, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v5, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + MLA_WEIGHTZERO v8, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v9, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v10, v23, v26, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v11, v23, v26, 1 // tile:5, oc:4-7 + + MLA_WEIGHTZERO v12, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v13, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v14, v23, v26, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v15, v23, v26, 3 // tile:7, oc:4-7 + + MLA_WEIGHTZERO v16, v24, v25, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v24, v25, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v24, v26, 0 // 
tile:8, oc:4-7 + MLA_WEIGHTZERO v19, v24, v26, 1 // tile:9, oc:4-7 + + + cbz x9, TILE10_ADD_DSTV + TILE10_ADD_BIAS: + ld1 {v20.4s, v21.4s}, [x9], #32 // bias + ADD_BIAS_FLOAT v0, v1, v4, v5, v20 + ADD_BIAS_FLOAT v2, v3, v6, v7, v21 + ADD_BIAS_FLOAT v8, v9, v12, v13, v20 + ADD_BIAS_FLOAT v10, v11, v14, v15, v21 + fadd v16.4s, v16.4s, v20.4s + fadd v17.4s, v17.4s, v20.4s + fadd v18.4s, v18.4s, v21.4s + fadd v19.4s, v19.4s, v21.4s + + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + Float32ToHalf v16, v18, v17, v19, v30, v31 + b TILE10_POST // to Relu post + + TILE10_ADD_DSTV: + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + Float32ToHalf v16, v18, v17, v19, v30, v31 + + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], #64 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x10], #64 + ld1 {v8.8h, v9.8h}, [x10] + + fadd v20.8h, v20.8h, v0.8h + fadd v21.8h, v21.8h, v1.8h + fadd v22.8h, v22.8h, v2.8h + fadd v23.8h, v23.8h, v3.8h + fadd v24.8h, v24.8h, v4.8h + fadd v25.8h, v25.8h, v5.8h + fadd v26.8h, v26.8h, v6.8h + fadd v27.8h, v27.8h, v7.8h + fadd v30.8h, v30.8h, v8.8h + fadd v31.8h, v31.8h, v9.8h + + TILE10_POST: + cbz x14, TILE10_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + + ReLU_FP16 v20, v21, v22, v23, v29, v28 + ReLU_FP16 v24, v25, v26, v27, v29, v28 + ReLU_FP16_2 v30, v31, v29, v28 + + TILE10_STORE: + + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x0], #64 + st1 {v30.8h, v31.8h}, [x0], x4 + +Tile10LoopCheck: + cmp x5, #1 + bge LoopDz_TILE_10 + b End + +TILE_8: + cmp x7, #8 + blt TILE_4 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias + sub x4, x4, #64 // For Tile8, revert it when Tile8 end +LoopDz_TILE_8: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 // tag dst + + SET_0_4 v12, v16, v20, v24 // oc:0,1,0,1 + SET_0_4 v13, v17, v21, v25 // oc:2,3,2,3 + SET_0_4 v14, v18, v22, v26 // oc:4,5,4,5 + SET_0_4 v15, v19, v23, v27 // oc:6,7,6,7 +LoopSz_TILE_8: + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], x22 // src: E0-E7 + subs x13, x13, #1 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, 
tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + + bne LoopSz_TILE_8 +LoopSzEnd_TILE_8: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v4.2d, v16.2d, v17.2d + uzp2 v5.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + + uzp1 v8.2d, v20.2d, v21.2d + uzp2 v9.2d, v20.2d, v21.2d + uzp1 v10.2d, v22.2d, v23.2d + uzp2 v11.2d, v22.2d, v23.2d + + uzp1 v12.2d, v24.2d, v25.2d + uzp2 v13.2d, v24.2d, v25.2d + uzp1 v14.2d, v26.2d, v27.2d + uzp2 v15.2d, v26.2d, v27.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + +Tile8Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s, v23.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v4, v5 + MUL_SCALE v21, v2, v3, v6, v7 + MUL_SCALE v20, v8, v9, v12, v13 + MUL_SCALE v21, v10, v11, v14, v15 + + cbz x23, TILE8_MLA + ld1 {v27.4s, v28.4s}, [x23] + MUL_EXTRA_SCALE v27, v0, v1, v4, v5 + MUL_EXTRA_SCALE v28, v8, v9, v12, v13 + MUL_EXTRA_SCALE v27, v2, v3, v6, v7 + MUL_EXTRA_SCALE v28, v10, v11, v14, v15 + + TILE8_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v4, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v5, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + MLA_WEIGHTZERO v8, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v9, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v10, v23, v26, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v11, v23, v26, 1 // tile:5, oc:4-7 + + MLA_WEIGHTZERO v12, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v13, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v14, v23, v26, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v15, v23, v26, 3 // tile:7, oc:4-7 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v4, v5, v16 + ADD_BIAS_FLOAT v2, v3, v6, v7, v17 + ADD_BIAS_FLOAT v8, v9, v12, v13, v16 + ADD_BIAS_FLOAT v10, v11, v14, v15, v17 + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + b TILE8_POST + + TILE8_ADD_DSTV: + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], #64 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x10] + fadd v20.8h, v20.8h, v0.8h + fadd v21.8h, v21.8h, v1.8h + fadd v22.8h, v22.8h, v2.8h + fadd v23.8h, 
v23.8h, v3.8h + fadd v24.8h, v24.8h, v4.8h + fadd v25.8h, v25.8h, v5.8h + fadd v26.8h, v26.8h, v6.8h + fadd v27.8h, v27.8h, v7.8h + + TILE8_POST: + cbz x14, TILE8_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + ReLU_FP16 v20, v21, v22, v23, v29, v28 + ReLU_FP16 v24, v25, v26, v27, v29, v28 + + TILE8_STORE: + + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x26], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x26], x4 + +Tile8LoopCheck: + cmp x24, #1 + bge LoopDz_TILE_8 +cbz x23, Tile8End +add x23, x23, #32 +Tile8End: + sub x7, x7, #8 + add x0, x0, x21, LSL #3 + add x1, x1, #64 + add x27, x27, #32 + add x4, x4, #64 // Revert it + +TILE_4: + cmp x7, #4 + blt TILE_2 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +LoopDz_TILE_4: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 // tag dst + + SET_0_2 v12, v16 // oc:0,1,0,1 + SET_0_2 v13, v17 // oc:2,3,2,3 + SET_0_2 v14, v18 // oc:4,5,4,5 + SET_0_2 v15, v19 // oc:6,7,6,7 +LoopSz_TILE_4: + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v4.16b, v5.16b}, [x11], x22 // src + subs x13, x13, #1 + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a4b0 // smmla v16.4s, v5.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a4b1 // smmla v17.4s, v5.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa4b2 // smmla v18.4s, v5.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba4b3 // smmla v19.4s, v5.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + bne LoopSz_TILE_4 +LoopSzEnd_TILE_4: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v4.2d, v16.2d, v17.2d + uzp2 v5.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + +Tile4Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v4, v5 + MUL_SCALE v21, v2, v3, v6, v7 + + cbz x23, TILE4_MLA + ld1 {v27.4s}, [x23] + MUL_EXTRA_SCALE v27, v0, v1, v4, v5 + MUL_EXTRA_SCALE v27, v2, v3, v6, v7 + + TILE4_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v4, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v5, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + cbz x9, TILE4_ADD_DSTV + TILE4_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v4, v5, v16 + ADD_BIAS_FLOAT v2, v3, v6, v7, v17 + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + b 
TILE4_POST + + TILE4_ADD_DSTV: + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x10] + fadd v20.8h, v20.8h, v24.8h + fadd v21.8h, v21.8h, v25.8h + fadd v22.8h, v22.8h, v26.8h + fadd v23.8h, v23.8h, v27.8h + + TILE4_POST: + cbz x14, TILE4_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + ReLU_FP16 v20, v21, v22, v23, v29, v28 + + TILE4_STORE: + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x26], x4 + +Tile4LoopCheck: + cmp x24, #1 + bge LoopDz_TILE_4 +cbz x23, Tile4End +add x23, x23, #16 +Tile4End: + sub x7, x7, #4 + add x0, x0, x21, LSL #2 + add x1, x1, #32 + add x27, x27, #16 + //b TILE_4 + +TILE_2: + cmp x7, #2 + blt TILE_1 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +LoopDz_TILE_2: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 // tag dst + + // v12 oc:0,1,0,1 + // v13 oc:2,3,2,3 + // v14 oc:4,5,4,5 + // v15 oc:6,7,6,7 + SET_0_4 v12, v13, v14, v15 +LoopSz_TILE_2: + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 + ld1 {v4.16b}, [x11], x22 // src + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + subs x13, x13, #1 + bne LoopSz_TILE_2 +LoopSzEnd_TILE_2: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + Int32ToFloat v0, v1, v2, v3 + +Tile2Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.d}[0], [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + fmul v0.4s, v0.4s, v20.4s + fmul v1.4s, v1.4s, v20.4s + fmul v2.4s, v2.4s, v21.4s + fmul v3.4s, v3.4s, v21.4s + + cbz x23, TILE2_MLA + ld1 {v27.d}[0], [x23] + fmul v0.4s, v0.4s, v27.s[0] + fmul v1.4s, v1.4s, v27.s[1] + fmul v2.4s, v2.4s, v27.s[0] + fmul v3.4s, v3.4s, v27.s[1] + + TILE2_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + cbz x9, TILE2_ADD_DSTV + TILE2_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + fadd v2.4s, v2.4s, v17.4s + fadd v3.4s, v3.4s, v17.4s + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + b TILE2_POST + + TILE2_ADD_DSTV: + Float32ToHalf v0, v2, v1, v3, v20, v21 + ld1 {v24.8h, v25.8h}, [x10] + fadd v20.8h, v20.8h, v24.8h + fadd v21.8h, v21.8h, v25.8h + + TILE2_POST: + cbz x14, TILE2_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + fmax v20.8h, v20.8h, v29.8h + fmax v21.8h, v21.8h, v29.8h + fmin v20.8h, v20.8h, v28.8h + fmin v21.8h, v21.8h, v28.8h + + TILE2_STORE: + st1 {v20.8h, v21.8h}, [x26], x4 + +Tile2LoopCheck: + cmp x24, #1 + bge LoopDz_TILE_2 +cbz x23, Tile2End +add x23, x23, #8 +Tile2End: + sub x7, x7, #2 + add x0, x0, x21, LSL #1 + add x1, x1, #16 + add x27, 
x27, #8 + +TILE_1: + cmp x7, #1 + blt End + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +LoopDz_TILE_1: + //ld1 {v7.4s, v8.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 + + //dup v16.2d, v7.d[0] // oc:0,1,0,1 + //dup v17.2d, v7.d[1] // oc:2,3,2,3 + //dup v18.2d, v8.d[0] // oc:4,5,4,5 + //dup v19.2d, v8.d[1] // oc:6,7,6,7 + movi v16.4s, #0 // oc:0,1,0,1 + movi v17.4s, #0 // oc:2,3,2,3 + movi v18.4s, #0 // oc:4,5,4,5 + movi v19.4s, #0 // oc:6,7,6,7 + + //movi v22.4s, #0 // oc:0,1,0,1 + //movi v23.4s, #0 // oc:2,3,2,3 + //movi v24.4s, #0 // oc:4,5,4,5 + //movi v25.4s, #0 // oc:6,7,6,7 + +LoopSz_TILE_1: + // src : 1 x [1 x 8] : v2 + // weight : 2 x [2 x 8] : v0-1 + // dst : 1 x 2 x [2] : v30-v31 + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v2.8b}, [x11], x22 // src + subs x13, x13, #1 + .inst 0x4e88a450 // smmla v16.4s, v2.16b, v8.16b + .inst 0x4e89a451 // smmla v17.4s, v2.16b, v9.16b + .inst 0x4e8aa452 // smmla v18.4s, v2.16b, v10.16b + .inst 0x4e8ba453 // smmla v19.4s, v2.16b, v11.16b + + bne LoopSz_TILE_1 +LoopSzEnd_TILE_1: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v27.2d, v16.2d, v17.2d + uzp1 v26.2d, v18.2d, v19.2d + scvtf v27.4s, v27.4s + scvtf v26.4s, v26.4s + +Tile1Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v6.s}[0], [x27] // x kernel sum + ld1 {v8.4s, v9.4s}, [x6], #32 // weight quan zeropoint + fmul v27.4s, v27.4s, v0.4s + fmul v26.4s, v26.4s, v1.4s + + cbz x23, TILE1_MLA + ld1 {v4.s}[0], [x23] + fmul v27.4s, v27.4s, v4.s[0] + fmul v26.4s, v26.4s, v4.s[0] + TILE1_MLA: + MLA_WEIGHTZERO v27, v6, v8, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v26, v6, v9, 0 // tile:0, oc:4-7 + + cbz x9, TILE1_ADD_DSTV + TILE1_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + fcvtn v0.4h, v27.4s + fcvtn2 v0.8h, v26.4s + b TILE1_POST + + TILE1_ADD_DSTV: + fcvtn v0.4h, v27.4s + fcvtn2 v0.8h, v26.4s + ld1 {v24.8h}, [x10] + fadd v0.8h, v0.8h, v24.8h + + TILE1_POST: + cbz x14, TILE1_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + fmax v0.8h, v0.8h, v29.8h + fmin v0.8h, v0.8h, v28.8h + TILE1_STORE: + st1 {v0.8h}, [x26], x4 + +Tile1LoopEnd: + cmp x24, #1 + bge LoopDz_TILE_1 + +End: +ldp x27, x28, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] +ldp x23, x24, [sp, #(16 * 6)] +ldp x19, x20, [sp, #(16 * 5)] +ldp x21, x22, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 10) +ret + +#endif // __aarch64__ diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16.S b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16.S new file mode 100644 index 000000000..7022af3a1 --- /dev/null +++ b/source/backend/arm82/asm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16.S @@ -0,0 +1,875 @@ +// +// MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16.S +// MNN +// +// Created by MNN on 2022/09/26. 
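+// Int4-weight variant: each byte of the weight stream packs two int4 values, which are
+// unpacked to int8 (ushr #4 for the high nibble, and with 0x0F for the low nibble) before
+// the smmla matmuls below; layout and post-treatment otherwise match the int8 kernel above.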
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#if defined(__aarch64__) +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SET_0_5 d0, d1, d2, d3, d4 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 + movi \d4\().16b, #0 +.endm + +.macro SET_0_4 d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm + +.macro SET_0_2 d0, d1 + movi \d0\().16b, #0 + movi \d1\().16b, #0 +.endm + +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm + +.macro ReLU_FP16 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().8h, \s0\().8h, \z1\().8h + fmin \s1\().8h, \s1\().8h, \z1\().8h + fmin \s2\().8h, \s2\().8h, \z1\().8h + fmin \s3\().8h, \s3\().8h, \z1\().8h + fmax \s0\().8h, \s0\().8h, \z0\().8h + fmax \s1\().8h, \s1\().8h, \z0\().8h + fmax \s2\().8h, \s2\().8h, \z0\().8h + fmax \s3\().8h, \s3\().8h, \z0\().8h +.endm + +.macro ReLU_FP16_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().8h, \s0\().8h, \z1\().8h + fmin \s1\().8h, \s1\().8h, \z1\().8h + fmax \s0\().8h, \s0\().8h, \z0\().8h + fmax \s1\().8h, \s1\().8h, \z0\().8h +.endm +.macro Int32ToFloat z0, z1, z2, z3 + scvtf \z0\().4s, \z0\().4s + scvtf \z1\().4s, \z1\().4s + scvtf \z2\().4s, \z2\().4s + scvtf \z3\().4s, \z3\().4s +.endm +.macro MUL_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro Float32ToHalf s0, s1, s2, s3, d0, d1 + fcvtn \d0\().4h, \s0\().4s + fcvtn2 \d0\().8h, \s1\().4s + fcvtn \d1\().4h, \s2\().4s + fcvtn2 \d1\().8h, \s3\().4s +.endm + +asm_function MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16 +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; +}; +*/ +//void MNNGemmInt8AddBiasScale_ARMV86_w4_Unit(int8_t* dst, const int8_t* src, +// const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, +// const QuanPostTreatParameters* parameters, size_t realDstCount); + +//Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step +//x5:dst_depth_quad, x6: parameters, x7: realDstCount + +//Load from x7: x8: scale, x9: biasFloat, x27: srcKernelSum, x28: weightQuanBias, x14: fp32minmax +/* For FP16 +UNIT = 8; +SRC_UNIT = 8; +DST_XUNIT = 10; + */ +ldr x8, [x6, #0] +ldr x9, [x6, #8] + +stp d14, d15, [sp, #(-16 * 10)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +stp x23, x24, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x27, x28, [sp, #(16 * 8)] +// ldr w23, [x6, #24] +ldr x27, [x6, #40] // srcKernelSum +ldr x28, [x6, #48] // weightQuanBias +ldr x23, [x6, #64] // blockNum +ldr x14, [x6, #56] // fp32minmax + +mul x23, x23, x3 // UP_DIV(ic*ky*kx, SRC_UNIT) = blockNum * src_depth_quad_per_block +mov x22, #80 // GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT = 10 * 8 = 80 +mov x21, #16 // sizeof(float16_t) * UNIT + +Start: +lsl x15, x23, #5 // x15 = src_depth_quad * UNIT * UNIT_SRC * sizeof(int4_t) = src_depth_quad * 8 * 8 * 0.5 = src_depth_quad << 5 +ldr x23, [x6, #80] // extra scale +TILE_10: + cmp x7, #10 + blt TILE_8 +sub x4, x4, #128 // For Tile10 +LoopDz_TILE_10: + //ld1 {v0.4s, v1.4s}, [x9], #32 // bias + mov x11, x1 // src + mov x12, x2 // weight + mov x13, x3 // src_depth_quad + mov x10, x0 // tag dst address + + SET_0_5 v12, v16, v20, v24, v28 // oc:0,1,0,1 + SET_0_5 v13, v17, v21, v25, v29 // oc:2,3,2,3 + SET_0_5 v14, v18, v22, v26, v30 // oc:4,5,4,5 + SET_0_5 v15, v19, v23, v27, v31 // oc:6,7,6,7 + +LoopSz_TILE_10: + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + movi v2.16b, #15 + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], #64 // src: E0-E9 + ld1 {v7.16b}, [x11], #16 + // int4->int8 + + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v2.16b // oc:4-5 + and v11.16b, v1.16b, v2.16b // oc:6-7 + + subs x13, x13, #1 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + + .inst 0x4e88a4fc // smmla v28.4s, v7.16b, v8.16b // tile8-oc0, tile8-oc1, tile9-oc0, tile9-oc1 + .inst 0x4e89a4fd // smmla v29.4s, v7.16b, v9.16b // tile8-oc2, tile8-oc3, tile9-oc2, tile9-oc3 + .inst 0x4e8aa4fe // smmla v30.4s, v7.16b, v10.16b // tile8-oc4, tile8-oc5, tile9-oc4, tile9-oc5 + .inst 0x4e8ba4ff // smmla v31.4s, v7.16b, v11.16b // tile8-oc6, tile8-oc7, tile9-oc6, tile9-oc7 + bne 
LoopSz_TILE_10 +LoopSzEnd_TILE_10: + add x2, x2, x15 // weight += dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT * 0.5); + sub x5, x5, #1 // dz-- + // transpose + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v4.2d, v16.2d, v17.2d + uzp2 v5.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + + uzp1 v8.2d, v20.2d, v21.2d + uzp2 v9.2d, v20.2d, v21.2d + uzp1 v10.2d, v22.2d, v23.2d + uzp2 v11.2d, v22.2d, v23.2d + + uzp1 v12.2d, v24.2d, v25.2d + uzp2 v13.2d, v24.2d, v25.2d + uzp1 v14.2d, v26.2d, v27.2d + uzp2 v15.2d, v26.2d, v27.2d + + uzp1 v16.2d, v28.2d, v29.2d + uzp2 v17.2d, v28.2d, v29.2d + uzp1 v18.2d, v30.2d, v31.2d + uzp2 v19.2d, v30.2d, v31.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + +Tile10Quan: + ld1 {v20.4s, v21.4s}, [x8], #32 // scale + ld1 {v22.4s, v23.4s}, [x27], #32 // x kernel sum + ld1 {v24.d}[0], [x27] + sub x27, x27, #32 + ld1 {v25.4s, v26.4s}, [x28], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v4, v5 + MUL_SCALE v21, v2, v3, v6, v7 + MUL_SCALE v20, v8, v9, v12, v13 + MUL_SCALE v21, v10, v11, v14, v15 + fmul v16.4s, v16.4s, v20.4s + fmul v17.4s, v17.4s, v20.4s + fmul v18.4s, v18.4s, v21.4s + fmul v19.4s, v19.4s, v21.4s + + cbz x23, TILE10_MLA + ld1 {v27.4s, v28.4s}, [x23], #32 + ld1 {v29.d}[0], [x23] + MUL_EXTRA_SCALE v27, v0, v1, v4, v5 + MUL_EXTRA_SCALE v28, v8, v9, v12, v13 + MUL_EXTRA_SCALE v27, v2, v3, v6, v7 + MUL_EXTRA_SCALE v28, v10, v11, v14, v15 + fmul v16.4s, v16.4s, v29.s[0] + fmul v17.4s, v17.4s, v29.s[1] + fmul v18.4s, v18.4s, v29.s[0] + fmul v19.4s, v19.4s, v29.s[1] + sub x23, x23, #32 + + TILE10_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v4, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v5, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + MLA_WEIGHTZERO v8, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v9, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v10, v23, v26, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v11, v23, v26, 1 // tile:5, oc:4-7 + + MLA_WEIGHTZERO v12, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v13, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v14, v23, v26, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v15, v23, v26, 3 // tile:7, oc:4-7 + + MLA_WEIGHTZERO v16, v24, v25, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v24, v25, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v24, v26, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v19, v24, v26, 1 // tile:9, oc:4-7 + + + cbz x9, TILE10_ADD_DSTV + TILE10_ADD_BIAS: + ld1 {v20.4s, v21.4s}, [x9], #32 // bias + ADD_BIAS_FLOAT v0, v1, v4, v5, v20 + ADD_BIAS_FLOAT v2, v3, v6, v7, v21 + ADD_BIAS_FLOAT v8, v9, v12, v13, v20 + ADD_BIAS_FLOAT v10, v11, v14, v15, v21 + fadd v16.4s, v16.4s, v20.4s + fadd v17.4s, v17.4s, v20.4s + fadd v18.4s, v18.4s, v21.4s + fadd v19.4s, v19.4s, v21.4s + + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + Float32ToHalf v16, v18, v17, v19, v30, v31 + b TILE10_POST // to Relu post + + TILE10_ADD_DSTV: 
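+    // No-bias path (x9 == 0): convert the fp32 results to fp16 and accumulate onto the
+    // values already stored at dst, e.g. partial sums produced by an earlier quantization block.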
+ // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + Float32ToHalf v16, v18, v17, v19, v30, v31 + + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], #64 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x10], #64 + ld1 {v8.8h, v9.8h}, [x10] + + fadd v20.8h, v20.8h, v0.8h + fadd v21.8h, v21.8h, v1.8h + fadd v22.8h, v22.8h, v2.8h + fadd v23.8h, v23.8h, v3.8h + fadd v24.8h, v24.8h, v4.8h + fadd v25.8h, v25.8h, v5.8h + fadd v26.8h, v26.8h, v6.8h + fadd v27.8h, v27.8h, v7.8h + fadd v30.8h, v30.8h, v8.8h + fadd v31.8h, v31.8h, v9.8h + + TILE10_POST: + cbz x14, TILE10_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + + ReLU_FP16 v20, v21, v22, v23, v29, v28 + ReLU_FP16 v24, v25, v26, v27, v29, v28 + ReLU_FP16_2 v30, v31, v29, v28 + + TILE10_STORE: + + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x0], #64 + st1 {v30.8h, v31.8h}, [x0], x4 + +Tile10LoopCheck: + cmp x5, #1 + bge LoopDz_TILE_10 + b End + +TILE_8: + //ld1r {v28.4s}, [x6], #4 // f32 min + //ld1r {v29.4s}, [x6] // f32 max + movi v30.16b, #15 + cmp x7, #8 + blt TILE_4 + sub x4, x4, #64 // just for Tile8, revert it when Tile8end + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +LoopDz_TILE_8: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 // tag dst + + SET_0_4 v12, v16, v20, v24 // oc:0,1,0,1 + SET_0_4 v13, v17, v21, v25 // oc:2,3,2,3 + SET_0_4 v14, v18, v22, v26 // oc:4,5,4,5 + SET_0_4 v15, v19, v23, v27 // oc:6,7,6,7 +LoopSz_TILE_8: + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + //movi v2.16b, #15 + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], x22 // src: E0-E7 + + // int4->int8 + subs x13, x13, #1 + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v30.16b // oc:4-5 + and v11.16b, v1.16b, v30.16b // oc:6-7 + + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, 
v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + + bne LoopSz_TILE_8 +LoopSzEnd_TILE_8: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v4.2d, v16.2d, v17.2d + uzp2 v5.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + + uzp1 v8.2d, v20.2d, v21.2d + uzp2 v9.2d, v20.2d, v21.2d + uzp1 v10.2d, v22.2d, v23.2d + uzp2 v11.2d, v22.2d, v23.2d + + uzp1 v12.2d, v24.2d, v25.2d + uzp2 v13.2d, v24.2d, v25.2d + uzp1 v14.2d, v26.2d, v27.2d + uzp2 v15.2d, v26.2d, v27.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + +Tile8Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s, v23.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v4, v5 + MUL_SCALE v21, v2, v3, v6, v7 + MUL_SCALE v20, v8, v9, v12, v13 + MUL_SCALE v21, v10, v11, v14, v15 + + cbz x23, TILE8_MLA + ld1 {v27.4s, v28.4s}, [x23] + MUL_EXTRA_SCALE v27, v0, v1, v4, v5 + MUL_EXTRA_SCALE v28, v8, v9, v12, v13 + MUL_EXTRA_SCALE v27, v2, v3, v6, v7 + MUL_EXTRA_SCALE v28, v10, v11, v14, v15 + + TILE8_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v4, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v5, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + MLA_WEIGHTZERO v8, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v9, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v10, v23, v26, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v11, v23, v26, 1 // tile:5, oc:4-7 + + MLA_WEIGHTZERO v12, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v13, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v14, v23, v26, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v15, v23, v26, 3 // tile:7, oc:4-7 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v4, v5, v16 + ADD_BIAS_FLOAT v2, v3, v6, v7, v17 + ADD_BIAS_FLOAT v8, v9, v12, v13, v16 + ADD_BIAS_FLOAT v10, v11, v14, v15, v17 + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + b TILE8_POST + + TILE8_ADD_DSTV: + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + Float32ToHalf v8, v10, v9, v11, v24, v25 + Float32ToHalf v12, v14, v13, v15, v26, v27 + ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x10], #64 + ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x10] + fadd v20.8h, v20.8h, v0.8h + fadd v21.8h, v21.8h, v1.8h + fadd v22.8h, v22.8h, v2.8h + fadd v23.8h, v23.8h, v3.8h + fadd v24.8h, v24.8h, v4.8h + fadd v25.8h, v25.8h, v5.8h + fadd v26.8h, v26.8h, v6.8h + fadd v27.8h, v27.8h, v7.8h + + TILE8_POST: + cbz x14, TILE8_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + ReLU_FP16 v20, v21, v22, v23, v29, v28 + ReLU_FP16 v24, v25, v26, v27, v29, v28 + + TILE8_STORE: + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x26], #64 + st1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x26], x4 + 
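+    // Loop back while output-depth blocks (x24) remain; afterwards step the extra-scale
+    // pointer (x23, if present) past the 8 consumed floats, subtract 8 from realDstCount (x7),
+    // advance the dst/src/srcKernelSum pointers, and restore x4 before falling through to TILE_4.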
+Tile8LoopCheck: + cmp x24, #1 + bge LoopDz_TILE_8 +cbz x23, Tile8End +add x23, x23, #32 +Tile8End: + sub x7, x7, #8 + add x0, x0, x21, LSL #3 + add x1, x1, #64 + add x27, x27, #32 + add x4, x4, #64 // Revert x4 for following tiles + +TILE_4: + cmp x7, #4 + blt TILE_2 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +LoopDz_TILE_4: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 // tag dst + + SET_0_2 v12, v16 // oc:0,1,0,1 + SET_0_2 v13, v17 // oc:2,3,2,3 + SET_0_2 v14, v18 // oc:4,5,4,5 + SET_0_2 v15, v19 // oc:6,7,6,7 +LoopSz_TILE_4: + ld1 {v2.16b, v3.16b}, [x12], #32 // weight + ld1 {v4.16b, v5.16b}, [x11], x22 // src + // int4->int8 + ushr v8.16b, v2.16b, #4 + ushr v9.16b, v3.16b, #4 + and v10.16b, v2.16b, v30.16b + and v11.16b, v3.16b, v30.16b + subs x13, x13, #1 + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a4b0 // smmla v16.4s, v5.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a4b1 // smmla v17.4s, v5.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa4b2 // smmla v18.4s, v5.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba4b3 // smmla v19.4s, v5.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + bne LoopSz_TILE_4 +LoopSzEnd_TILE_4: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v4.2d, v16.2d, v17.2d + uzp2 v5.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + +Tile4Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v4, v5 + MUL_SCALE v21, v2, v3, v6, v7 + + cbz x23, TILE4_MLA + ld1 {v27.4s}, [x23] + MUL_EXTRA_SCALE v27, v0, v1, v4, v5 + MUL_EXTRA_SCALE v27, v2, v3, v6, v7 + + TILE4_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v4, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v5, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + cbz x9, TILE4_ADD_DSTV + TILE4_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v4, v5, v16 + ADD_BIAS_FLOAT v2, v3, v6, v7, v17 + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + b TILE4_POST + + TILE4_ADD_DSTV: + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + Float32ToHalf v4, v6, v5, v7, v22, v23 + ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x10] + fadd v20.8h, v20.8h, v24.8h + fadd v21.8h, v21.8h, v25.8h + fadd v22.8h, v22.8h, v26.8h + fadd v23.8h, v23.8h, v27.8h + + TILE4_POST: + cbz x14, 
TILE4_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + ReLU_FP16 v20, v21, v22, v23, v29, v28 + + TILE4_STORE: + st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x26], x4 + +Tile4LoopCheck: + cmp x24, #1 + bge LoopDz_TILE_4 +cbz x23, Tile4End +add x23, x23, #16 +Tile4End: + sub x7, x7, #4 + add x0, x0, x21, LSL #2 + add x1, x1, #32 + add x27, x27, #16 + //b TILE_4 + +TILE_2: + cmp x7, #2 + blt TILE_1 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +LoopDz_TILE_2: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 // tag dst + + // v12 oc:0,1,0,1 + // v13 oc:2,3,2,3 + // v14 oc:4,5,4,5 + // v15 oc:6,7,6,7 + SET_0_4 v12, v13, v14, v15 +LoopSz_TILE_2: + ld1 {v2.16b, v3.16b}, [x12], #32 // weight + ld1 {v4.16b}, [x11], x22 // src + // int4->int8 + ushr v8.16b, v2.16b, #4 + ushr v9.16b, v3.16b, #4 + and v10.16b, v2.16b, v30.16b + and v11.16b, v3.16b, v30.16b + + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + subs x13, x13, #1 + bne LoopSz_TILE_2 +LoopSzEnd_TILE_2: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + Int32ToFloat v0, v1, v2, v3 + +Tile2Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.d}[0], [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + fmul v0.4s, v0.4s, v20.4s + fmul v1.4s, v1.4s, v20.4s + fmul v2.4s, v2.4s, v21.4s + fmul v3.4s, v3.4s, v21.4s + + cbz x23, TILE2_MLA + ld1 {v27.d}[0], [x23] + fmul v0.4s, v0.4s, v27.s[0] + fmul v1.4s, v1.4s, v27.s[1] + fmul v2.4s, v2.4s, v27.s[0] + fmul v3.4s, v3.4s, v27.s[1] + + TILE2_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + cbz x9, TILE2_ADD_DSTV + TILE2_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + fadd v2.4s, v2.4s, v17.4s + fadd v3.4s, v3.4s, v17.4s + // float32->float16 + Float32ToHalf v0, v2, v1, v3, v20, v21 + b TILE2_POST + + TILE2_ADD_DSTV: + Float32ToHalf v0, v2, v1, v3, v20, v21 + ld1 {v24.8h, v25.8h}, [x10] + fadd v20.8h, v20.8h, v24.8h + fadd v21.8h, v21.8h, v25.8h + + TILE2_POST: + cbz x14, TILE2_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + fmax v20.8h, v20.8h, v29.8h + fmax v21.8h, v21.8h, v29.8h + fmin v20.8h, v20.8h, v28.8h + fmin v21.8h, v21.8h, v28.8h + + TILE2_STORE: + st1 {v20.8h, v21.8h}, [x26], x4 + +Tile2LoopCheck: + cmp x24, #1 + bge LoopDz_TILE_2 +cbz x23, Tile2End +add x23, x23, #8 +Tile2End: + sub x7, x7, #2 + add x0, x0, x21, LSL #1 + add x1, x1, #16 + add x27, x27, #8 + //b TILE_2 + +TILE_1: + + cmp x7, #1 + blt End + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // 
weightQuanBias +LoopDz_TILE_1: + //ld1 {v7.4s, v8.4s}, [x20], #32 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + mov x10, x26 + + //dup v16.2d, v7.d[0] // oc:0,1,0,1 + //dup v17.2d, v7.d[1] // oc:2,3,2,3 + //dup v18.2d, v8.d[0] // oc:4,5,4,5 + //dup v19.2d, v8.d[1] // oc:6,7,6,7 + movi v16.4s, #0 // oc:0,1,0,1 + movi v17.4s, #0 // oc:2,3,2,3 + movi v18.4s, #0 // oc:4,5,4,5 + movi v19.4s, #0 // oc:6,7,6,7 + + //movi v22.4s, #0 // oc:0,1,0,1 + //movi v23.4s, #0 // oc:2,3,2,3 + //movi v24.4s, #0 // oc:4,5,4,5 + //movi v25.4s, #0 // oc:6,7,6,7 + +LoopSz1_TILE_1: + // src : 1 x [1 x 8] : v2 + // weight : 2 x [2 x 8] : v0-1 + // dst : 1 x 2 x [2] : v30-v31 + ld1 {v13.16b, v14.16b}, [x12], #32 // weight + ld1 {v2.8b}, [x11], x22 // src + // int4->int8 + ushr v0.16b, v13.16b, #4 + and v3.16b, v13.16b, v30.16b + ushr v1.16b, v14.16b, #4 + and v4.16b, v14.16b, v30.16b + + .inst 0x4e80a450 // smmla v16.4s, v2.16b, v0.16b + .inst 0x4e81a451 // smmla v17.4s, v2.16b, v1.16b + .inst 0x4e83a452 // smmla v18.4s, v2.16b, v3.16b + .inst 0x4e84a453 // smmla v19.4s, v2.16b, v4.16b + subs x13, x13, #1 + bne LoopSz1_TILE_1 + + LoopSz_TILE_1_ADD: + //add v16.4s, v16.4s, v22.4s + //add v17.4s, v17.4s, v23.4s + //add v18.4s, v18.4s, v24.4s + //add v19.4s, v19.4s, v25.4s + +LoopSzEnd_TILE_1: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v27.2d, v16.2d, v17.2d + uzp1 v26.2d, v18.2d, v19.2d + scvtf v27.4s, v27.4s + scvtf v26.4s, v26.4s + +Tile1Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v6.s}[0], [x27] // x kernel sum + ld1 {v8.4s, v9.4s}, [x6], #32 // weight quan zeropoint + fmul v27.4s, v27.4s, v0.4s + fmul v26.4s, v26.4s, v1.4s + + cbz x23, TILE1_MLA + ld1 {v4.s}[0], [x23] + fmul v27.4s, v27.4s, v4.s[0] + fmul v26.4s, v26.4s, v4.s[0] + TILE1_MLA: + MLA_WEIGHTZERO v27, v6, v8, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v26, v6, v9, 0 // tile:0, oc:4-7 + + cbz x9, TILE1_ADD_DSTV + TILE1_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + fcvtn v0.4h, v27.4s + fcvtn2 v0.8h, v26.4s + b TILE1_POST + + TILE1_ADD_DSTV: + fcvtn v0.4h, v27.4s + fcvtn2 v0.8h, v26.4s + ld1 {v24.8h}, [x10] + fadd v0.8h, v0.8h, v24.8h + + TILE1_POST: + cbz x14, TILE1_STORE + ld1r {v29.8h}, [x14], #2 // f32 min + ld1r {v28.8h}, [x14] // f32 max + sub x14, x14, #2 + fmax v0.8h, v0.8h, v29.8h + fmin v0.8h, v0.8h, v28.8h + TILE1_STORE: + st1 {v0.8h}, [x26], x4 + +Tile1LoopEnd: + cmp x24, #1 + bge LoopDz_TILE_1 + +End: +ldp x27, x28, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] +ldp x23, x24, [sp, #(16 * 6)] +ldp x19, x20, [sp, #(16 * 5)] +ldp x21, x22, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 10) +ret + +#endif // __aarch64__ diff --git a/source/backend/arm82/asm/arm64/low_memory/MNNQuantScaleFP16.S b/source/backend/arm82/asm/arm64/low_memory/MNNQuantScaleFP16.S index b1e4d8ad0..3c2358402 100644 --- a/source/backend/arm82/asm/arm64/low_memory/MNNQuantScaleFP16.S +++ b/source/backend/arm82/asm/arm64/low_memory/MNNQuantScaleFP16.S @@ -29,10 +29,10 @@ stp d10, d11, [sp, #(16 * 2)] stp d8, d9, [sp, #(16 * 3)] Start: -mov w8, #1123942400 // 127.0 -dup v0.4s, w8 -fcvtn v31.4h, v0.4s -fcvtn2 v31.8h, v0.4s +movi v31.4s, #127 +scvtf v31.4s, v31.4s +//fcvtn v31.4h, v0.4s +//fcvtn2 v31.8h, v0.4s lsl x9, x4, #1 // src_step = batch * sizeof(float16_t) TILE_12: @@ -64,14 +64,32 @@ sub x4, x4, #12 add x0, x0, #24 // quant_scale = 127 / absmax // dequant_scale = 
absmax / 127 -fdiv v4.8h, v31.8h, v0.8h -fdiv v5.8h, v31.8h, v1.8h -fdiv v6.8h, v0.8h, v31.8h -fdiv v7.8h, v1.8h, v31.8h -st1 {v4.8h}, [x1], #16 -st1 {v5.d}[0], [x1], #8 -st1 {v6.8h}, [x2], #16 -st1 {v7.d}[0], [x2], #8 + +// float16->float32 +fcvtl v4.4s, v0.4h +fcvtl2 v5.4s, v0.8h +fcvtl v6.4s, v1.4h + +fdiv v8.4s, v31.4s, v4.4s +fdiv v9.4s, v31.4s, v5.4s +fdiv v10.4s, v31.4s, v6.4s + +fdiv v12.4s, v4.4s, v31.4s +fdiv v13.4s, v5.4s, v31.4s +fdiv v14.4s, v6.4s, v31.4s + +st1 {v8.4s, v9.4s, v10.4s}, [x1], #48 +st1 {v12.4s, v13.4s, v14.4s}, [x2], #48 + +//fdiv v4.8h, v31.8h, v0.8h +//fdiv v5.8h, v31.8h, v1.8h +//fdiv v6.8h, v0.8h, v31.8h +//fdiv v7.8h, v1.8h, v31.8h + +//st1 {v4.8h}, [x1], #16 +//st1 {v5.d}[0], [x1], #8 +//st1 {v6.8h}, [x2], #16 +//st1 {v7.d}[0], [x2], #8 b TILE_12 TILE_10: @@ -103,14 +121,33 @@ sub x4, x4, #10 add x0, x0, #20 // quant_scale = 127 / absmax // dequant_scale = absmax / 127 -fdiv v4.8h, v31.8h, v0.8h -fdiv v5.8h, v31.8h, v1.8h -fdiv v6.8h, v0.8h, v31.8h -fdiv v7.8h, v1.8h, v31.8h -st1 {v4.8h}, [x1], #16 -st1 {v5.s}[0], [x1], #4 -st1 {v6.8h}, [x2], #16 -st1 {v7.s}[0], [x2], #4 + +// float16->float32 +fcvtl v4.4s, v0.4h +fcvtl2 v5.4s, v0.8h +fcvtl v6.4s, v1.4h + +fdiv v8.4s, v31.4s, v4.4s +fdiv v9.4s, v31.4s, v5.4s +fdiv v10.4s, v31.4s, v6.4s + +fdiv v12.4s, v4.4s, v31.4s +fdiv v13.4s, v5.4s, v31.4s +fdiv v14.4s, v6.4s, v31.4s + +st1 {v8.4s, v9.4s}, [x1], #32 +st1 {v10.d}[0], [x1], #8 +st1 {v12.4s, v13.4s}, [x2], #32 +st1 {v14.d}[0], [x2], #8 + +// fdiv v4.8h, v31.8h, v0.8h +// fdiv v5.8h, v31.8h, v1.8h +// fdiv v6.8h, v0.8h, v31.8h +// fdiv v7.8h, v1.8h, v31.8h +// st1 {v4.8h}, [x1], #16 +// st1 {v5.s}[0], [x1], #4 +// st1 {v6.8h}, [x2], #16 +// st1 {v7.s}[0], [x2], #4 b TILE_10 @@ -139,10 +176,23 @@ sub x4, x4, #8 add x0, x0, #16 // quant_scale = 127 / absmax // dequant_scale = absmax / 127 -fdiv v2.8h, v31.8h, v0.8h -fdiv v3.8h, v0.8h, v31.8h -st1 {v2.8h}, [x1], #16 -st1 {v3.8h}, [x2], #16 +// float16->float32 +fcvtl v4.4s, v0.4h +fcvtl2 v5.4s, v0.8h + +fdiv v8.4s, v31.4s, v4.4s +fdiv v9.4s, v31.4s, v5.4s + +fdiv v12.4s, v4.4s, v31.4s +fdiv v13.4s, v5.4s, v31.4s + +st1 {v8.4s, v9.4s}, [x1], #32 +st1 {v12.4s, v13.4s}, [x2], #32 + +// fdiv v2.8h, v31.8h, v0.8h +// fdiv v3.8h, v0.8h, v31.8h +// st1 {v2.8h}, [x1], #16 +// st1 {v3.8h}, [x2], #16 b TILE_8 @@ -171,10 +221,18 @@ sub x4, x4, #1 add x0, x0, #2 // quant_scale = 127 / absmax // dequant_scale = absmax / 127 -fdiv h2, h31, h0 -fdiv h3, h0, h31 -st1 {v2.h}[0], [x1], #2 -st1 {v3.h}[0], [x2], #2 +fcvtl v4.4s, v0.4h + +fdiv v8.4s, v31.4s, v4.4s +fdiv v12.4s, v4.4s, v31.4s + +st1 {v8.s}[0], [x1], #4 +st1 {v12.s}[0], [x2], #4 + +// fdiv h2, h31, h0 +// fdiv h3, h0, h31 +// st1 {v2.h}[0], [x1], #2 +// st1 {v3.h}[0], [x2], #2 b TILE_1 diff --git a/source/backend/cpu/CMakeLists.txt b/source/backend/cpu/CMakeLists.txt index 22aeb1ef4..41426c66c 100644 --- a/source/backend/cpu/CMakeLists.txt +++ b/source/backend/cpu/CMakeLists.txt @@ -14,7 +14,6 @@ if (MNN_SUPPORT_BF16) endif() list(APPEND MNN_OBJECTS_TO_LINK $) list(APPEND MNN_TARGETS MNNCPU) -option(MNN_SSE_USE_FP16_INSTEAD "Use fp16 instead of bf16 for x86op" OFF) if(MNN_USE_SPARSE_COMPUTE) diff --git a/source/backend/cpu/CPUAttention.cpp b/source/backend/cpu/CPUAttention.cpp index a71472d1f..a420f2d0d 100644 --- a/source/backend/cpu/CPUAttention.cpp +++ b/source/backend/cpu/CPUAttention.cpp @@ -27,198 +27,366 @@ // reduce the value of 'query' to 'query * FP16_QSCALE', avoid fp16 overflow #define FP16_QSCALE 0.5 +#define FP8_E5M2 namespace MNN { +#if defined 
FP8_E5M2 // E5M2 : [S E E E E E M M] +typedef uint8_t fp8_t; +static inline fp8_t fp16_to_fp8(FLOAT16_T x) { + return *((fp8_t *)(&x) + 1); +} +static FLOAT16_T fp8_to_fp16(fp8_t x) { + uint16_t rawData = 0; + rawData |= (uint16_t)x << 8; + return *((FLOAT16_T *)(&rawData)); +} +static inline fp8_t float_to_fp8(float x) { + uint32_t rawData = *((uint32_t *)(&x)); + int sign = (rawData >> 31) & 1U; + int exp = (int)((rawData >> 23) & 0x0ffU) - 127; + if (exp < -16) + exp = -16; + if (exp > 15) + exp = 15; + exp += 16; // exp [-16, 15] ==> [0, 31] + int mant = (rawData >> 21) & 3U; + return (sign << 7) | (exp << 2) | mant; +} +static inline float fp8_to_float(fp8_t x) { + uint32_t sign = (x >> 7) & 1U; + uint32_t exp = (int)((x >> 2) & 0x1fU) - 16 + 127; + uint32_t mant = (x & 3U) << 21; + uint32_t rawData = (sign << 31) | (exp << 23) | mant; + return *((float *)(&rawData)); +} +#elif defined FP8_E4M3 // E4M3: [S E E E E M M M] +typedef uint8_t fp8_t; +static inline fp8_t fp16_to_fp8(FLOAT16_T x) { + uint16_t rawData = *((uint16_t *)(&x)); + int sign = (rawData >> 15) & 1U; + int exp = (int)((rawData >> 10) & 0x1fU) - 15; + if (exp < -8) + exp = -8; + if (exp > 7) + exp = 7; + exp += 8; // exp [-8, 7] ==> [0, 15] + int mant = (rawData >> 7) & 7U; + return (sign << 7) | (exp << 3) | mant; +} +static FLOAT16_T fp8_to_fp16(fp8_t x) { + uint32_t sign = (x >> 7) & 1U; + uint32_t exp = (int)((x >> 3) & 0x0fU) - 8 + 15; + uint32_t mant = (x & 7U) << 7; + uint16_t rawData = (sign << 15) | (exp << 10) | mant; + return *((FLOAT16_T *)(&rawData)); +} +static inline fp8_t float_to_fp8(float x) { + uint32_t rawData = *((uint32_t *)(&x)); + int sign = (rawData >> 31) & 1U; + int exp = (int)((rawData >> 23) & 0x0ffU) - 127; + if (exp < -8) + exp = -8; + if (exp > 7) + exp = 7; + exp += 8; // exp [-8, 7] ==> [0, 15] + int mant = (rawData >> 20) & 7U; + return (sign << 7) | (exp << 3) | mant; +} +static inline float fp8_to_float(fp8_t x) { + uint32_t sign = (x >> 7) & 1U; + uint32_t exp = (int)((x >> 3) & 0x0fU) - 8 + 127; + uint32_t mant = (x & 7U) << 20; + uint32_t rawData = (sign << 31) | (exp<< 23) | mant; + return *((float *)(&rawData)); +} +#else +// Do not support fp8 +#endif // fp8 format definition + +static int nearestInt(float x) { + return x < 0 ? 
-nearestInt(-x) : (int)(x + 0.5f); +} + template -static void prefill_pack(Tensor* query, Tensor* key, Tensor* value, char* query_ptr, char* key_ptr, char* value_ptr, - int mMaxLength, int mNumHead, int mKvNumHead, int mHeadDim, int mValueH, - int eP, int hP, int query_e, int key_h, int seq_len, int h, int kv_h, float q_scale) { - auto query_src = query->host(); - auto key_src = key->host(); - auto value_src = value->host(); - auto query_dst = reinterpret_cast(query_ptr); - auto key_dst = reinterpret_cast(key_ptr); - auto value_dst = reinterpret_cast(value_ptr); - // transpose query: [seq_len, num_head, head_dim] -> numhead, [seq_len/eP, head_dim, eP] - for (int i = 0; i < query_e; i++) { +static void pack_query(Tensor* query, char* pack_q, int mNumHead, int mHeadDim, int eP, int seq_len, int h, float q_scale) { + T * query_src = query->host(); + T * query_dst = reinterpret_cast(pack_q); + for (int i = 0; i < seq_len; i++) { + int out_index = i / eP; + int in_index = i % eP; for (int j = 0; j < mHeadDim; j++) { - for (int k = 0; k < eP; k++) { - int s = i * eP + k; - if (s < seq_len) { - query_dst[i * mHeadDim * eP + j * eP + k] = query_src[s * mNumHead * mHeadDim + h * mHeadDim + j] * q_scale; - } + query_dst[out_index * mHeadDim * eP + j * eP + in_index] = query_src[i * mNumHead * mHeadDim + h * mHeadDim + j] * q_scale; + } + } +} + +template +static void pack_key(Tensor* key, char* pack_key, int mPastLength, int seq_len, int mKvNumHead, int mHeadDim, int hP, int kv_h, char* scale, char* zero_point, bool quant) { + if (quant) { // Quantize the keys + auto key_src = key->host(); + auto key_dst = reinterpret_cast(pack_key); + auto scale_dst = reinterpret_cast(scale); + auto zeroPoint_dst = reinterpret_cast(zero_point); + for (int i = 0; i < seq_len; i++) { + float minKey = key_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + 0]; + float maxKey = key_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + 0]; + for (int j = 1; j < mHeadDim; j++) { + auto key = key_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + j]; + minKey = ALIMIN(minKey, key); + maxKey = ALIMAX(maxKey, key); + } + int out_index = (mPastLength + i) / hP; + int in_index = (mPastLength + i) % hP; + scale_dst[out_index * hP + in_index] = (maxKey - minKey) / 255.0f; + zeroPoint_dst[out_index * hP + in_index] = 128.0f * (maxKey - minKey) / 255.0f + minKey; + for (int j = 0; j < mHeadDim; j++) { + key_dst[out_index * mHeadDim * hP + j * hP + in_index] = nearestInt((key_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + j] - minKey) / (maxKey - minKey) * 255 - 128); } } } - // transpose key: [seq_len, num_head, head_dim] -> numhead, [seq_len/hP, head_dim, hP] - for (int i = 0; i < key_h; i++) { - for (int j = 0; j < mHeadDim; j++) { + else { // Do not quantize the keys + auto key_src = key->host(); + auto key_dst = reinterpret_cast(pack_key); + for (int i = 0; i < seq_len; i++) { + int out_index = (mPastLength + i) / hP; + int in_index = (mPastLength + i) % hP; + for (int j = 0; j < mHeadDim; j++) { + key_dst[out_index * mHeadDim * hP + j * hP + in_index] = key_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + j]; + } + } + } +} + + + +template +static void pack_value(Tensor* value, char* pack_value, int mMaxLength, int mPastLength, int seq_len, int mKvNumHead, int mHeadDim, int hP, int kv_h, bool quant) { + if (quant) { // Quantize the values to fp8 + T * value_src = value->host(); + fp8_t * value_dst = reinterpret_cast(pack_value); + for (int i = 0; i < seq_len; i++) { + for (int j = 0; j < mHeadDim; j++) { + int out_index = j / 
hP; + int in_index = j % hP; + auto origin = value_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + j]; + if (sizeof(T) == 2) + value_dst[out_index * mMaxLength * hP + (mPastLength + i) * hP + in_index] = fp16_to_fp8(origin); + else + value_dst[out_index * mMaxLength * hP + (mPastLength + i) * hP + in_index] = float_to_fp8(origin); + } + } + } + else { // Do not quantize the values + T * value_src = value->host(); + T * value_dst = reinterpret_cast(pack_value); + for (int i = 0; i < seq_len; i++) { + for (int j = 0; j < mHeadDim; j++) { + int out_index = j / hP; + int in_index = j % hP; + value_dst[out_index * mMaxLength * hP + (mPastLength + i) * hP + in_index] = value_src[i * mKvNumHead * mHeadDim + kv_h * mHeadDim + j]; + } + } + } +} + +void dequant_value_float(char * dst, char * src, int mHeadDim, int kv_seq_len, int hP, int mMaxLength) { + fp8_t * qv = (fp8_t *)src; + float * dqv = (float *)dst; + for (int i = 0; i < UP_DIV(mHeadDim, hP); i++) { + for (int j = 0; j < kv_seq_len; j++) { for (int k = 0; k < hP; k++) { - int s = i * hP + k; - if (s < seq_len) { - key_dst[i * mHeadDim * hP + j * hP + k] = key_src[s * mKvNumHead * mHeadDim + kv_h * mHeadDim + j]; - } + dqv[i * kv_seq_len * hP + j * hP + k] = fp8_to_float(qv[i * mMaxLength * hP + j * hP + k]); } } } - // transpose value: [seq_len, num_head, head_dim] -> numhead, [head_dim/hP, seq_len, hP] - for (int i = 0; i < mValueH; i++) { - for (int j = 0; j < seq_len; j++) { +} + +void dequant_value_fp16(char * dst, char * src, int mHeadDim, int kv_seq_len, int hP, int mMaxLength) { + fp8_t * qv = (fp8_t *)src; + FLOAT16_T * dqv = (FLOAT16_T *)dst; + for (int i = 0; i < UP_DIV(mHeadDim, hP); i++) { + for (int j = 0; j < kv_seq_len; j++) { for (int k = 0; k < hP; k++) { - int hd = i * hP + k; - if (hd < mHeadDim) { - value_dst[i * mMaxLength * hP + j * hP + k] = value_src[j * mKvNumHead * mHeadDim + kv_h * mHeadDim + hd]; - } + dqv[i * kv_seq_len * hP + j * hP + k] = fp8_to_fp16(qv[i * mMaxLength * hP + j * hP + k]); } } } } template -static void decode_pack(Tensor* query, Tensor* key, Tensor* value, char* query_ptr, char* key_ptr, char* value_ptr, - int mMaxLength, int mPastLength, int mHeadDim, int mValueH, int eP, int hP, int h, int kv_h, float q_scale) { - auto query_src = query->host(); - auto key_src = key->host(); - auto value_src = value->host(); - auto query_dst = reinterpret_cast(query_ptr); - auto key_dst = reinterpret_cast(key_ptr); - auto value_dst = reinterpret_cast(value_ptr); - for (int i = 0; i < mHeadDim; i++) { - query_dst[i * eP] = query_src[h * mHeadDim + i] * q_scale; - } - // transpose key: [1, num_head, head_dim] -> numhead, [kv_seq_len/hP, head_dim, hP] - int outside_offset = UP_DIV(mPastLength, hP); - int inside_offset = mPastLength % hP; - for (int i = 0; i < mHeadDim; i++) { - key_dst[(outside_offset - (inside_offset != 0)) * mHeadDim * hP + i * hP + inside_offset] = key_src[kv_h * mHeadDim + i]; - } - // transpose value: [1, num_head, head_dim] -> numhead, [head_dim/hP, kv_seq_len, hP] - for (int i = 0; i < mValueH; i++) { - for (int j = 0; j < hP; j++) { - value_dst[i * mMaxLength * hP + mPastLength * hP + j] = value_src[kv_h * mHeadDim + i * hP + j]; +static void unpack_QK(float * unpack_qk_dst, char * pack_qk_src, int seq_len, int kv_seq_len, int unit) { + float * dst = unpack_qk_dst; + T * src = (T *)(pack_qk_src); + // [kv_seq_len/unit, seq_len, unit] -> [seq_len, kv_seq_len] + for (int i = 0; i < seq_len; i++) { + for (int j = 0; j < kv_seq_len; j++) { + int out_index = j / unit; + int in_index = j % 
unit; + dst[i * kv_seq_len + j] = src[out_index * seq_len * unit + i * unit + in_index]; } } } template -static void prefill_unpack(char* pack_qkv, char* unpack_qkv, int mNumHead, int mHeadDim, int unit, int seq_len) { - auto src_ptr = reinterpret_cast(pack_qkv); - auto dst_ptr = reinterpret_cast(unpack_qkv); +static void pack_QK(char * pack_qk_dst, float * qk_src, int seq_len, int kv_seq_len, int eP) { + T * dst = reinterpret_cast(pack_qk_dst); + float * src = reinterpret_cast(qk_src); + // [seq_len, kv_seq_len] -> [seq_len/eP, kv_seq_len, eP] for (int i = 0; i < seq_len; i++) { - for (int j = 0; j < mHeadDim; j++) { - int a = j / unit; - int b = j % unit; - dst_ptr[i * mNumHead * mHeadDim + j] = src_ptr[a * seq_len * unit + i * unit + b]; + int out_index = i / eP; + int in_index = i % eP; + for (int j = 0; j < kv_seq_len; j++) { + dst[out_index * kv_seq_len * eP + j * eP + in_index] = src[i * kv_seq_len + j]; } } } template -static void prefill_softmax(int* mask_ptr, float* mask_qk, float* softmax_qk, char* unpack_qk, char* pack_qk, - float mScale, int eP, int query_e, int seq_len, float min_val, bool float_mask) { - T* qk_src = reinterpret_cast(unpack_qk); - T* qk_dst = reinterpret_cast(pack_qk); - if (float_mask) { - T* fpmask_ptr = reinterpret_cast(mask_ptr); +static void mask_QK(float * unpack_qk, int seq_len, int kv_seq_len, float mScale, float min_val, int * mask_ptr, bool float_mask) { + if (seq_len == 1) { + for (int i = 0; i < kv_seq_len; i++) { + unpack_qk[i] = unpack_qk[i] * mScale; + } + } else if (float_mask) { // float mask - for (int i = 0; i < seq_len * seq_len; i++) { - mask_qk[i] = qk_src[i] * mScale + fpmask_ptr[i]; + T* fpmask_ptr = reinterpret_cast(mask_ptr); + for (int i = 0; i < seq_len * kv_seq_len; i++) { + unpack_qk[i] = unpack_qk[i] * mScale + fpmask_ptr[i]; } } else { // int mask - for (int i = 0; i < seq_len * seq_len; i++) { + for (int i = 0; i < seq_len * kv_seq_len; i++) { if (mask_ptr[i]) { - mask_qk[i] = qk_src[i] * mScale; + unpack_qk[i] = unpack_qk[i] * mScale; } else { - mask_qk[i] = min_val; + unpack_qk[i] = min_val; } } } - for (int i = 0; i < seq_len; i++) { - MNNSoftmax(softmax_qk + i * seq_len, mask_qk + i * seq_len, seq_len); - } - for (int i = 0; i < query_e; i++) { - for (int j = 0; j < seq_len; j++) { - for (int k = 0; k < eP; k++) { - int s = i * eP + k; - if (s < seq_len) { - qk_dst[i * seq_len * eP + j * eP + k] = softmax_qk[s * seq_len + j]; - } - } - } +} + +static void softmax_QK(float* softmax_qk_addr, float* unpack_qk_addr, int seq_len, int kv_seq_len) { + for (int i = 0; i < seq_len; i++) { // softmax each row + MNNSoftmax(softmax_qk_addr + i * kv_seq_len, unpack_qk_addr + i * kv_seq_len, kv_seq_len); } } template -static void decode_softmax(float* mask_qk, float* softmax_qk, char* unpack_qk, char* pack_qk, - float mScale, int eP, int kv_seq_len) { - T* qk_src = reinterpret_cast(unpack_qk); - T* qk_dst = reinterpret_cast(pack_qk); - for (int i = 0; i < kv_seq_len; i++) { - mask_qk[i] = qk_src[i] * mScale; - } - // softmax - MNNSoftmax(softmax_qk, mask_qk, kv_seq_len); - // pack qk - for (int i = 0; i < kv_seq_len; i++) { - qk_dst[i * eP] = softmax_qk[i]; +static void unpack_QKV(char* pack_qkv, char* unpack_qkv, int mNumHead, int mHeadDim, int unit, int seq_len) { + auto src_ptr = reinterpret_cast(pack_qkv); + auto dst_ptr = reinterpret_cast(unpack_qkv); + for (int i = 0; i < seq_len; i++) { + for (int j = 0; j < mHeadDim; j++) { + int a = j / unit; + int b = j % unit; + dst_ptr[i * mNumHead * mHeadDim + j] = src_ptr[a * seq_len * 
unit + i * unit + b]; + } } } -void CPUAttention::allocKVCache() { +void CPUAttention::allocKVCache(int kv_seq_len, bool quantKey, bool quantValue) { if (!mKVCache) { return; } - mResource->mMaxLength = ROUND_UP(mResource->mPastLength, mResource->mExpandChunk); - // past_key: [1, numhead, headdim, maxlen] -> numhead, [headdim, maxlen] -> pack_b -> numhead, [maxlen/hP, head_dim, hP] - mResource->mPastKey.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), mResource->mHeadDim, hP})); - // past_value: [1, numhead, maxlen, headdim] -> numhead, [maxlen, headdim] -> pack_b -> numhead, [head_dim/hP, max_len, hP] - mResource->mPastValue.reset(Tensor::createDevice({mResource->mKvNumHead, mResource->mValueH, mResource->mMaxLength, hP})); - backend()->onAcquireBuffer(mResource->mPastKey.get(), Backend::STATIC); - backend()->onAcquireBuffer(mResource->mPastValue.get(), Backend::STATIC); + mResource->mMaxLength = kv_seq_len + mResource->mExpandChunk; + if (quantKey) { + mResource->mPastKey.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), mResource->mHeadDim, hP})); + mResource->mDequantKeyScale.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), 1, hP})); + mResource->mDequantKeyZeroPoint.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), 1, hP})); + backend()->onAcquireBuffer(mResource->mPastKey.get(), Backend::STATIC); + backend()->onAcquireBuffer(mResource->mDequantKeyScale.get(), Backend::STATIC); + backend()->onAcquireBuffer(mResource->mDequantKeyZeroPoint.get(), Backend::STATIC); + } else { + mResource->mPastKey.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), mResource->mHeadDim, hP})); + backend()->onAcquireBuffer(mResource->mPastKey.get(), Backend::STATIC); + } + if (quantValue) { + mResource->mPastValue.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mHeadDim, hP), mResource->mMaxLength, hP})); + backend()->onAcquireBuffer(mResource->mPastValue.get(), Backend::STATIC); + } else { + mResource->mPastValue.reset(Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mHeadDim, hP), mResource->mMaxLength, hP})); + backend()->onAcquireBuffer(mResource->mPastValue.get(), Backend::STATIC); + } } -void CPUAttention::reallocKVCache() { - if (!mKVCache || mResource->mPastLength < mResource->mMaxLength) { +void CPUAttention::reallocKVCache(int kv_seq_len, bool quantKey, bool quantValue) { + if (!mKVCache || kv_seq_len <= mResource->mMaxLength) { return; } - mResource->mMaxLength = mResource->mPastLength + mResource->mExpandChunk; - // past_key: [1, numhead, headdim, maxlen] -> numhead, [headdim, maxlen] -> pack_b -> numhead, [maxlen/hP, head_dim, hP] - auto new_key = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), mResource->mHeadDim, hP}); - // past_value: [1, numhead, maxlen, headdim] -> numhead, [maxlen, headdim] -> pack_b -> numhead, [head_dim/hP, max_len, hP] - auto new_value = Tensor::createDevice({mResource->mKvNumHead, mResource->mValueH, mResource->mMaxLength, hP}); - backend()->onAcquireBuffer(new_key, Backend::STATIC); - backend()->onAcquireBuffer(new_value, Backend::STATIC); - // copy - for (int h = 0; h < mResource->mKvNumHead; h++) { - ::memset(new_key->host() + h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes, 0, UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes); - ::memset(new_value->host() + h * 
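
allocKVCache above sizes the per-head caches from the shapes [max_len/hP, head_dim, hP] (keys) and [head_dim/hP, max_len, hP] (values); judging by the pointer arithmetic later in the file, quantized elements occupy one byte (int8 keys, fp8 values) while unquantized ones occupy the float width `bytes`. A sketch of the resulting per-head byte counts (helper names are illustrative):

#include <cstddef>

static inline int upDiv(int a, int b) { return (a + b - 1) / b; }

// Per-head byte size of the packed key / value caches.
size_t keyCacheBytesPerHead(int maxLength, int headDim, int hP, bool quantKey, int bytes) {
    return (size_t)upDiv(maxLength, hP) * headDim * hP * (quantKey ? 1 : bytes);
}
size_t valueCacheBytesPerHead(int maxLength, int headDim, int hP, bool quantValue, int bytes) {
    return (size_t)upDiv(headDim, hP) * maxLength * hP * (quantValue ? 1 : bytes);
}
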
mResource->mValueH * mResource->mMaxLength * hP * bytes, 0, mResource->mValueH * mResource->mMaxLength * hP * bytes); - ::memcpy(new_key->host() + h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes, - mResource->mPastKey->host() + h * UP_DIV(mResource->mPastLength, hP) * mResource->mHeadDim * hP * bytes, - UP_DIV(mResource->mPastLength, hP) * mResource->mHeadDim * hP * bytes); - for (int i = 0; i < mResource->mValueH; i++) { - ::memcpy(new_value->host() + (h * mResource->mValueH + i) * mResource->mMaxLength * hP * bytes, - mResource->mPastValue->host() + (h * mResource->mValueH + i) * mResource->mPastLength * hP * bytes, - mResource->mPastLength * hP * bytes); + int oldMaxLength = mResource->mMaxLength; + mResource->mMaxLength = kv_seq_len + mResource->mExpandChunk; + if (quantKey) { + auto new_key = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), mResource->mHeadDim, hP}); + auto new_scale = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), 1, hP}); + auto new_zeroPoint = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), 1, hP}); + backend()->onAcquireBuffer(new_key, Backend::STATIC); + backend()->onAcquireBuffer(new_scale, Backend::STATIC); + backend()->onAcquireBuffer(new_zeroPoint, Backend::STATIC); + for (int h = 0; h < mResource->mKvNumHead; h++) { + memcpy(new_key->host() + h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP, + mResource->mPastKey->host() + h * UP_DIV(oldMaxLength, hP) * mResource->mHeadDim * hP, + UP_DIV(oldMaxLength, hP) * mResource->mHeadDim * hP); + memcpy(new_scale->host() + h * UP_DIV(mResource->mMaxLength, hP) * hP * bytes, + mResource->mDequantKeyScale->host() + h * UP_DIV(oldMaxLength, hP) * hP * bytes, + UP_DIV(oldMaxLength, hP) * hP * bytes); + memcpy(new_zeroPoint->host() + h * UP_DIV(mResource->mMaxLength, hP) * hP * bytes, + mResource->mDequantKeyZeroPoint->host() + h * UP_DIV(oldMaxLength, hP) * hP * bytes, + UP_DIV(oldMaxLength, hP) * hP * bytes); } + mResource->mPastKey.reset(new_key); + mResource->mDequantKeyScale.reset(new_scale); + mResource->mDequantKeyZeroPoint.reset(new_zeroPoint); + } + else { + auto new_key = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mMaxLength, hP), mResource->mHeadDim, hP}); + backend()->onAcquireBuffer(new_key, Backend::STATIC); + for (int h = 0; h < mResource->mKvNumHead; h++) { + memcpy(new_key->host() + h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes, + mResource->mPastKey->host() + h * UP_DIV(oldMaxLength, hP) * mResource->mHeadDim * hP * bytes, + UP_DIV(oldMaxLength, hP) * mResource->mHeadDim * hP * bytes); + } + mResource->mPastKey.reset(new_key); + } + if (quantValue) { + auto new_value = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mHeadDim, hP), mResource->mMaxLength, hP}); + backend()->onAcquireBuffer(new_value, Backend::STATIC); + for (int h = 0; h < mResource->mKvNumHead; h++) { + for (int i = 0; i < UP_DIV(mResource->mHeadDim, hP); i++) { + memcpy(new_value->host() + (h * UP_DIV(mResource->mHeadDim, hP) + i) * mResource->mMaxLength * hP, + mResource->mPastValue->host() + (h * UP_DIV(mResource->mHeadDim, hP) + i) * oldMaxLength * hP, + oldMaxLength * hP); + } + } + mResource->mPastValue.reset(new_value); + } + else { + auto new_value = Tensor::createDevice({mResource->mKvNumHead, UP_DIV(mResource->mHeadDim, hP), mResource->mMaxLength, hP}); + backend()->onAcquireBuffer(new_value, Backend::STATIC); + for (int h = 0; h < 
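
The realloc path above cannot grow the buffers in place because the max-length stride changes, so it copies the old payload row by row into the new allocation. For one head of the packed value cache, that copy reduces to the following sketch (elemSize is 1 for fp8-quantized values, otherwise the float byte width; the helper name is illustrative):

#include <cstdint>
#include <cstring>

// Copy head_dim/hP rows of oldMaxLen*hP elements each into a buffer whose
// rows are newMaxLen*hP elements apart, leaving the new tail uninitialized.
void growPackedValueCache(const uint8_t* oldBuf, uint8_t* newBuf,
                          int headDimBlocks, int oldMaxLen, int newMaxLen, int hP, int elemSize) {
    for (int i = 0; i < headDimBlocks; ++i) {
        std::memcpy(newBuf + (size_t)i * newMaxLen * hP * elemSize,
                    oldBuf + (size_t)i * oldMaxLen * hP * elemSize,
                    (size_t)oldMaxLen * hP * elemSize);
    }
}
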
mResource->mKvNumHead; h++) { + for (int i = 0; i < UP_DIV(mResource->mHeadDim, hP); i++) { + memcpy(new_value->host() + (h * UP_DIV(mResource->mHeadDim, hP) + i) * mResource->mMaxLength * hP * bytes, + mResource->mPastValue->host() + (h * UP_DIV(mResource->mHeadDim, hP) + i) * oldMaxLength * hP * bytes, + oldMaxLength * hP * bytes); + } + } + mResource->mPastValue.reset(new_value); } - mResource->mPastKey.reset(new_key); - mResource->mPastValue.reset(new_value); } ErrorCode CPUAttention::onResize(const std::vector& inputs, const std::vector& outputs) { auto core = static_cast(backend())->functions(); core->MNNGetMatMulPackMode(&eP, &lP, &hP); - unit = core->pack; + mThreadNum = ((CPUBackend *)backend())->threadNumber(); + unit = core->pack; bytes = core->bytes; auto query = inputs[0]; - auto shape = query->shape(); - int seq_len = shape[1]; - mThreadNum = ((CPUBackend *)backend())->threadNumber(); - mResource->mHeadDim = shape[3]; - int query_e = UP_DIV(seq_len, eP); - mPackQ.reset(Tensor::createDevice({mThreadNum, query_e, mResource->mHeadDim, eP})); + auto key = inputs[1]; + int seq_len = query->shape()[1]; + mResource->mNumHead = query->shape()[2]; + mResource->mHeadDim = query->shape()[3]; + mResource->mKvNumHead = key->shape()[2]; + mPackQ.reset(Tensor::createDevice({mThreadNum, UP_DIV(seq_len, eP), mResource->mHeadDim, eP})); mPackQKV.reset(Tensor::createDevice({mThreadNum, UP_DIV(mResource->mHeadDim, unit), seq_len, unit})); backend()->onAcquireBuffer(mPackQ.get(), Backend::DYNAMIC); backend()->onAcquireBuffer(mPackQKV.get(), Backend::DYNAMIC); @@ -229,193 +397,240 @@ ErrorCode CPUAttention::onResize(const std::vector& inputs, const std:: ErrorCode CPUAttention::onExecute(const std::vector& inputs, const std::vector& outputs) { auto core = static_cast(backend())->functions(); - auto matmulUnit = core->MNNPackedMatMul; - auto matmulRemain = core->MNNPackedMatMulRemain; auto query = inputs[0]; - auto key = inputs[1]; + auto key = inputs[1]; auto value = inputs[2]; auto mask = inputs[3]; + auto mask_shape = mask->shape(); bool float_mask = (mask->getType() == halide_type_of()); - auto shape = query->shape(); - int seq_len = shape[1]; - mThreadNum = ((CPUBackend *)backend())->threadNumber(); - mIsDecode = seq_len == 1; - mResource->mNumHead = shape[2]; - mResource->mKvNumHead = key->shape()[2]; + int mask_seqlen = mask_shape[2]; + int mask_kvlen = mask_shape[3]; + int seq_len = query->shape()[1]; + MNN_ASSERT(seq_len == mask_seqlen); + mIsPrefill = (seq_len > 1); + // isPrefill and mask is Square Matrix, is FirstPrefill + mIsFirstPrefill = mIsPrefill && (mask_kvlen == mask_seqlen); + int tileCount = UP_DIV(mResource->mNumHead, mThreadNum); int group_size = mResource->mNumHead / mResource->mKvNumHead; - mResource->mHeadDim = shape[3]; - mResource->mScale = 1.0 / sqrt(mResource->mHeadDim); + + // 0: do not quant kv + // 1: only quant k + // 2: only quant v + // 3: quant kv + int quantKV = static_cast(backend())->getRuntime()->hint().kvcacheQuantOption; + bool quantKey = (quantKV & 1) == 1; + bool quantValue = ((quantKV >> 1) & 1) == 1; + // reduce the value of 'query' to avoid fp16 overflow + float mScale = 1.0 / sqrt(mResource->mHeadDim); float q_scale = 1.0; if (bytes == 2) { q_scale = FP16_QSCALE; - mResource->mScale /= q_scale; + mScale /= q_scale; } - mResource->mValueH = UP_DIV(mResource->mHeadDim, hP); - int query_e = UP_DIV(seq_len, eP); - int key_h = UP_DIV(seq_len, hP); - int tileCount = UP_DIV(mResource->mNumHead, mThreadNum); - std::shared_ptr mTempQK; - if (mIsDecode) { - 
reallocKVCache(); - mTempQK.reset(Tensor::createDevice({mThreadNum, eP + 2, mResource->mPastLength + 1})); - } else { - mResource->mPastLength = seq_len; - allocKVCache(); - mTempQK.reset(Tensor::createDevice({mThreadNum, 4, seq_len, seq_len})); + if (mIsPrefill) { + // Only reset the kvcache in the first prefill, but keep the kvcache in subsequent prefill + if (mIsFirstPrefill) { + mResource->mPastLength = 0; + allocKVCache(seq_len, quantKey, quantValue); + } else { + reallocKVCache(mResource->mPastLength + seq_len, quantKey, quantValue); + } + } else { // Decode + reallocKVCache(mResource->mPastLength + 1, quantKey, quantValue); } - backend()->onAcquireBuffer(mTempQK.get(), Backend::STATIC); + int kv_seq_len = mResource->mPastLength + seq_len; - std::function mPrefill = [=](int tId){ - auto pack_q = mPackQ->host() + tId * query_e * mResource->mHeadDim * eP * bytes; - auto pack_qk = mTempQK->host() + tId * 4 * seq_len * seq_len * bytes; - auto unpack_qk = pack_qk + seq_len * seq_len * 2 * bytes; - auto mask_qk = reinterpret_cast(pack_qk); - auto softmax_qk = reinterpret_cast(unpack_qk); - auto pack_qkv = mPackQKV->host() + tId * UP_DIV(mResource->mHeadDim, unit) * seq_len * unit * bytes; + // Temporary tensors for intermediate results + std::shared_ptr packQK(Tensor::createDevice({mThreadNum, UP_DIV(kv_seq_len, unit), seq_len, unit})); + std::shared_ptr unpackQK(Tensor::createDevice({mThreadNum, seq_len, kv_seq_len})); + std::shared_ptr softmaxQK(Tensor::createDevice({mThreadNum, seq_len, kv_seq_len})); + std::shared_ptr newPackQK(Tensor::createDevice({mThreadNum, UP_DIV(seq_len, eP), kv_seq_len, eP})); + std::shared_ptr dequantV(Tensor::createDevice({mThreadNum, UP_DIV(mResource->mHeadDim, hP), kv_seq_len, hP})); + backend()->onAcquireBuffer(packQK.get(), Backend::STATIC); + backend()->onAcquireBuffer(unpackQK.get(), Backend::STATIC); + backend()->onAcquireBuffer(softmaxQK.get(), Backend::STATIC); + backend()->onAcquireBuffer(newPackQK.get(), Backend::STATIC); + if (quantValue) { + backend()->onAcquireBuffer(dequantV.get(), Backend::STATIC); + } - int head_index = tId * tileCount; + std::function mCompute = [=](int tId) { + auto pack_q = mPackQ->host() + tId * UP_DIV(seq_len, eP) * mResource->mHeadDim * eP * bytes; + auto pack_qk = packQK->host() + tId * UP_DIV(kv_seq_len, unit) * seq_len * unit * bytes; + auto unpack_qk = unpackQK->host() + tId * seq_len * kv_seq_len; + auto softmax_qk = softmaxQK->host() + tId * seq_len * kv_seq_len; + auto new_pack_qk = newPackQK->host() + tId * UP_DIV(seq_len, eP) * kv_seq_len * eP * bytes; + auto pack_qkv = mPackQKV->host() + tId * UP_DIV(mResource->mHeadDim, unit) * seq_len * unit * bytes; + int head_index = tId * tileCount; for (int h = head_index; h < head_index + tileCount && h < mResource->mNumHead; h++) { + int kv_h = h / group_size; + char * key_dst = nullptr; + char * key_scale_dst = nullptr; + char * key_zero_point_dst = nullptr; + char * value_dst = nullptr; + if (quantKey) { + key_dst = mResource->mPastKey->host() + kv_h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP; + key_scale_dst = mResource->mDequantKeyScale->host() + kv_h * UP_DIV(mResource->mMaxLength, hP) * 1 * hP * bytes; + key_zero_point_dst = mResource->mDequantKeyZeroPoint->host() + kv_h * UP_DIV(mResource->mMaxLength, hP) * 1 * hP * bytes; + } else { + key_dst = mResource->mPastKey->host() + kv_h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes; + } + if (quantValue) { + value_dst = mResource->mPastValue->host() + kv_h * 
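
The kvcacheQuantOption hint decoded above is a two-bit mask: bit 0 selects int8 keys, bit 1 selects fp8 values, so 0 means no quantization and 3 means both. A tiny decoding sketch (struct and function names are illustrative):

struct KVCacheQuant {
    bool quantKey;    // bit 0: quantize keys to int8
    bool quantValue;  // bit 1: quantize values to fp8
};

static inline KVCacheQuant parseKVCacheQuantOption(int option) {
    KVCacheQuant q;
    q.quantKey = (option & 1) == 1;
    q.quantValue = ((option >> 1) & 1) == 1;
    return q;   // option == 0 -> neither, 1 -> K only, 2 -> V only, 3 -> both
}
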
UP_DIV(mResource->mHeadDim, hP) * mResource->mMaxLength * hP; + } else { + value_dst = mResource->mPastValue->host() + kv_h * UP_DIV(mResource->mHeadDim, hP) * mResource->mMaxLength * hP * bytes; + } // pack for matmul - int kv_h = h / group_size; - auto key_dst = mResource->mPastKey->host() + kv_h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes; - auto value_dst = mResource->mPastValue->host() + kv_h * mResource->mValueH * mResource->mMaxLength * hP * bytes; if (bytes == 2) { - prefill_pack(query, key, value, pack_q, key_dst, value_dst, mResource->mMaxLength, mResource->mNumHead, mResource->mKvNumHead, mResource->mHeadDim, mResource->mValueH, eP, hP, query_e, key_h, seq_len, h, kv_h, q_scale); + pack_query(query, pack_q, mResource->mNumHead, mResource->mHeadDim, eP, seq_len, h, q_scale); + pack_key(key, key_dst, mResource->mPastLength, seq_len, mResource->mKvNumHead, mResource->mHeadDim, hP, kv_h, key_scale_dst, key_zero_point_dst, quantKey); + pack_value(value, value_dst, mResource->mMaxLength, mResource->mPastLength, seq_len, mResource->mKvNumHead, mResource->mHeadDim, hP, kv_h, quantValue); } else { - prefill_pack(query, key, value, pack_q, key_dst, value_dst, mResource->mMaxLength, mResource->mNumHead, mResource->mKvNumHead, mResource->mHeadDim, mResource->mValueH, eP, hP, query_e, key_h, seq_len, h, kv_h, q_scale); + pack_query(query, pack_q, mResource->mNumHead, mResource->mHeadDim, eP, seq_len, h, q_scale); + pack_key(key, key_dst, mResource->mPastLength, seq_len, mResource->mKvNumHead, mResource->mHeadDim, hP, kv_h, key_scale_dst, key_zero_point_dst, quantKey); + pack_value(value, value_dst, mResource->mMaxLength, mResource->mPastLength, seq_len, mResource->mKvNumHead, mResource->mHeadDim, hP, kv_h, quantValue); } // query @ key int loop_e = seq_len / eP; int remain = seq_len % eP; for (int i = 0 ; i < loop_e; i++) { - size_t shapeParameters[6]; + size_t shapeParameters[7]; size_t* parameters = shapeParameters; - parameters[0] = eP * bytes; - parameters[1] = mResource->mHeadDim; - parameters[2] = seq_len; - parameters[3] = seq_len * unit * bytes; - parameters[4] = 0; - parameters[5] = 0; - matmulUnit((float*)(pack_qk + (i * eP * unit) * bytes), (float*)(pack_q + (i * mResource->mHeadDim * eP) * bytes), (float*)key_dst, parameters, nullptr, nullptr, nullptr, nullptr); + parameters[0] = eP * bytes; + parameters[1] = mResource->mHeadDim; + parameters[2] = kv_seq_len; + parameters[3] = seq_len * unit * bytes; + parameters[4] = 0; + parameters[5] = 0; + parameters[6] = 0; + if (quantKey) { + core->MNNPackedMatMul_int8( + (float*)(pack_qk + (i * eP * unit) * bytes), + (float*)(pack_q + (i * mResource->mHeadDim * eP) * bytes), + (float*)key_dst, + parameters, nullptr, nullptr, + (float*)key_scale_dst, (float*)key_zero_point_dst + ); + } else { + core->MNNPackedMatMul( + (float*)(pack_qk + (i * eP * unit) * bytes), + (float*)(pack_q + (i * mResource->mHeadDim * eP) * bytes), + (float*)key_dst, + parameters, nullptr, nullptr, + nullptr, nullptr + ); + } } { - size_t shapeParameters[6]; + size_t shapeParameters[7]; size_t* parameters = shapeParameters; - parameters[0] = eP * bytes; - parameters[1] = mResource->mHeadDim; - parameters[2] = seq_len; - parameters[3] = seq_len * unit * bytes; - parameters[4] = 0; - parameters[5] = 0; - matmulRemain((float*)(pack_qk + (loop_e * eP * unit) * bytes), (float*)(pack_q + (loop_e * mResource->mHeadDim * eP) * bytes), (float*)key_dst, remain, parameters, nullptr, nullptr, nullptr, nullptr); + parameters[0] = eP * bytes; + 
parameters[1] = mResource->mHeadDim; + parameters[2] = kv_seq_len; + parameters[3] = seq_len * unit * bytes; + parameters[4] = 0; + parameters[5] = 0; + parameters[6] = 0; + if (quantKey) { + core->MNNPackedMatMulRemain_int8( + (float*)(pack_qk + (loop_e * eP * unit) * bytes), + (float*)(pack_q + (loop_e * mResource->mHeadDim * eP) * bytes), + (float*)key_dst, + remain, parameters, nullptr, nullptr, + (float*)key_scale_dst, (float*)key_zero_point_dst + ); + } else { + core->MNNPackedMatMulRemain( + (float*)(pack_qk + (loop_e * eP * unit) * bytes), + (float*)(pack_q + (loop_e * mResource->mHeadDim * eP) * bytes), + (float*)key_dst, + remain, parameters, nullptr, nullptr, + nullptr, nullptr + ); + } } - int area_offset[2] {seq_len, 0}; - core->MNNUnpackCUnitTranspose((float*)unpack_qk, (float*)pack_qk, seq_len, seq_len, area_offset); - // div scale and mask - auto mask_ptr = mask->host(); - if (bytes == 2) { - prefill_softmax(mask_ptr, mask_qk, softmax_qk, unpack_qk, pack_qk, mResource->mScale, eP, query_e, seq_len, -65504.0, float_mask); + if(bytes == 2) { + // unpack qk: [kv_seq_len/unit, seq_len, unit] -> [seq_len, kv_seq_len] + unpack_QK(unpack_qk, pack_qk, seq_len, kv_seq_len, unit); + mask_QK(unpack_qk, seq_len, kv_seq_len, mScale, std::numeric_limits::lowest(), mask->host(), float_mask); + softmax_QK(softmax_qk, unpack_qk, seq_len, kv_seq_len); + // pack qk for qk @ v : [seq_len, kv_seq_len] -> [seq_len/eP, kv_seq_len, eP] + pack_QK(new_pack_qk, softmax_qk, seq_len, kv_seq_len, eP); } else { - prefill_softmax(mask_ptr, mask_qk, softmax_qk, unpack_qk, pack_qk, mResource->mScale, eP, query_e, seq_len, std::numeric_limits::lowest(), float_mask); + unpack_QK(unpack_qk, pack_qk, seq_len, kv_seq_len, unit); + mask_QK(unpack_qk, seq_len, kv_seq_len, mScale, std::numeric_limits::lowest(), mask->host(), float_mask); + softmax_QK(softmax_qk, unpack_qk, seq_len, kv_seq_len); + pack_QK(new_pack_qk, softmax_qk, seq_len, kv_seq_len, eP); + } + // Dequantize values from fp8 to float + if (quantValue) { + char * qv = value_dst; + char * dqv = dequantV->host() + tId * UP_DIV(mResource->mHeadDim, hP) * kv_seq_len * hP * bytes; + if (bytes == 2) { + dequant_value_fp16(dqv, qv, mResource->mHeadDim, kv_seq_len, hP, mResource->mMaxLength); + } else { + dequant_value_float(dqv, qv, mResource->mHeadDim, kv_seq_len, hP, mResource->mMaxLength); + } + value_dst = dqv; } // qk @ v for (int i = 0 ; i < loop_e; i++) { size_t shapeParameters[6]; size_t* parameters = shapeParameters; parameters[0] = eP * bytes; - parameters[1] = seq_len; + parameters[1] = kv_seq_len; parameters[2] = mResource->mHeadDim; parameters[3] = seq_len * unit * bytes; parameters[4] = 0; - parameters[5] = (mResource->mMaxLength - seq_len) * hP * bytes; - matmulUnit((float*)(pack_qkv + (i * eP * unit) * bytes), (float*)(pack_qk + (i * seq_len * eP) * bytes), (float*)value_dst, parameters, nullptr, nullptr, nullptr, nullptr); + parameters[5] = quantValue ? 
0 : (mResource->mMaxLength - kv_seq_len) * hP * bytes; + core->MNNPackedMatMul( + (float*)(pack_qkv + (i * eP * unit) * bytes), + (float*)(new_pack_qk + (i * kv_seq_len * eP) * bytes), + (float*)value_dst, parameters, + nullptr, nullptr, nullptr, nullptr + ); } { size_t shapeParameters[6]; size_t* parameters = shapeParameters; parameters[0] = eP * bytes; - parameters[1] = seq_len; + parameters[1] = kv_seq_len; parameters[2] = mResource->mHeadDim; parameters[3] = seq_len * unit * bytes; parameters[4] = 0; - parameters[5] = (mResource->mMaxLength - seq_len) * hP * bytes; - matmulRemain((float*)(pack_qkv + (loop_e * eP * unit) * bytes), (float*)(pack_qk + (loop_e * seq_len * eP) * bytes), (float*)value_dst, remain, parameters, nullptr, nullptr, nullptr, nullptr); + parameters[5] = quantValue ? 0 : (mResource->mMaxLength - kv_seq_len) * hP * bytes; + core->MNNPackedMatMulRemain( + (float*)(pack_qkv + (loop_e * eP * unit) * bytes), + (float*)(new_pack_qk + (loop_e * kv_seq_len * eP) * bytes), + (float*)value_dst, remain, parameters, + nullptr, nullptr, nullptr, nullptr + ); } - // transpose: [head_dim/unit, seq_len, unit] -> [seq_len, num_head, head_dim] + // unpack: [head_dim/unit, seq_len, unit] -> [seq_len, num_head, head_dim] auto dst_ptr = outputs[0]->host() + h * mResource->mHeadDim * bytes; if (bytes == 2) { - prefill_unpack(pack_qkv, dst_ptr, mResource->mNumHead, mResource->mHeadDim, unit, seq_len); + unpack_QKV(pack_qkv, dst_ptr, mResource->mNumHead, mResource->mHeadDim, unit, seq_len); } else { - prefill_unpack(pack_qkv, dst_ptr, mResource->mNumHead, mResource->mHeadDim, unit, seq_len); + unpack_QKV(pack_qkv, dst_ptr, mResource->mNumHead, mResource->mHeadDim, unit, seq_len); } } }; - std::function mDecode = [=](int tId) { - int kv_seq_len = mResource->mPastLength + 1; - auto pack_q = mPackQ->host() + tId * mResource->mHeadDim * eP * bytes; - auto pack_qk = mTempQK->host() + tId * (eP + 2) * kv_seq_len * bytes; - auto unpack_qk = pack_qk + kv_seq_len * eP * bytes; - auto mask_qk = reinterpret_cast(pack_qk); - auto softmax_qk = reinterpret_cast(unpack_qk); - auto pack_qkv = mPackQKV->host() + tId * UP_DIV(mResource->mHeadDim, unit) * unit * bytes; - - int head_index = tId * tileCount; - for (int h = head_index; h < head_index + tileCount && h < mResource->mNumHead; h++) { - int kv_h = h / group_size; - auto key_dst = mResource->mPastKey->host() + kv_h * UP_DIV(mResource->mMaxLength, hP) * mResource->mHeadDim * hP * bytes; - auto value_dst = mResource->mPastValue->host() + kv_h * mResource->mValueH * mResource->mMaxLength * hP * bytes; - // pack for matmul - if (bytes == 2) { - decode_pack(query, key, value, pack_q, key_dst, value_dst, mResource->mMaxLength, mResource->mPastLength, mResource->mHeadDim, mResource->mValueH, eP, hP, h, kv_h, q_scale); - } else { - decode_pack(query, key, value, pack_q, key_dst, value_dst, mResource->mMaxLength, mResource->mPastLength, mResource->mHeadDim, mResource->mValueH, eP, hP, h, kv_h, q_scale); - } - // query @ key: [1, head_dim] @ [head_dim, kv_seq_len] -> [1, kv_seq_len] - size_t shapeParameters[6]; - size_t* parameters = shapeParameters; - parameters[0] = eP * bytes; - parameters[1] = mResource->mHeadDim; - parameters[2] = kv_seq_len; - parameters[3] = seq_len * unit * bytes; - parameters[4] = 0; - parameters[5] = 0; - matmulRemain((float*)pack_qk, (float*)pack_q, (float*)key_dst, seq_len, parameters, nullptr, nullptr, nullptr, nullptr); - int area_offset[2] {seq_len, 0}; - core->MNNUnpackCUnitTranspose((float*)unpack_qk, (float*)pack_qk, seq_len, 
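
The per-head pipeline above (pack Q, Q@K^T with the packed matmul, mask, softmax, repack, multiply by V, unpack) is a tiled form of scaled-dot-product attention. A deliberately naive reference for one head, useful as a mental model or as a check against the packed path; it assumes row-major Q [seq_len, head_dim], K and V [kv_seq_len, head_dim] and an additive float mask (not an MNN API):

#include <algorithm>
#include <cmath>
#include <vector>

// out = softmax(Q K^T * scale + mask) V for a single head, unoptimized.
std::vector<float> referenceAttention(const std::vector<float>& Q, const std::vector<float>& K,
                                      const std::vector<float>& V, const std::vector<float>& mask,
                                      int seq_len, int kv_seq_len, int head_dim, float scale) {
    std::vector<float> out(seq_len * head_dim, 0.f);
    std::vector<float> row(kv_seq_len);
    for (int i = 0; i < seq_len; ++i) {
        for (int j = 0; j < kv_seq_len; ++j) {
            float s = 0.f;
            for (int d = 0; d < head_dim; ++d) {
                s += Q[i * head_dim + d] * K[j * head_dim + d];
            }
            row[j] = s * scale + mask[i * kv_seq_len + j];
        }
        float maxv = row[0], sum = 0.f;
        for (float v : row) maxv = std::max(maxv, v);
        for (float& v : row) { v = std::exp(v - maxv); sum += v; }
        for (float& v : row) { v /= sum; }
        for (int j = 0; j < kv_seq_len; ++j) {
            for (int d = 0; d < head_dim; ++d) {
                out[i * head_dim + d] += row[j] * V[j * head_dim + d];
            }
        }
    }
    return out;
}
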
kv_seq_len, area_offset); - if (bytes == 2) { - decode_softmax(mask_qk, softmax_qk, unpack_qk, pack_qk, mResource->mScale, eP, kv_seq_len); - } else { - decode_softmax(mask_qk, softmax_qk, unpack_qk, pack_qk, mResource->mScale, eP, kv_seq_len); - } - // qk @ v: [1, kv_seq_len] @ [kv_seq_len, head_dim] -> [1, head_dim] - { - size_t shapeParameters[6]; - size_t* parameters = shapeParameters; - parameters[0] = eP * bytes; - parameters[1] = kv_seq_len; - parameters[2] = mResource->mHeadDim; - parameters[3] = 1 * unit * bytes; - parameters[5] = (mResource->mMaxLength - kv_seq_len) * hP * bytes; - matmulRemain((float*)pack_qkv, (float*)pack_qk, (float*)value_dst, 1, parameters, nullptr, nullptr, nullptr, nullptr); - } - // transpose: [head_dim/unit, 1, unit] -> [1, num_head, head_dim] - auto dst_ptr = outputs[0]->host() + h * mResource->mHeadDim * bytes; - core->MNNUnpackCUnitTranspose((float*)dst_ptr, (float*)pack_qkv, 1, mResource->mHeadDim, area_offset); - } - }; - - std::function mFunction = mIsDecode ? mDecode : mPrefill; MNN_CONCURRENCY_BEGIN(tId, mThreadNum) { - mFunction((int)tId); + mCompute((int)tId); } MNN_CONCURRENCY_END(); - if(mIsDecode) { - mResource->mPastLength++; + + mResource->mPastLength += seq_len; + backend()->onReleaseBuffer(packQK.get(), Backend::STATIC); + backend()->onReleaseBuffer(unpackQK.get(), Backend::STATIC); + backend()->onReleaseBuffer(softmaxQK.get(), Backend::STATIC); + backend()->onReleaseBuffer(newPackQK.get(), Backend::STATIC); + if (quantValue){ + backend()->onReleaseBuffer(dequantV.get(), Backend::STATIC); } - backend()->onReleaseBuffer(mTempQK.get(), Backend::STATIC); return NO_ERROR; } @@ -447,4 +662,4 @@ REGISTER_CPU_OP_CREATOR_TRANSFORMER(CPUAttentionCreator, OpType_Attention); } // namespace MNN -#endif +#endif \ No newline at end of file diff --git a/source/backend/cpu/CPUAttention.hpp b/source/backend/cpu/CPUAttention.hpp index bc48de6b4..abf351249 100644 --- a/source/backend/cpu/CPUAttention.hpp +++ b/source/backend/cpu/CPUAttention.hpp @@ -25,17 +25,19 @@ class CPUAttention : public Execution { virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; struct Resource { - std::shared_ptr mPastKey; - std::shared_ptr mPastValue; - float mScale; - const int mExpandChunk = 64; + std::shared_ptr mPastKey; // numhead, [maxlen/eP, headdim, eP] + std::shared_ptr mPastValue; // numhead, [headdim/eP, maxlen, eP] + std::shared_ptr mDequantKeyScale; // numhead, [maxlen/eP, 1, eP] + std::shared_ptr mDequantKeyZeroPoint; // numhead, [maxlen/eP, 1, eP] int mPastLength = 0, mMaxLength = 0; - int mNumHead = 0, mKvNumHead = 0, mHeadDim = 0, mValueH = 0; + const int mExpandChunk = 64; + int mNumHead = 0, mKvNumHead = 0, mHeadDim = 0; }; private: - void allocKVCache(); - void reallocKVCache(); - bool mIsDecode = false; + void allocKVCache(int kv_seq_len, bool quantK, bool quantV); + void reallocKVCache(int kv_seq_len, bool quantK, bool quantV); + bool mIsPrefill = true; + bool mIsFirstPrefill = true; bool mKVCache; int mThreadNum = 1; std::shared_ptr mResource; diff --git a/source/backend/cpu/CPUBackend.cpp b/source/backend/cpu/CPUBackend.cpp index 66c349c37..5f1a75eab 100644 --- a/source/backend/cpu/CPUBackend.cpp +++ b/source/backend/cpu/CPUBackend.cpp @@ -30,7 +30,6 @@ #define MAX_THREAD_NUMBER 32 #define LARGE_MEMORY 1024 * 1024 * 500 #ifdef MNN_SUPPORT_BF16 -#include "bf16/BF16Backend.hpp" #include "bf16/BF16Functions.hpp" #endif @@ -48,56 +47,183 @@ 
ErrorCode CastWrapExecution::onExecute(const std::vector& inputs, const CPUCastCreator::cast(inputs[0], outputs[0], cpuBackend, convertType); return NO_ERROR; } - -CPURuntime::CPURuntime(const Backend::Info& info) { - mStaticAllocator.reset(new EagerBufferAllocator(BufferAllocator::Allocator::createDefault())); - mThreadNumber = info.numThread; - mThreadNumber = std::max(1, mThreadNumber); - mThreadNumber = std::min(mThreadNumber, MAX_THREAD_NUMBER); - mPower = BackendConfig::Power_Normal; - mMemory = BackendConfig::Memory_Normal; - mPrecision = BackendConfig::Precision_Normal; - mFlops = MNNGetCPUFlops(mThreadNumber); - if (info.user != nullptr) { - mPrecision = info.user->precision; - mPower = info.user->power; - mMemory = info.user->memory; - mFlags = info.user->flags; +void CPURuntime::computeDivideSizes(int size, int* dst) const { + if (mGroupWithComputeRate.size() <= 1) { + // Avg divide + int length = UP_DIV(size, mThreadNumber); + int cur = length; + for (int i=0; igroups.size() == 0) { + return; + } + std::vector> lockCPUIndexes(mThreadNumber); switch (mPower) { case BackendConfig::Power_Low: - MNNSetCPUThreadsMode(MNN_CPU_MODE_LITTLE); + for (int v=0; vgroups[0].ids.data(), cpuInfo->groups[0].ids.size()); + } break; case BackendConfig::Power_High: - MNNSetCPUThreadsMode(MNN_CPU_MODE_POWER_FRI); + { + int selectCPUSize = 0; + int groupIndex = cpuInfo->groups.size() - 1; + while (selectCPUSize < mThreadNumber && groupIndex >= 0) { + auto& group = cpuInfo->groups[groupIndex]; + int size = ALIMIN(group.ids.size(), mThreadNumber - selectCPUSize); + for (int v=0; v result(threadsNumber, 0); +#pragma omp parallel for + for (int i = 0; i < threadsNumber; ++i) { + result[i] = MNNSetSchedAffinity(lockCPUIndexes[i].first, lockCPUIndexes[i].second); + } #endif #ifdef MNN_USE_THREAD_POOL - mThreadNumber = ThreadPool::init(mThreadNumber); + ThreadPool::active(mThreadNumber); + ThreadPool::enqueue(std::make_pair([&](int i) { + MNNSetSchedAffinity(lockCPUIndexes[i].first, lockCPUIndexes[i].second); + return 0; + }, mThreadNumber), mTaskIndex, mThreadNumber); + ThreadPool::deactive(mThreadNumber); +#endif +} + +void CPURuntime::_resetGroupCompute() const { + if (mPastDecreaseHint == hint().cpuDecreaseRate) { + return; + } + mGroupWithComputeRate.clear(); + if (mThreadNumber <= 1 || mPower == BackendConfig::Power_Low) { + return; + } + mPastDecreaseHint = hint().cpuDecreaseRate; + auto cpuInfo = MNNGetCPUInfo(); + if (cpuInfo->groups.size() < 2) { + return; + } + float decreaseRate = (float)(hint().cpuDecreaseRate) / 100.0f; + int validCpuSize = (int)(cpuInfo->groups[cpuInfo->groups.size()-1].ids.size()); + int groupIndex = (int)cpuInfo->groups.size()-2; + float maxFreq = (float)cpuInfo->groups[cpuInfo->groups.size()-1].maxFreq; + validCpuSize = ALIMIN(validCpuSize, mThreadNumber); + float totalComputeRate = 1.0f * validCpuSize; + mGroupWithComputeRate.emplace_back(std::make_pair(totalComputeRate, validCpuSize)); + float currentRate = 1.0f; + while (validCpuSize < mThreadNumber && groupIndex >= 0) { + auto& group = cpuInfo->groups[groupIndex]; + int selectSize = ALIMIN(mThreadNumber - validCpuSize, (int)group.ids.size()); + validCpuSize += group.ids.size(); + currentRate *= decreaseRate; + totalComputeRate += currentRate * selectSize; + mGroupWithComputeRate.emplace_back(std::make_pair(currentRate * selectSize, selectSize)); + } + for (auto& g : mGroupWithComputeRate) { + g.first = g.first / totalComputeRate; + } +} + +void CPURuntime::_resetThreadPool() { + mThreadNumber = std::max(1, 
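
computeDivideSizes above falls back to an even split when only one CPU group is known; with mGroupWithComputeRate populated (see _resetGroupCompute, which scales slower groups by the cpuDecreaseRate hint), the intent is to give each group a share of the work proportional to its compute rate and split that share among the group's threads. The weighted branch is not fully visible in this hunk, so the following is only a sketch of that idea, with its own rounding:

#include <algorithm>
#include <utility>
#include <vector>

// rateAndThreads: one (normalized rate, thread count) pair per CPU group.
// Appends cumulative end offsets, one per thread, covering [0, size).
void divideByRate(int size, const std::vector<std::pair<float, int>>& rateAndThreads,
                  std::vector<int>& ends) {
    int assigned = 0;
    for (size_t gi = 0; gi < rateAndThreads.size(); ++gi) {
        const int threads = rateAndThreads[gi].second;
        if (threads <= 0) continue;
        const bool last = (gi + 1 == rateAndThreads.size());
        const int groupWork = last ? (size - assigned)
                                   : (int)(rateAndThreads[gi].first * size);
        const int per = (groupWork + threads - 1) / threads;
        for (int t = 0; t < threads; ++t) {
            assigned = std::min(size, assigned + per);
            ends.push_back(assigned);
        }
    }
}
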
mThreadNumber); + mThreadNumber = std::min(mThreadNumber, MAX_THREAD_NUMBER); +#ifdef MNN_USE_THREAD_POOL + ThreadPool::releaseWorkIndex(mTaskIndex); + auto cpuInfo = MNNGetCPUInfo(); + if (mThreadNumber > 1) { + int systemThreadNumber = (int)cpuInfo->cpuNumber; + if (systemThreadNumber == 0) { + systemThreadNumber = mThreadNumber; + } + mThreadNumber = ALIMIN(ThreadPool::init(systemThreadNumber), mThreadNumber); + } + mGroupWithComputeRate.clear(); if (mThreadNumber > 1) { mTaskIndex = ThreadPool::acquireWorkIndex(); + if (-1 == mTaskIndex) { + MNN_ERROR("The ThreadPool has been used to MNN_THREAD_POOL_MAX_TASKS, can't use thread pool\n"); + mThreadNumber = 1; + } } else { mTaskIndex = -1; } - if (mTaskIndex >= 0 && mPower == BackendConfig::Power_High) { - ThreadPool::active(); - } #endif + // Reset tid to rebind cpu if necessary + mCurrentTID = 0; +} +void CPURuntime::onReset(int numberThread, const BackendConfig* config) { + if (config != nullptr) { + mPrecision = config->precision; + mPower = config->power; + mMemory = config->memory; + mFlags = config->flags; + } + mThreadNumber = numberThread; + _resetThreadPool(); + // Mask Group Compute reset + mPastDecreaseHint = -1; +} + +CPURuntime::CPURuntime(const Backend::Info& info) { + mStaticAllocator.reset(new EagerBufferAllocator(BufferAllocator::Allocator::createDefault())); + mThreadNumber = info.numThread; + mPower = BackendConfig::Power_Normal; + mMemory = BackendConfig::Memory_Normal; + mPrecision = BackendConfig::Precision_Normal; + if (info.user != nullptr) { + mPrecision = info.user->precision; + mPower = info.user->power; + mMemory = info.user->memory; + mFlags = info.user->flags; + } + _resetThreadPool(); #ifdef LOG_VERBOSE MNN_PRINT("create CPURuntime:%p\n", this); #endif } CPURuntime:: ~ CPURuntime() { #ifdef MNN_USE_THREAD_POOL - if (mTaskIndex >= 0 && mPower == BackendConfig::Power_High) { - ThreadPool::deactive(); - } ThreadPool::releaseWorkIndex(mTaskIndex); #endif } @@ -106,13 +232,7 @@ float CPURuntime::onGetMemoryInMB() { return staticMemoryInMB; } bool CPURuntime::onCheckInfo(Backend::Info& info) const { -#ifdef MNN_USE_THREAD_POOL - int threadNumber = mThreadNumber; - if (mTaskIndex < 0) { - threadNumber = 1; - } - info.numThread = threadNumber; -#endif + info.numThread = mThreadNumber; return true; } @@ -120,6 +240,7 @@ Backend* CPURuntime::onCreate(const BackendConfig* config) const { auto precision = mPrecision; auto memory = mMemory; size_t flags = mFlags; + _resetGroupCompute(); if (nullptr != config) { precision = config->precision; flags = config->flags; @@ -137,7 +258,9 @@ Backend* CPURuntime::onCreate(const BackendConfig* config) const { #endif #ifdef MNN_SUPPORT_BF16 if (precision == BackendConfig::Precision_Low_BF16 && BF16Functions::get()) { - return new BF16Backend(this); + auto res = new CPUBackend(this, precision, memory, MNN_FORWARD_CPU_EXTENSION, 0); + res->mCoreFunctions = BF16Functions::get(); + return res; } #endif if (flags == MNN_CPU_USE_DEFAULT_BACKEND) { @@ -178,8 +301,9 @@ void CPURuntime::onGabageCollect(int level) { void CPURuntime::onConcurrencyBegin() const { #ifdef MNN_USE_THREAD_POOL - if (mTaskIndex >= 0 && mPower != BackendConfig::Power_High) { - ThreadPool::active(); + if (mTaskIndex >= 0) { + ThreadPool::active(mThreadNumber); + mThreadOpen = true; } #else #ifdef _OPENMP @@ -187,12 +311,14 @@ void CPURuntime::onConcurrencyBegin() const { omp_set_num_threads(mThreadNumber); #endif #endif + _bindCPUCore(); } void CPURuntime::onConcurrencyEnd() const { #ifdef MNN_USE_THREAD_POOL - if 
(mTaskIndex >= 0 && mPower != BackendConfig::Power_High) { - ThreadPool::deactive(); + if (mTaskIndex >= 0) { + ThreadPool::deactive(mThreadNumber); + mThreadOpen = false; } #endif } @@ -219,7 +345,7 @@ CPUBackend::CPUBackend(const CPURuntime* runtime, BackendConfig::PrecisionMode p mMemory = memory; mRuntime = const_cast(runtime); std::shared_ptr defaultAlloc(BufferAllocator::Allocator::createRecurse(runtime->mStaticAllocator.get())); - if (mRuntime->getAllocatorType() == Runtime::Allocator_Defer) { + if (mRuntime->hint().memoryAllocatorType == Runtime::Allocator_Defer) { mDynamicAllocator.reset(new DeferBufferAllocator(defaultAlloc)); } else { mDynamicAllocator.reset(new EagerBufferAllocator(defaultAlloc)); @@ -256,7 +382,7 @@ bool CPUBackend::onSelectDynamicAllocator(int index, int maxIndex) { return false; } if (maxIndex == 2 && mDynamicAllocatorBackup.get() == nullptr) { - if (mRuntime->getAllocatorType() == Runtime::Allocator_Defer) { + if (mRuntime->hint().memoryAllocatorType == Runtime::Allocator_Defer) { mDynamicAllocatorBackup.reset(new DeferBufferAllocator(BufferAllocator::Allocator::createRecurse(mStaticAllocator.get()))); } else { mDynamicAllocatorBackup.reset(new EagerBufferAllocator(BufferAllocator::Allocator::createRecurse(mStaticAllocator.get()))); diff --git a/source/backend/cpu/CPUBackend.hpp b/source/backend/cpu/CPUBackend.hpp index 7793b696c..1ac8721de 100644 --- a/source/backend/cpu/CPUBackend.hpp +++ b/source/backend/cpu/CPUBackend.hpp @@ -11,6 +11,7 @@ #include #include +#include #include "core/Backend.hpp" #include "core/Execution.hpp" #include "core/BufferAllocator.hpp" @@ -24,6 +25,7 @@ class CPURuntime : public Runtime { virtual ~ CPURuntime(); int onGetRuntimeStatus(RuntimeStatus statusEnum) const override; virtual Backend* onCreate(const BackendConfig* config) const override; + virtual void onReset(int numberThread, const BackendConfig* config) override; virtual void onGabageCollect(int level) override; virtual float onGetMemoryInMB() override; virtual CompilerType onGetCompilerType() const override { @@ -33,20 +35,35 @@ class CPURuntime : public Runtime { void onConcurrencyEnd() const; virtual bool onCheckInfo(Backend::Info& info) const override; + // dividedSize's length should be larger than threadNumber + void computeDivideSizes(int size, int* dst) const; + +#ifdef MNN_USE_THREAD_POOL + inline bool multiThreadValid() const { + return mThreadOpen; + } +#endif private: + void _bindCPUCore() const; + void _resetThreadPool(); std::shared_ptr mStaticAllocator; int mThreadNumber; - mutable int mTaskIndex; +#ifdef MNN_USE_THREAD_POOL + mutable int mTaskIndex = -1; + mutable bool mThreadOpen = false; +#endif + void _resetGroupCompute() const; + mutable std::vector> mGroupWithComputeRate; + mutable int mPastDecreaseHint = -1; BackendConfig::MemoryMode mMemory; BackendConfig::PowerMode mPower; BackendConfig::PrecisionMode mPrecision; // Backend features // CPU features - float mFlops = 0.0f; static Backend*(*gExtraCreate)(const Runtime* runtime); size_t mFlags = 0; - int mAllocator = 0; + mutable int mCurrentTID = 0; }; struct CoreFunctions; struct CoreInt8Functions; @@ -114,9 +131,14 @@ class CPUBackend : public Backend { static bool addCreator(OpType t, Creator* c); - int threadNumber() const { + inline int threadNumber() const { return mRuntime->mThreadNumber; } +#ifdef MNN_USE_THREAD_POOL + inline bool threadOpen() const { + return mRuntime->mThreadOpen; + } +#endif BufferAllocator* getBufferAllocator(bool defer_allocator = true) const { return 
mCurrentDynamicAllocator; @@ -140,12 +162,13 @@ class CPUBackend : public Backend { static void initCreatorMap(); static int getBytes(const Backend* backend, const Tensor* output); static DataType getDataType(const Tensor* tensor); + friend class CPURuntime; protected: MemObj* allocBuffer(size_t size, Tensor* dest, StorageType storageType); - const CoreFunctions* mCoreFunctions; - const CoreInt8Functions* mInt8CoreFunctions; + CoreFunctions* mCoreFunctions; + CoreInt8Functions* mInt8CoreFunctions; private: std::shared_ptr mStaticAllocator; std::shared_ptr mDynamicAllocator; diff --git a/source/backend/cpu/CPUConvolution.cpp b/source/backend/cpu/CPUConvolution.cpp index 511623299..9c42008d9 100644 --- a/source/backend/cpu/CPUConvolution.cpp +++ b/source/backend/cpu/CPUConvolution.cpp @@ -49,6 +49,13 @@ bool CPUConvolution::Resource::copyBiasAlign(const float* bias, int outputCount) return true; } CPUConvolution::MutableResourceInt8::MutableResourceInt8(std::shared_ptr res, Backend* backend) : mResource(res) { + auto outputChannleUp4 = res->mOriginBias->length(0); + mBiasFloat.reset(Tensor::createDevice({outputChannleUp4})); + mValid = backend->onAcquireBuffer(mBiasFloat.get(), Backend::STATIC); + if (!mValid) { + MNN_ERROR("mBiasFloat buffer allocated error!\n"); + return; + } if (res->mUseConvQuan) { mBiasInt32 = res->mOriginBias; mScaleFloat = res->mOriginScale; @@ -59,11 +66,21 @@ CPUConvolution::MutableResourceInt8::MutableResourceInt8(std::shared_ptrmOutputZeroPoint; mClampMax = res->mClampMax; mClampMin = res->mClampMin; + // bias int32 -> bias float + auto int32BiasPtr = res->mOriginBias->host(); + auto floatBiasPtr = mBiasFloat->host(); + auto weightScale = res->mOriginScale->host(); + for (int i = 0; i < outputChannleUp4; ++i) { + if (mInputScale && mOutputScale) { // symmetric quan + floatBiasPtr[i] = int32BiasPtr[i] * weightScale[i] * mInputScale / mOutputScale; + } else { + floatBiasPtr[i] = int32BiasPtr[i] * weightScale[i]; + } + } return; } - auto outputChannleUp4 = res->mOriginBias->length(0); mBiasInt32.reset(Tensor::createDevice({outputChannleUp4})); - mScaleFloat.reset(Tensor::createDevice({outputChannleUp4})); + mScaleFloat.reset(Tensor::createDevice({outputChannleUp4})); mValid = backend->onAcquireBuffer(mBiasInt32.get(), Backend::STATIC); if (mValid) { mValid = backend->onAcquireBuffer(mScaleFloat.get(), Backend::STATIC); @@ -82,73 +99,102 @@ void CPUConvolution::MutableResourceInt8::updateInputOutputScale(std::vectormInputScale; + mOutputScale = mResource->mOutputScale; + mInputZeroPoint = mResource->mInputZeroPoint; + mOutputZeroPoint = mResource->mOutputZeroPoint; +// if (mInputScale == inputScale && mOutputScale == outputScale) { +// return; +// } + if (inputScale != 0 && outputScale != 0) { + mInputScale = inputScale; + mOutputScale = outputScale; + mInputZeroPoint = int8_t(inputZeroPoint); + mOutputZeroPoint = int8_t(outputZeroPoint); } - if (mInputScale == inputScale && mOutputScale == outputScale) { + if (mInputScale == 0 || mOutputScale == 0) { return; } - mInputScale = inputScale; - mOutputScale = outputScale; - mInputZeroPoint = int8_t(inputZeroPoint); - mOutputZeroPoint = int8_t(outputZeroPoint); + int size = mResource->mOutputCount; const int kernelNum = static_cast(mResource->mInt8WeightKernelSum.size()); auto biasData = mResource->mOriginBias->host(); auto alphaData = mResource->mOriginScale->host(); - auto alphaScale = inputScale / outputScale; + auto alphaScale = mInputScale / mOutputScale; auto scale = mScaleFloat->host(); auto bias = 
mBiasInt32->host(); + auto biasfloat = mBiasFloat->host(); #ifdef MNN_USE_SSE - inputZeroPoint += 128.0f; + float offset = 128.f; +#else + float offset = 0.f; #endif for (int i = 0; i < kernelNum; i++) { auto alphaValue = alphaData[i]; if (fabs(alphaValue) < 1e-6) { alphaValue = 1e-6; } - scale[i] = alphaValue * alphaScale; + scale[i] = alphaValue * alphaScale; // input_scale*weight_scale/output_scale // compute outputZeroPointFused in asymmetric quant - int outputZeroPointFused = static_cast(outputZeroPoint / scale[i]); - bias[i] = static_cast(biasData[i] / (inputScale * alphaValue)) - mResource->mInt8WeightKernelSum[i] * inputZeroPoint + outputZeroPointFused; + int outputZeroPointFused = static_cast(mOutputZeroPoint / scale[i]); + bias[i] = static_cast(biasData[i] / (mInputScale * alphaValue)) - mResource->mInt8WeightKernelSum[i] * (mInputZeroPoint + offset) + outputZeroPointFused; + // biasfloat[i] = biasData[i] / mOutputScale - mResource->mInt8WeightKernelSum[i] * (mInputZeroPoint + offset) * scale[i] + mOutputZeroPoint; + biasfloat[i] = bias[i] * scale[i]; } } std::shared_ptr CPUConvolution::makeResourceInt8(Backend* backend, const MNN::Convolution2D *convParam, int pack) { auto core = static_cast(backend)->functions(); // TODO: use different pack from float int UNIT = pack; - + std::shared_ptr resource(new ResourceInt8); // TODO: ConvInt8Winograd need in/out scale, which isn't exist in quantinfo when model construct by V3 API const auto convCommon = convParam->common(); const auto group = convParam->common()->group(); const auto outputCount = convCommon->outputCount(); const auto outputChannleUp4 = UP_DIV(outputCount, UNIT) * UNIT; - - resource->mOriginBias.reset(Tensor::createDevice({outputChannleUp4})); - resource->mOriginScale.reset(Tensor::createDevice({outputChannleUp4})); + + int quanCount = outputChannleUp4; + if (convParam->quanParameter() && convParam->quanParameter()->alpha()) { + quanCount = convParam->quanParameter()->alpha()->size(); + quanCount = ROUND_UP(quanCount, UNIT); + } + resource->mOriginBias.reset(Tensor::createDevice({quanCount})); + resource->mOriginScale.reset(Tensor::createDevice({quanCount * core->bytes})); + resource->mWeightQuantZero.reset(Tensor::createDevice({quanCount})); auto allocRes = backend->onAcquireBuffer(resource->mOriginBias.get(), Backend::STATIC); allocRes &= backend->onAcquireBuffer(resource->mOriginScale.get(), Backend::STATIC); + allocRes &= backend->onAcquireBuffer(resource->mWeightQuantZero.get(), Backend::STATIC); if (!allocRes) { return nullptr; } auto biasPtr = resource->mOriginBias->host(); - memset(biasPtr, 0, outputChannleUp4 * sizeof(int32_t)); + memset(biasPtr, 0, quanCount * sizeof(int32_t)); auto scalePtr = resource->mOriginScale->host(); - memset(scalePtr, 0, outputChannleUp4 * sizeof(float)); + memset(scalePtr, 0, quanCount * sizeof(uint8_t) * core->bytes); + auto betaPtr = resource->mWeightQuantZero->host(); + memset(betaPtr, 0, quanCount * sizeof(int32_t)); - resource->mActBits = convParam->symmetricQuan()->nbits(); + resource->mActBits = 8; + if (convParam->symmetricQuan()) { + resource->mActBits = convParam->symmetricQuan()->nbits(); + } const int8_t* weightSrc = nullptr; int weightSize = 0; std::shared_ptr quanCommon; resource->mOutputCount = outputCount; - if (!ConvolutionCommon::getConvInt8Parameters(convParam, quanCommon, backend, weightSrc, weightSize, scalePtr, biasPtr)) { + if (!ConvolutionCommon::getConvInt8Parameters(convParam, quanCommon, backend, weightSrc, weightSize, scalePtr, biasPtr, betaPtr)) { return 
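
updateInputOutputScale above folds the input, weight and output scales into one per-channel multiplier plus a fused bias, and keeps a float copy of that bias for kernels that accumulate in float. Stripped of the zero-point and SSE offset terms, the per-channel arithmetic reduces to this sketch (names are illustrative):

// Symmetric case (all zero points 0):
//   scale_i     = weightScale_i * inputScale / outputScale
//   biasInt_i   = biasFloat_i / (inputScale * weightScale_i)  (int32 accumulator domain)
//   biasFloat_i = biasInt_i * scale_i                         (float output domain)
struct RequantTerm { float scale; int biasInt; float biasFloat; };

RequantTerm makeRequantTerm(float biasValue, float weightScale, float inputScale, float outputScale) {
    RequantTerm t;
    t.scale     = weightScale * (inputScale / outputScale);
    t.biasInt   = (int)(biasValue / (inputScale * weightScale));  // truncated like the code above
    t.biasFloat = t.biasInt * t.scale;
    return t;
}
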
nullptr; } if (convParam->bias() && convParam->quanParameter()->alpha()) { resource->mUseConvQuan = false; } + if (quanCommon.get()) { + resource->mWeightAsymmetricQuant = quanCommon->asymmetric; + } + resource->mWeightInt8.reset(Tensor::createDevice({weightSize})); allocRes = backend->onAcquireBuffer(resource->mWeightInt8.get(), Backend::STATIC); if (!allocRes) { @@ -156,12 +202,16 @@ std::shared_ptr CPUConvolution::makeResourceInt8(B } const int kernelNum = outputCount; const int kernelSize = weightSize / kernelNum; - resource->mInt8WeightKernelSum.resize(kernelNum); + resource->mInt8WeightKernelSum.resize(outputChannleUp4); + bool checkWeightQuantZero = false; for (int i = 0; i < kernelNum; i++) { int temp = 0; int offset = i * kernelSize; + if (static_cast(betaPtr[i]) != 0) { + checkWeightQuantZero = true; + } for (int j = 0; j < kernelSize; j++) { - temp += int(weightSrc[offset + j]); + temp += (static_cast(weightSrc[offset + j]) - betaPtr[i]); } resource->mInt8WeightKernelSum[i] = temp; #ifdef MNN_USE_SSE @@ -170,10 +220,19 @@ std::shared_ptr CPUConvolution::makeResourceInt8(B } #endif } - resource->mInputZeroPoint = convParam->symmetricQuan()->zeroPoint(); - resource->mOutputZeroPoint = convParam->symmetricQuan()->outputZeroPoint(); - resource->mClampMin = convParam->symmetricQuan()->clampMin(); - resource->mClampMax = convParam->symmetricQuan()->clampMax(); + if (false == checkWeightQuantZero) { // All weight quant bias is 0, do not need to compute related term in gemm kernel. + resource->mWeightAsymmetricQuant = false; + } + resource->mInputZeroPoint = 0; + resource->mOutputZeroPoint = 0; + resource->mClampMin = -128; + resource->mClampMax = 127; + if (convParam->symmetricQuan()) { + resource->mInputZeroPoint = convParam->symmetricQuan()->zeroPoint(); + resource->mOutputZeroPoint = convParam->symmetricQuan()->outputZeroPoint(); + resource->mClampMin = convParam->symmetricQuan()->clampMin(); + resource->mClampMax = convParam->symmetricQuan()->clampMax(); + } if (convParam->quanParameter() != nullptr) { resource->mInputScale = convParam->quanParameter()->scaleIn(); resource->mOutputScale = convParam->quanParameter()->scaleOut(); @@ -181,9 +240,113 @@ std::shared_ptr CPUConvolution::makeResourceInt8(B auto weightDst = resource->mWeightInt8->host(); memcpy(weightDst, weightSrc, resource->mWeightInt8->size()); resource->mRelu = convCommon->relu() || convCommon->relu6(); + if (convParam->symmetricQuan() && convParam->symmetricQuan()->outputDataType() == MNN::DataType_DT_FLOAT) { + resource->mOutputZeroPoint = 0; + resource->mOutputScale = 1.0f; + } return resource; } +void CPUConvolution::makeResource(Backend* backend, std::shared_ptr resource, const Convolution2D* conv2d, std::shared_ptr resourceInt8) { + /* Used to compute weight quant scale and bias and weightKernelSum of type float. 
*/ + bool quanBuffer = (conv2d->quanParameter() != nullptr && conv2d->quanParameter()->buffer() != nullptr); + MNN_ASSERT(quanBuffer || resourceInt8); + resource->backend = backend; + auto core = static_cast(backend)->functions(); + // common parameters + int outputCount = conv2d->common()->outputCount(); + int LSize = conv2d->common()->inputCount() * conv2d->common()->kernelX() * conv2d->common()->kernelY(); + int ocUp4 = ROUND_UP(outputCount, core->pack); + int8_t* weightOrigin; + + // Save weight quant scale and bias: wf=scale*wi+bias + resource->mDequantize.mScaleBias.reset(Tensor::createDevice({2 * ocUp4 * core->bytes})); + auto success = resource->backend->onAcquireBuffer(resource->mDequantize.mScaleBias.get(), Backend::STATIC); + if (!success) { + MNN_ERROR("Alloc denquant scaleBias memory error\n"); + return; + } + auto alphaPtr = resource->mDequantize.mScaleBias->host(); + auto biasPtr = reinterpret_cast(reinterpret_cast(alphaPtr) + ocUp4 * core->bytes); + ::memset(alphaPtr, 0, 2 * ocUp4 * core->bytes); + + std::shared_ptr quantCommon; + // Load quant scale and bias + if (quanBuffer) { + quantCommon = ConvolutionCommon::load(conv2d, backend, false, true); + weightOrigin = quantCommon->weight.get(); // weight before reorder + + int h = quantCommon->alpha.size(); + if (core->bytes == 2) { + if (quantCommon->asymmetric) { + std::unique_ptr tmp(new int16_t[h]); + core->MNNFp32ToLowp(quantCommon->alpha.get(), tmp.get(), h); + for (int i=0; i< h/2; ++i) { + reinterpret_cast(alphaPtr)[i] = tmp[2 * i + 1]; + reinterpret_cast(biasPtr)[i] = tmp[2 * i]; + } + } else { + core->MNNFp32ToLowp(quantCommon->alpha.get(), reinterpret_cast(alphaPtr), h); + } + } else { + if (quantCommon->asymmetric) { + h = h / 2; + for (int i=0; ialpha.get()[2 * i + 1]; + biasPtr[i] = quantCommon->alpha.get()[2 * i]; + } + } else { + for (int i=0; ialpha.get()[i]; + biasPtr[i] = 0.f; + } + } + } + } else { + weightOrigin = resourceInt8->mWeightInt8->host(); + auto wZero = resourceInt8->mWeightQuantZero->host(); // has packed to outputUp4 + auto wScale = resourceInt8->mOriginScale->host(); + int h = ocUp4; + if (core->bytes == 2) { + std::unique_ptr tmp(new int16_t[h]); + core->MNNFp32ToLowp(wScale, tmp.get(), h); + for (int i=0; i< h; ++i) { + reinterpret_cast(alphaPtr)[i] = tmp[i]; + reinterpret_cast(biasPtr)[i] = (-1.f) * wZero[i] * tmp[i]; + } + } else { + for (int i=0; i< h; ++i) { + alphaPtr[i] = wScale[i]; + biasPtr[i] = (-1.f) * wZero[i] * wScale[i]; + } + } + } + + // Compute float weightKernelSum + resource->mWeightKernelSum.reset(Tensor::createDevice({ocUp4 * 4})); + success = resource->backend->onAcquireBuffer(resource->mWeightKernelSum.get(), Backend::STATIC); + if (!success) { + MNN_ERROR("Alloc denquant mWeightKernelSum memory error\n"); + return; + } + auto weightKernelSum = resource->mWeightKernelSum->host(); + for (int i = 0; i < outputCount; ++i) { + int sum = 0; + for (int j = 0; j < LSize; ++j) { + sum = sum + static_cast(weightOrigin[j + i * LSize]); + } + if(core->bytes == 2) { + auto scale = reinterpret_cast(alphaPtr)[i]; + auto bias = reinterpret_cast(biasPtr)[i]; + weightKernelSum[i] = static_cast(sum) * scale + LSize * bias; + } else { + auto scale = alphaPtr[i]; + auto bias = biasPtr[i]; + weightKernelSum[i] = static_cast(sum) * scale + LSize * bias; + } + } +} + CPUConvolution::CPUConvolution(const Convolution2DCommon *convOp, Backend *b) : MNN::Execution(b), mCommon(convOp) { // Do nothing } @@ -245,7 +408,7 @@ class CPUConvInt8Creator : public CPUBackend::Creator { if 
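
makeResource above stores the weight dequantization as wf = scale * wi + bias and then derives a float kernel sum from the int8 weights. Because that affine map is applied to every one of the LSize weights in a kernel, the float sum follows directly, as in this small illustrative helper:

#include <cstdint>
#include <vector>

// sum_j (scale * wi_j + bias) = scale * sum_j wi_j + LSize * bias
float kernelSumFloat(const std::vector<int8_t>& wi, float scale, float bias) {
    int sum = 0;
    for (int8_t w : wi) sum += w;
    return (float)sum * scale + (float)wi.size() * bias;
}
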
(ConvInt8Winograd::mustUse(convOp)) { return new ConvInt8Winograd(backend, convOp, res); } - return new DenseConvInt8TiledExecutor(backend, convOp, res); + return new DenseConvInt8TiledExecutor(backend, convOp, res, false); } }; diff --git a/source/backend/cpu/CPUConvolution.hpp b/source/backend/cpu/CPUConvolution.hpp index d79c3ee73..d241007d6 100644 --- a/source/backend/cpu/CPUConvolution.hpp +++ b/source/backend/cpu/CPUConvolution.hpp @@ -45,6 +45,7 @@ class CPUConvolution : public Execution { std::shared_ptr mScaleBias; }; struct Resource { + std::shared_ptr mWeightKernelSum; std::shared_ptr mWeight; std::shared_ptr mBias; ResourceDequantizeInfo mDequantize; @@ -54,18 +55,21 @@ class CPUConvolution : public Execution { int lU; int lP; int hP; + std::vector mReluThreshold; }; struct ResourceInt8 { std::vector mInt8WeightKernelSum; std::shared_ptr mWeightInt8; std::shared_ptr mOriginBias; std::shared_ptr mOriginScale; + std::shared_ptr mWeightQuantZero; // relu or relu6 bool mRelu; int mActBits; int mOutputCount; bool mUseConvQuan = true; + bool mWeightAsymmetricQuant = true; #ifdef MNN_USE_SSE std::vector offsets; #endif @@ -89,10 +93,12 @@ class CPUConvolution : public Execution { int8_t mClampMax; std::shared_ptr mBiasInt32; std::shared_ptr mScaleFloat; + std::shared_ptr mBiasFloat; int32_t mShiftBits = 14; bool mValid; }; static std::shared_ptr makeResourceInt8(Backend *backend, const MNN::Convolution2D *convOp, int pack=4); + static void makeResource(Backend* backend, std::shared_ptr resource, const Convolution2D* conv2d, std::shared_ptr resourceInt8 = nullptr); CPUConvolution(const Convolution2DCommon *convOp, Backend *b); virtual ~CPUConvolution() = default; virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; diff --git a/source/backend/cpu/CPUConvolutionDepthwise.cpp b/source/backend/cpu/CPUConvolutionDepthwise.cpp index a3b129fa2..03767edfa 100644 --- a/source/backend/cpu/CPUConvolutionDepthwise.cpp +++ b/source/backend/cpu/CPUConvolutionDepthwise.cpp @@ -187,7 +187,8 @@ ErrorCode CPUConvolutionDepthwise::BasicFloatExecution::onResize(const std::vect auto postData = getPostParameters(); auto batch = inputs[0]->batch(); int total = batch * dst_depth_quad; - int numberThread = std::min(((CPUBackend*)backend())->threadNumber(), total); + int numberThread = ((CPUBackend*)backend())->threadNumber(); + auto rt = static_cast(backend()->getRuntime()); auto runBasic = [=](uint8_t* dst_z, const uint8_t* src_z, const uint8_t* weight_dz, int L, int T, int R, int B) { for (int dy = T; dy < B; ++dy) { auto dst_y = dst_z + dy * dst_y_step * bytes; @@ -207,10 +208,13 @@ ErrorCode CPUConvolutionDepthwise::BasicFloatExecution::onResize(const std::vect } } }; + std::vector divides(numberThread+1); + divides[0] = 0; + rt->computeDivideSizes(total, divides.data()+1); mExecutor = [=](const uint8_t* srcOrigin, uint8_t* dstOrigin, int tId) { auto biasP = inputs[2]->host(); auto weightP = inputs[1]->host(); - for (int index = tId; index < total; index += numberThread) { + for (int index = divides[tId]; index < divides[tId+1]; ++index) { int dz = index / batch; auto dst_z = dstOrigin + dst_z_step * index * bytes; const auto src_z = srcOrigin + src_z_step * index * bytes; diff --git a/source/backend/cpu/CPUDeconvolution.cpp b/source/backend/cpu/CPUDeconvolution.cpp index 9968e6198..0a1e6f813 100644 --- a/source/backend/cpu/CPUDeconvolution.cpp +++ b/source/backend/cpu/CPUDeconvolution.cpp @@ -87,8 +87,10 @@ static void _transformWeight(const uint8_t* tempWeight, 
uint8_t* dest, int outpu static void _reorderWeightInt8(Backend* bn, const Convolution2DCommon* common, const int8_t* srcPtr, std::shared_ptr& weight) { auto core = static_cast(bn)->int8Functions(); + auto gcore = static_cast(bn)->functions(); int UNIT, SRC_UNIT, DST_XUNIT; core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + UNIT = gcore->pack; int oc = common->outputCount(), ic = common->inputCount(), kernelCount = common->kernelX() * common->kernelY(); std::vector shape = {UP_DIV(oc, UNIT), UP_DIV(ic, SRC_UNIT) * kernelCount, UNIT, SRC_UNIT}; @@ -167,11 +169,13 @@ CPUDeconvolution::CPUDeconvolution(const Tensor* input, const Op* convOp, Backen std::vector _bias(outputChannleUp4, 0); std::vector _scale(outputChannleUp4, 0); + std::vector _beta(outputChannleUp4, 0); auto biasPtr = _bias.data(); auto scalePtr = _scale.data(); + auto betaPtr = _beta.data(); if (ModeInt8) { - ConvolutionCommon::getConvInt8Parameters(conv2d, quanCommon, backend, quanWeightInt8, tempWeightSize, scalePtr, biasPtr); + ConvolutionCommon::getConvInt8Parameters(conv2d, quanCommon, backend, quanWeightInt8, tempWeightSize, scalePtr, biasPtr, betaPtr); } else { ConvolutionCommon::getConvParameters(&quanCommon, backend, conv2d, &tempWeight, &tempWeightSize); } diff --git a/source/backend/cpu/CPUDeconvolution.hpp b/source/backend/cpu/CPUDeconvolution.hpp index 750fc4816..c9e0427f0 100644 --- a/source/backend/cpu/CPUDeconvolution.hpp +++ b/source/backend/cpu/CPUDeconvolution.hpp @@ -78,7 +78,7 @@ class CPUDeconvolutionOrigin : public CPUDeconvolutionBasic { } } #else - if(conv2d->symmetricQuan()->method() == QuantizeAlgo_OVERFLOW_AWARE){ + if(conv2d->symmetricQuan() && conv2d->symmetricQuan()->method() == QuantizeAlgo_OVERFLOW_AWARE){ gemmKernel = core->Int8GemmKernelFast; } #endif diff --git a/source/backend/cpu/CPUMatMul.cpp b/source/backend/cpu/CPUMatMul.cpp index 1ad8ff4aa..069a77965 100644 --- a/source/backend/cpu/CPUMatMul.cpp +++ b/source/backend/cpu/CPUMatMul.cpp @@ -89,8 +89,12 @@ ErrorCode CPUMatMul::onResize(const std::vector& inputs, const std::vec core->MNNGetMatMulPackMode(&eP, &lP, &hP); int numberThread = mSupportMultiThread ? ((CPUBackend*)backend())->threadNumber() : 1; auto bufferAlloc = static_cast(backend())->getBufferAllocator(); - auto ATPtrAlloc = bufferAlloc->alloc(eP * l * core->bytes * numberThread); - auto BTPtrAlloc = bufferAlloc->alloc(UP_DIV(h, hP) * UP_DIV(l, lP) * lP * hP * core->bytes); + auto ATPtrAlloc = bufferAlloc->alloc(eP * UP_DIV(l, lP) * lP * core->bytes * numberThread); + int matmulBytes = core->bytes; + if (core->matmulBytes != 0) { + matmulBytes = core->matmulBytes; + } + auto BTPtrAlloc = bufferAlloc->alloc(UP_DIV(h, hP) * UP_DIV(l, lP) * lP * hP * matmulBytes); auto CTPtrAlloc = bufferAlloc->alloc(UP_DIV(h, core->pack) * eP * core->pack * core->bytes * numberThread); if (ATPtrAlloc.invalid() || BTPtrAlloc.invalid() || CTPtrAlloc.invalid()) { return OUT_OF_MEMORY; @@ -180,10 +184,11 @@ void CPUMatMul::execute(const float* APtr, const float* BPtr, float* CPtr, const if (nullptr == biasPtr) { postPtr = nullptr; } + auto lAlign = UP_DIV(mL, lP) * lP; int tileCount = UP_DIV(mE, eP); int numberThread = mSupportMultiThread ? 
((CPUBackend*)backend())->threadNumber() : 1; MNN_CONCURRENCY_BEGIN(tId, numberThread) { - auto TA = mTempA.ptr() + tId * eP * mL * core->bytes; + auto TA = mTempA.ptr() + tId * eP * lAlign * core->bytes; auto TB = mTempB.ptr(); auto hC4 = UP_DIV(mH, core->pack); auto TC = mTempC.ptr() + tId * eP * hC4 * core->pack * core->bytes; @@ -199,27 +204,78 @@ void CPUMatMul::execute(const float* APtr, const float* BPtr, float* CPtr, const int xEnd = ALIMIN(xStart + eP, mE); int xC = xEnd - xStart; if (mTransposeA) { - for (int y=0; ybytes, (uint8_t*)APtr + (y * mE + xStart) * core->bytes, core->bytes * xC); + // l, e -> l/lp, xC|eP, lp + if (lP > 1) { + // TODO: Speed up it + if (mL % lP != 0) { + ::memset(TA, 0, eP * lAlign * core->bytes); + } + if (core->bytes == 4) { + auto D = (int32_t*)TA; + auto S = (int32_t*)APtr; + for (int y=0; ybytes == 2); + auto D = (int16_t*)TA; + auto S = (int16_t*)APtr; + for (int y=0; ybytes, (uint8_t*)APtr + (y * mE + xStart) * core->bytes, core->bytes * xC); + } } } else { - // e, l -> l, eP - int dims[] = { - xC, - mL, - mL, - eP - }; - if (core->bytes == 2) { - auto S = (const int16_t*)APtr + xStart * mL; - auto D = (int16_t*)TA; - MNNTranspose16Bit(D, S, dims); - } else if (core->bytes == 4) { - auto S = (const int32_t*)APtr + xStart * mL; - auto D = (int32_t*)TA; - MNNTranspose32Bit(D, S, dims); + if (lP > 1) { + // e, l -> l/lp, 1, xC|eP, lp + int lC = mL / lP; + int lR = mL % lP; + for (int yy=0; yybytes, (uint8_t*)APtr + ((x+xStart)*mL+yy*lP)*core->bytes, lP * core->bytes); + } + } + if (lR > 0) { + int yy = lC; + for (int x=0; xbytes, 0, lP * core->bytes); + ::memcpy(TA + (yy * eP * lP + x * lP) * core->bytes, (uint8_t*)APtr + ((x+xStart)*mL+yy*lP)*core->bytes, xC * core->bytes); + } + } + } else { + // e, l -> l, eP + int dims[] = { + xC, + mL, + mL, + eP + }; + if (core->bytes == 2) { + auto S = (const int16_t*)APtr + xStart * mL; + auto D = (int16_t*)TA; + MNNTranspose16Bit(D, S, dims); + } else if (core->bytes == 4) { + auto S = (const int32_t*)APtr + xStart * mL; + auto D = (int32_t*)TA; + MNNTranspose32Bit(D, S, dims); + } } } + if (core->matmulBytes != 0) { + core->MNNFp32ToLowp((const float*)TA, (int16_t*)TA, eP * lAlign); + } if (xC == eP) { core->MNNPackedMatMul((float*)TC, (float*)TA, (float*)TB, parameters, postPtr, biasPtr, nullptr, nullptr); } else { diff --git a/source/backend/cpu/CPURelu.cpp b/source/backend/cpu/CPURelu.cpp index fb5aaa335..073556464 100644 --- a/source/backend/cpu/CPURelu.cpp +++ b/source/backend/cpu/CPURelu.cpp @@ -179,10 +179,6 @@ ErrorCode CPUPRelu::onResize(const std::vector& inputs, const std::vect ssize_t outputZero = static_cast(TensorUtils::getDescribe(outputs[0])->quantAttr->zero); ssize_t maxValue = static_cast(TensorUtils::getDescribe(inputs[0])->quantAttr->max); ssize_t minValue = static_cast(TensorUtils::getDescribe(inputs[0])->quantAttr->min); - float inputScales[1] = {inputScale}; - float outputScales[1] = {outputScale}; - ssize_t inputZeros[1] = {inputZero}; - ssize_t outputZeros[1] = {outputZero}; mQuanScalesInput.resize(1); mQuanScalesOutput.resize(1); mQuanZerosInput.resize(1); @@ -210,13 +206,14 @@ ErrorCode CPUPRelu::onExecute(const std::vector& inputs, const std::vec auto coreInt8 = static_cast(backend())->int8Functions(); const int channel = ib.dim[1].extent; const int batch = ib.dim[0].extent; - const int depthQuad = UP_DIV(channel, core->pack); + int pack = 4; + int depthQuad = UP_DIV(channel, core->pack); const uint8_t* srcO = (const uint8_t*)ib.host; uint8_t* dstO = (uint8_t*)ob.host; auto 
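For lP > 1 the A tile is repacked from row-major e x l into [l/lP][eP][lP] order with the l remainder zero-padded, so the kernel can always consume full lP-wide lanes. A self-contained sketch of that repack; the names and the fp32 element type are illustrative rather than MNN's:

#include <cstdio>
#include <cstring>
#include <vector>

// Repack one eP-wide row tile of a row-major (e x l) matrix A into
// [l/lP][eP][lP] order, zero-padding the l remainder.
static void packATile(float* dst, const float* A, int l, int xStart, int xC,
                      int eP, int lP) {
    int lC = l / lP, lR = l % lP, lAlign = (l + lP - 1) / lP * lP;
    std::memset(dst, 0, sizeof(float) * (size_t)eP * lAlign);
    for (int yy = 0; yy < lC; ++yy) {
        for (int x = 0; x < xC; ++x) {
            // lP contiguous elements of row (xStart + x), starting at column yy*lP
            std::memcpy(dst + (yy * eP + x) * lP,
                        A + (size_t)(xStart + x) * l + yy * lP,
                        lP * sizeof(float));
        }
    }
    if (lR > 0) {
        for (int x = 0; x < xC; ++x) {
            std::memcpy(dst + (lC * eP + x) * lP,
                        A + (size_t)(xStart + x) * l + lC * lP,
                        lR * sizeof(float)); // tail columns; the rest stays zero
        }
    }
}

int main() {
    int e = 5, l = 6, eP = 4, lP = 4;
    std::vector<float> A(e * l);
    for (int i = 0; i < e * l; ++i) A[i] = (float)i;
    std::vector<float> dst((size_t)eP * ((l + lP - 1) / lP * lP));
    packATile(dst.data(), A.data(), l, /*xStart=*/0, /*xC=*/4, eP, lP);
    printf("first packed lane: %.0f %.0f %.0f %.0f\n", dst[0], dst[1], dst[2], dst[3]);
    return 0;
}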
totalCount = batch * depthQuad; auto numberThread = ((CPUBackend*)backend())->threadNumber(); if (mUseInt8) { - + depthQuad = UP_DIV(channel, pack); MNN_CONCURRENCY_BEGIN(tId, numberThread) { QuanPrePostParameters params; params.maxValue = static_cast(TensorUtils::getDescribe(inputs[0])->quantAttr->max); @@ -227,7 +224,7 @@ ErrorCode CPUPRelu::onExecute(const std::vector& inputs, const std::vec params.outputZeroPoint = mQuanZerosOutput.data(); for (int b=tId; bMNNReluWithSlopeChannelInt8((int8_t*)(dstO + sizeQuad * core->pack * b), (const int8_t*)(srcO + sizeQuad * core->pack * b), (const float*)(mSlope.host() + core->bytes * core->pack * c), sizeQuad, 1, ¶ms); + coreInt8->MNNReluWithSlopeChannelInt8((int8_t*)(dstO + sizeQuad * pack * b), (const int8_t*)(srcO + sizeQuad * pack * b), (const float*)(mSlope.host() + core->bytes * pack * c), sizeQuad, 1, ¶ms); } } MNN_CONCURRENCY_END(); diff --git a/source/backend/cpu/CPURuntime.cpp b/source/backend/cpu/CPURuntime.cpp index 0cf9a336d..98b7d04f5 100644 --- a/source/backend/cpu/CPURuntime.cpp +++ b/source/backend/cpu/CPURuntime.cpp @@ -11,19 +11,33 @@ https://github.com/Tencent/ncnn/blob/master/src/cpu.cpp https://github.com/pytorch/cpuinfo */ -#ifdef __ANDROID__ +#ifdef __linux__ #include #include #include +#include +#include +#include +#include +#include +#include + +#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200) +#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400) +#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000) +// ref: https://cs.android.com/android/platform/superproject/+/master:bionic/libc/kernel/uapi/asm-arm64/asm/hwcap.h;drc=04da58f5b3bc40dbbafb4f8422aa2991479d9e1e;l=70 +#define CPUINFO_ARM_LINUX_FEATURE_I8MM UINT32_C(0x00002000) +#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000) +#define CPUINFO_ARM_LINUX_FEATURE_SVE2 UINT32_C(0x00000002) #endif -#include "core/Macro.h" +#include +#include +#include "core/Macro.h" #ifdef __ANDROID__ -#include -#include #include -#endif // __ANDROID__ +#endif #if __APPLE__ #include "TargetConditionals.h" @@ -37,30 +51,68 @@ #endif // TARGET_OS_IPHONE #endif // __APPLE__ -#ifdef _OPENMP -#include -#endif // _OPENMP - #include #include #include #include #include #include "backend/cpu/CPURuntime.hpp" +#include "core/FileLoader.hpp" -#if defined (__linux__) && defined (__aarch64__) -#include +#define BUFFER_SIZE 1024 -#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200) -#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400) -#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000) -#define CPUINFO_ARM_LINUX_FEATURE_I8MM UINT32_C(0x00002000) -#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000) -#define CPUINFO_ARM_LINUX_FEATURE_SVE2 UINT32_C(0x00000002) +int MNNGetCurrentPid() { +#if defined (__linux__) +#ifdef __GLIBC__ + pid_t pid = syscall(SYS_gettid); +#else +#ifdef PI3 + pid_t pid = getpid(); +#else + pid_t pid = gettid(); +#endif +#endif + return pid; +#else + return 0; +#endif +} +int MNNSetSchedAffinity(const int* cpuIDs, int size) { +#if defined (__linux__) +#ifndef CPU_SETSIZE +#define CPU_SETSIZE 1024 +#endif +#define __NCPUBITS (8 * sizeof(unsigned long)) + typedef struct { + unsigned long __bits[CPU_SETSIZE / __NCPUBITS]; + } cpu_set_t; -#endif /* __linux__ && __aarch64__ */ +#ifndef CPU_SET +#define CPU_SET(cpu, cpusetp) ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS))) +#endif +#ifndef CPU_ZERO +#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t)) +#endif + // set affinity for 
thread + pid_t pid = MNNGetCurrentPid(); + cpu_set_t mask; + CPU_ZERO(&mask); + for (int i = 0; i < size; i++) { + CPU_SET(cpuIDs[i], &mask); + } -#ifdef __ANDROID__ + int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); + if (syscallret) { + MNN_PRINT("syscall error %d\n", syscallret); + return -1; + } +#endif + return 0; +} + +// cpuinfo +// Reference from: https://github.com/pytorch/cpuinfo +#if defined(ENABLE_ARMV82) && defined(__arm__) /* As per include/sys/system_properties.h in Android NDK */ #define CPUINFO_HARDWARE_VALUE_MAX 64 @@ -154,231 +206,6 @@ struct cpuinfo_arm_chipset { char suffix[8]; }; -#define BUFFER_SIZE 1024 - -static uint32_t getNumberOfCPU() { - FILE* fp = fopen("/proc/cpuinfo", "rb"); - if (!fp) { - return 1; - } - uint32_t number = 0; - char buffer[BUFFER_SIZE]; - while (!feof(fp)) { - char* str = fgets(buffer, BUFFER_SIZE, fp); - if (!str) { - break; - } - if (memcmp(buffer, "processor", 9) == 0) { - number++; - } - } - fclose(fp); - if (number < 1) { - number = 1; - } - return number; -} - -static int getCPUMaxFreqKHz(int cpuID) { - char path[256]; - sprintf(path, "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuID); - FILE* fp = fopen(path, "rb"); - if (!fp) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuID); - fp = fopen(path, "rb"); - if (!fp) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuID); - fp = fopen(path, "rb"); - if (!fp) { - return -1; - } - int maxfrequency = -1; - fscanf(fp, "%d", &maxfrequency); - fclose(fp); - return maxfrequency; - } - } - int maxfrequency = 0; - while (!feof(fp)) { - int frequency = 0; - int history = fscanf(fp, "%d %*d", &frequency); - if (history != 1) { - break; - } - if (frequency > maxfrequency) { - maxfrequency = frequency; - } - } - fclose(fp); - return maxfrequency; -} - -static int sortCPUIDByMaxFrequency(std::vector& cpuIDs, int* littleClusterOffset) { - const int cpuNumbers = cpuIDs.size(); - *littleClusterOffset = 0; - if (cpuNumbers == 0) { - return 0; - } - std::vector cpusFrequency; - cpusFrequency.resize(cpuNumbers); - for (int i = 0; i < cpuNumbers; ++i) { - int frequency = getCPUMaxFreqKHz(i); - cpuIDs[i] = i; - cpusFrequency[i] = frequency; - // MNN_PRINT("cpu fre: %d, %d\n", i, frequency); - } - for (int i = 0; i < cpuNumbers; ++i) { - for (int j = i + 1; j < cpuNumbers; ++j) { - if (cpusFrequency[i] < cpusFrequency[j]) { - // id - int temp = cpuIDs[i]; - cpuIDs[i] = cpuIDs[j]; - cpuIDs[j] = temp; - // frequency - temp = cpusFrequency[i]; - cpusFrequency[i] = cpusFrequency[j]; - cpusFrequency[j] = temp; - } - } - } - int midMaxFrequency = (cpusFrequency.front() + cpusFrequency.back()) / 2; - if (midMaxFrequency == cpusFrequency.back()) { - return 0; - } - for (int i = 0; i < cpuNumbers; ++i) { - if (cpusFrequency[i] < midMaxFrequency) { - *littleClusterOffset = i; - break; - } - } - return 0; -} - -static int setSchedAffinity(const std::vector& cpuIDs) { -#define CPU_SETSIZE 1024 -#define __NCPUBITS (8 * sizeof(unsigned long)) - typedef struct { - unsigned long __bits[CPU_SETSIZE / __NCPUBITS]; - } cpu_set_t; - -#define CPU_SET(cpu, cpusetp) ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS))) - -#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t)) - - // set affinity for thread -#ifdef __GLIBC__ - pid_t pid = syscall(SYS_gettid); -#else -#ifdef PI3 - pid_t pid = getpid(); -#else - pid_t pid = gettid(); -#endif -#endif - cpu_set_t mask; - CPU_ZERO(&mask); - for (int i = 
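CPURuntime now exposes the thread id and affinity helpers directly instead of the old MNNSetCPUThreadsMode entry point. A hedged usage sketch that pins the calling thread to the highest-frequency CPU group, assuming this patch's CPURuntime.hpp is on the include path and that groups are sorted ascending by maxFreq as in _fillInfo:

#include <vector>
// Assumes the CPURuntime.hpp introduced by this patch is reachable.
#include "backend/cpu/CPURuntime.hpp"

int main() {
    const MNNCPUInfo* info = MNNGetCPUInfo();
    if (info == nullptr || info->groups.empty()) {
        return 0; // no cpufreq information available
    }
    // Groups are sorted by maxFreq, so the last group holds the big cores.
    const auto& big = info->groups.back().ids;
    int rc = MNNSetSchedAffinity(big.data(), (int)big.size());
    return rc == 0 ? 0 : 1;
}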
0; i < (int)cpuIDs.size(); i++) { - CPU_SET(cpuIDs[i], &mask); - } - - int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); - if (syscallret) { - MNN_PRINT("syscall error %d\n", syscallret); - return -1; - } - - return 0; -} - -#endif // arch - -int MNNSetCPUThreadsMode(MNNCPUThreadsMode mode) { -#ifdef __ANDROID__ - auto numberOfCPUs = getNumberOfCPU(); - if (mode == MNN_CPU_MODE_DEFAULT) { - return 0; - } - static std::vector sortedCPUIDs; - static int littleClusterOffset = 0; - if (sortedCPUIDs.empty()) { - sortedCPUIDs.resize(numberOfCPUs); - for (int i = 0; i < numberOfCPUs; ++i) { - sortedCPUIDs[i] = i; - } - sortCPUIDByMaxFrequency(sortedCPUIDs, &littleClusterOffset); - } - - if (littleClusterOffset == 0 && mode != MNN_CPU_MODE_POWER_FRI) { - MNN_PRINT("This CPU Arch Do NOT support for setting cpu thread mode\n"); - } - std::vector cpuAttachIDs; - switch (mode) { - case MNN_CPU_MODE_POWER_FRI: - cpuAttachIDs = sortedCPUIDs; - break; - case MNN_CPU_MODE_LITTLE: - cpuAttachIDs = std::vector(sortedCPUIDs.begin() + littleClusterOffset, sortedCPUIDs.end()); - break; - case MNN_CPU_MODE_BIG: - cpuAttachIDs = std::vector(sortedCPUIDs.begin(), sortedCPUIDs.begin() + littleClusterOffset); - break; - default: - cpuAttachIDs = sortedCPUIDs; - break; - } - -#ifdef _OPENMP - const int threadsNumber = cpuAttachIDs.size(); - omp_set_num_threads(threadsNumber); - std::vector result(threadsNumber, 0); -#pragma omp parallel for - for (int i = 0; i < threadsNumber; ++i) { - result[i] = setSchedAffinity(cpuAttachIDs); - } - for (int i = 0; i < threadsNumber; ++i) { - if (result[i] != 0) { - return -1; - } - } -#else - int res = setSchedAffinity(cpuAttachIDs); - if (res != 0) { - return -1; - } -#endif // _OPENMP - return 0; -#elif __IOS__ - return -1; -#else - return -1; -#endif // arch -} -float MNNGetCPUFlops(uint32_t number) { - float flops = 2048.0f; -#ifdef __ANDROID__ - auto numberOfCPUs = getNumberOfCPU(); - if (0 == numberOfCPUs) { - return flops; - } - std::vector freqs; - freqs.resize(numberOfCPUs); - for (int i = 0; i < numberOfCPUs; ++i) { - freqs[i] = getCPUMaxFreqKHz(i); - } - std::sort(freqs.rbegin(), freqs.rend()); - number = std::min(number, numberOfCPUs); - flops = 0.0f; - for (uint32_t i = 0; i < number; ++i) { - flops += (float)freqs[i] / 1024.0f; - } -#endif - return flops; -} - -// cpuinfo -// Reference from: https://github.com/pytorch/cpuinfo -#ifdef __ANDROID__ - #define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000) #define CPUINFO_ARM_MIDR_VARIANT_MASK UINT32_C(0x00F00000) #define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000) @@ -400,19 +227,6 @@ float MNNGetCPUFlops(uint32_t number) { #define CPUINFO_ARM_MIDR_PART_OFFSET 4 #define CPUINFO_ARM_MIDR_REVISION_OFFSET 0 -#ifdef __aarch64__ -#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200) -#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400) -#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000) -// ref: https://cs.android.com/android/platform/superproject/+/master:bionic/libc/kernel/uapi/asm-arm64/asm/hwcap.h;drc=04da58f5b3bc40dbbafb4f8422aa2991479d9e1e;l=70 -#define CPUINFO_ARM_LINUX_FEATURE_I8MM UINT32_C(0x00002000) -#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000) -#define CPUINFO_ARM_LINUX_FEATURE_SVE2 UINT32_C(0x00000002) -#else -#define CPUINFO_ARM_LINUX_FEATURE_HALF UINT32_C(0x00000002) -#define CPUINFO_ARM_LINUX_FEATURE_NEON UINT32_C(0x00001000) -#endif - struct cpuinfo_arm_linux_processor { uint32_t architecture_version; // Main ID Register 
value @@ -1308,39 +1122,18 @@ struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset(const struct cpuin // MNN_PRINT("chipset vendor, series, model is: %d, %d, %d\n", chipset.vendor, chipset.series, chipset.model); return chipset; } - -#endif // __ANDROID__ - -#if defined(__APPLE__) && defined(__aarch64__) - -static uint32_t get_sys_info_by_name(const char* type_specifier) { - size_t size = 0; - uint32_t result = 0; - if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) { - MNN_PRINT("sysctlbyname(\"%s\") failed\n", type_specifier); - } else if (size == sizeof(uint32_t)) { - sysctlbyname(type_specifier, &result, &size, NULL, 0); - MNN_PRINT("%s: %u , size = %lu\n", type_specifier, result, size); - } else { - MNN_PRINT("sysctl does not support non-integer lookup for (\"%s\")\n", type_specifier); - } - return result; -} - -#endif // iOS - -void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) { - memset(cpuinfo_isa, 0, sizeof(struct cpuinfo_arm_isa)); - - // android -#ifdef __ANDROID__ +static void _getInfoARMv7(MNNCPUInfo* cpuinfo_isa) { + // Get White List And Black List struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL; - const uint32_t processors_count = getNumberOfCPU(); + if (0 == cpuinfo_isa->groups.size()) { + return; + } + const uint32_t processors_count = cpuinfo_isa->allCpuIdsSorted.size(); char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX] = {0}; arm_linux_processors = static_cast( - calloc(processors_count, sizeof(struct cpuinfo_arm_linux_processor))); + malloc(processors_count * sizeof(struct cpuinfo_arm_linux_processor))); if (arm_linux_processors == NULL) { MNN_PRINT("failed to allocate %zu bytes for descriptions of %u ARM logical processors\n", processors_count * sizeof(struct cpuinfo_arm_linux_processor), processors_count); @@ -1349,6 +1142,7 @@ void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) { if (!cpuinfo_arm_linux_parse_proc_cpuinfo(proc_cpuinfo_hardware, processors_count, arm_linux_processors)) { MNN_PRINT("failed to parse processor information from /proc/cpuinfo\n"); + free(arm_linux_processors); return; } @@ -1369,54 +1163,17 @@ void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) { } } } - - uint32_t isa_features = 0; -#ifdef __aarch64__ - isa_features = (uint32_t)getauxval(AT_HWCAP); -#endif - struct cpuinfo_android_properties android_properties; cpuinfo_arm_android_parse_properties(&android_properties); const struct cpuinfo_arm_chipset chipset = cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0); - - switch (last_midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { - case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ - cpuinfo_isa->dot = true; - break; - default: -#ifdef __aarch64__ - if (isa_features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) { - cpuinfo_isa->dot = true; - } -#endif - // TODO, whitelist, ex: hisilicon_kirin 980... 
- break; - } -#ifdef __aarch64__ - const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; - if ((isa_features & fp16arith_mask) == fp16arith_mask) { - if (chipset.series == cpuinfo_arm_chipset_series_samsung_exynos && chipset.model == 9810) { - cpuinfo_isa->fp16arith = false; - } else { - cpuinfo_isa->fp16arith = true; - } - } - if (isa_features & CPUINFO_ARM_LINUX_FEATURE_I8MM) { - cpuinfo_isa->i8mm = true; - } - /* - if (isa_features & CPUINFO_ARM_LINUX_FEATURE_SVE2) { - // MNN_PRINT("Support SVE2\n"); - } - */ -#else // pytorch/cpuinfo: src/arm/linux/aarch32-isa.c uint32_t architecture_version = 0; if (processors_count > 0) { architecture_version = arm_linux_processors[0].architecture_version; } if (architecture_version >= 8) { + FUNC_PRINT_ALL((last_midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)), 0x); /* * NEON FP16 compute extension and VQRDMLAH/VQRDMLSH instructions are not indicated in /proc/cpuinfo. * Use a MIDR-based heuristic to whitelist processors known to support it: @@ -1437,6 +1194,8 @@ void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) { case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D060): /* Cortex-A65 */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100d440): /* 888 */ + case UINT32_C(0x4100d480): /* 8gen1 */ case UINT32_C(0x4100D0C0): /* Neoverse N1 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ @@ -1459,6 +1218,8 @@ void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) { case UINT32_C(0x4100D0B0): /* Cortex-A76 */ case UINT32_C(0x4100D0D0): /* Cortex-A77 */ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100d440): /* 888 */ + case UINT32_C(0x4100d480): /* 8gen1 */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ @@ -1474,106 +1235,210 @@ void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa) { break; } } -#endif + // Whitelist + switch (last_midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + cpuinfo_isa->dot = true; + break; + default: + // TODO, whitelist, ex: hisilicon_kirin 980... 
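The fp16/dot whitelists above key on the MIDR value with only the implementer and part fields kept, masking off variant and revision. A small sketch of that decoding; the sample MIDR value is hypothetical:

#include <cstdint>
#include <cstdio>

// MIDR_EL1 layout: implementer [31:24], variant [23:20], architecture [19:16],
// part [15:4], revision [3:0]. The whitelist compares implementer | part.
int main() {
    const uint32_t kImplementerMask = UINT32_C(0xFF000000);
    const uint32_t kPartMask        = UINT32_C(0x0000FFF0);
    uint32_t midr = UINT32_C(0x411FD0B1); // hypothetical Cortex-A76 MIDR reading
    uint32_t key = midr & (kImplementerMask | kPartMask);
    printf("implementer 0x%02X, part 0x%03X, whitelist key 0x%08X\n",
           midr >> 24, (midr & kPartMask) >> 4, key); // key == 0x4100D0B0
    return 0;
}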
+ break; + } + // Blacklist + if (chipset.series == cpuinfo_arm_chipset_series_samsung_exynos && chipset.model == 9810) { + // Spectial machine, disable fp16 + cpuinfo_isa->fp16arith = false; + } if (arm_linux_processors) { free(arm_linux_processors); } - -#endif // #ifdef __ANDROID__ - - // iOS -#if defined(__IOS__) && defined(__aarch64__) - -// A11 -#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL -#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 -#endif -// A12 -#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST -#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f -#endif -// A13 -#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER -#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 -#endif -// A14 -#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM -#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 -#endif -// A15 -#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD -#define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xda33d83d -#endif -// A16 -#ifndef CPUFAMILY_ARM_EVEREST_SAWTOOTH -#define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea -#endif -// A17 Pro -#ifndef CPUFAMILY_ARM_PCORE_ECORE_COLL -#define CPUFAMILY_ARM_PCORE_ECORE_COLL 0x2876f5b5 +} #endif - const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); - // const uint32_t cpu_type = get_sys_info_by_name("hw.cputype"); - // const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype"); - - cpuinfo_isa->fp16arith = cpu_family == CPUFAMILY_ARM_MONSOON_MISTRAL || - cpu_family == CPUFAMILY_ARM_VORTEX_TEMPEST || - cpu_family == CPUFAMILY_ARM_LIGHTNING_THUNDER || - cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM || - cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD || - cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH || - cpu_family == CPUFAMILY_ARM_PCORE_ECORE_COLL; - - cpuinfo_isa->dot = cpu_family == CPUFAMILY_ARM_LIGHTNING_THUNDER || - cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM || - cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD || - cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH || - cpu_family == CPUFAMILY_ARM_PCORE_ECORE_COLL; - - cpuinfo_isa->i8mm = cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH || - cpu_family == CPUFAMILY_ARM_PCORE_ECORE_COLL; -#endif // iOS - -// arm64-osx -#if defined(__APPLE__) && defined(__aarch64__) && !defined(__IOS__) -// Apple M1 -#ifndef CPUFAMILY_AARCH64_FIRESTORM_ICESTORM -#define CPUFAMILY_AARCH64_FIRESTORM_ICESTORM 0x1b588bb3 -#endif -// Apple M2 -#ifndef CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD -#define CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD 0xda33d83d -#endif - const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); - cpuinfo_isa->fp16arith = cpu_family == CPUFAMILY_AARCH64_FIRESTORM_ICESTORM || - cpu_family == CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD; - cpuinfo_isa->dot = cpu_family == CPUFAMILY_AARCH64_FIRESTORM_ICESTORM || - cpu_family == CPUFAMILY_AARCH64_AVALANCHE_BLIZZARD; +#if defined(__APPLE__) && defined(__aarch64__) +static bool have_feature(const char* feature) { + // For more information on sysctlbyname(), see: + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + int64_t feature_present = 0; + size_t size = sizeof(feature_present); + if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) { + return false; + } + return feature_present; +} +static void _getInfoApple(MNNCPUInfo* cpuinfo_isa) { + /**Ref from + https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + */ + if (have_feature("hw.optional.arm.FEAT_FP16")) { + cpuinfo_isa->fp16arith = true; + } + if (have_feature("hw.optional.arm.FEAT_DotProd")) { + 
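On Apple silicon the feature flags are read one sysctl key at a time. A stand-alone probe in the same style; it returns false off Apple platforms or when a key is unknown to the running OS:

#include <cstdio>
#if defined(__APPLE__)
#include <sys/sysctl.h>
#endif

// Query one "hw.optional.arm.*" capability flag via sysctlbyname.
static bool haveFeature(const char* name) {
#if defined(__APPLE__)
    int64_t value = 0;
    size_t size = sizeof(value);
    if (sysctlbyname(name, &value, &size, nullptr, 0) != 0) {
        return false; // key not present on this OS / hardware
    }
    return value != 0;
#else
    (void)name;
    return false;
#endif
}

int main() {
    printf("FEAT_DotProd: %d, FEAT_I8MM: %d\n",
           haveFeature("hw.optional.arm.FEAT_DotProd"),
           haveFeature("hw.optional.arm.FEAT_I8MM"));
    return 0;
}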
cpuinfo_isa->dot = true; + } + if (have_feature("hw.optional.arm.FEAT_I8MM")) { + cpuinfo_isa->i8mm = true; + } +} #endif -#ifndef __ANDROID__ -#if defined (__linux__) && defined (__aarch64__) - +#if defined(__linux__) && defined(__aarch64__) +static void _getInfoAux(MNNCPUInfo* cpuinfo_isa) { + // Use AUX to get info for linux-aarch64 uint32_t isa_features = 0; isa_features = (uint32_t)getauxval(AT_HWCAP); + if (isa_features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) { + cpuinfo_isa->dot = true; + } + const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; + if ((isa_features & fp16arith_mask) == fp16arith_mask) { + cpuinfo_isa->fp16arith = true; + } + if (isa_features & CPUINFO_ARM_LINUX_FEATURE_I8MM) { + cpuinfo_isa->i8mm = true; + } + isa_features = (uint32_t)getauxval(AT_HWCAP2); + if (isa_features & CPUINFO_ARM_LINUX_FEATURE_SVE2) { + cpuinfo_isa->sve2 = true; + } +} +#endif - - if (isa_features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) { - cpuinfo_isa->dot = true; +static bool _readAll(const std::string& fileName, MNN::AutoStorage& buffer) { + MNN::FileLoader l(fileName.c_str()); + if (false == l.read()) { + return false; + } + return l.merge(buffer); +} +static std::vector _readNumber(const char* data, int length) { + int current = -1; + std::vector res; + for (int i=0; i '9') { + if (current >=0 ) { + res.emplace_back(current); + current = -1; + } + continue; } - - const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; - if ((isa_features & fp16arith_mask) == fp16arith_mask) { - cpuinfo_isa->fp16arith = true; + if (current >= 0) { + current = current*10 + (c - '0'); + } else { + current = c - '0'; } + } + if (current >=0 ) { + res.emplace_back(current); + current = -1; + } + return res; +} +static MNNCPUInfo* gCPUInfo = nullptr; +static void _fillInfo(MNNCPUInfo* cpuInfo); +const MNNCPUInfo* MNNGetCPUInfo() { + if (nullptr != gCPUInfo) { + return gCPUInfo; + } + gCPUInfo = new MNNCPUInfo; + _fillInfo(gCPUInfo); + return gCPUInfo; +} - if (isa_features & CPUINFO_ARM_LINUX_FEATURE_I8MM) { - cpuinfo_isa->i8mm = true; +static void _fillInfo(MNNCPUInfo* cpuinfo_isa) { + cpuinfo_isa->dot = false; + cpuinfo_isa->fp16arith = false; + cpuinfo_isa->i8mm = false; + cpuinfo_isa->sve2 = false; + // android + /**Get CPU Info*/ +#ifdef __linux__ + do { + DIR* root; + std::string dir = "/sys/devices/system/cpu/cpufreq"; + if ((root = opendir(dir.c_str())) == NULL) { + break; + } + CPUGroup group; + struct dirent* ent; + while ((ent = readdir(root)) != NULL) { + if (ent->d_name[0] != '.') { + std::string policyName = dir + "/" + ent->d_name; + std::string cpus = policyName + "/affected_cpus"; + { + MNN::AutoStorage buffer; + if (false == _readAll(cpus, buffer)) { + continue; + } + group.ids = _readNumber((const char*)buffer.get(), buffer.size()); + } + std::string minfreq = policyName + "/cpuinfo_min_freq"; + { + MNN::AutoStorage buffer; + if (_readAll(minfreq, buffer)) { + auto freq = _readNumber((const char*)buffer.get(), buffer.size()); + if (freq.size() > 0) { + group.minFreq = freq[0]; + } + } + } + std::string maxfreq = policyName + "/cpuinfo_max_freq"; + { + MNN::AutoStorage buffer; + if (_readAll(maxfreq, buffer)) { + auto freq = _readNumber((const char*)buffer.get(), buffer.size()); + if (freq.size() > 0) { + group.maxFreq = freq[0]; + } + } + } + cpuinfo_isa->groups.emplace_back(group); + } } + closedir(root); + std::sort(cpuinfo_isa->groups.begin(), cpuinfo_isa->groups.end(), [](const CPUGroup& left, const 
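On linux-aarch64 the same flags come from the auxiliary vector. A sketch using the kernel's documented hwcap bit assignments (ASIMDDP/FPHP/ASIMDHP in AT_HWCAP, I8MM and SVE2 in AT_HWCAP2); it mirrors the intent of _getInfoAux rather than reproducing it line for line:

#include <cstdint>
#include <cstdio>
#if defined(__linux__) && defined(__aarch64__)
#include <sys/auxv.h>
#endif

int main() {
#if defined(__linux__) && defined(__aarch64__)
    // Bit positions follow the kernel's uapi hwcap headers.
    const uint32_t kAsimdDp = UINT32_C(0x00100000); // HWCAP_ASIMDDP
    const uint32_t kFpHp    = UINT32_C(0x00000200); // HWCAP_FPHP
    const uint32_t kAsimdHp = UINT32_C(0x00000400); // HWCAP_ASIMDHP
    const uint32_t kI8mm    = UINT32_C(0x00002000); // HWCAP2_I8MM
    const uint32_t kSve2    = UINT32_C(0x00000002); // HWCAP2_SVE2
    uint32_t hwcap  = (uint32_t)getauxval(AT_HWCAP);
    uint32_t hwcap2 = (uint32_t)getauxval(AT_HWCAP2);
    bool dot  = (hwcap & kAsimdDp) != 0;
    bool fp16 = (hwcap & (kFpHp | kAsimdHp)) == (kFpHp | kAsimdHp);
    bool i8mm = (hwcap2 & kI8mm) != 0;
    bool sve2 = (hwcap2 & kSve2) != 0;
    printf("dot:%d fp16:%d i8mm:%d sve2:%d\n", dot, fp16, i8mm, sve2);
#endif
    return 0;
}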
CPUGroup& right) { + return left.maxFreq < right.maxFreq; + }); + // Merge group if needed + if (cpuinfo_isa->groups.size() >= 2 && cpuinfo_isa->groups[0].maxFreq == cpuinfo_isa->groups[1].maxFreq) { + auto backupGroups = std::move(cpuinfo_isa->groups); + CPUGroup&& current = std::move(backupGroups[0]); + for (int v=1; vgroups.emplace_back(current); + current = std::move(backupGroups[v]); + } else { + current.ids.insert(current.ids.end(), backupGroups[v].ids.begin(), backupGroups[v].ids.end()); + } + } + cpuinfo_isa->groups.emplace_back(current); + } + cpuinfo_isa->cpuNumber = 0; + for (auto& group : cpuinfo_isa->groups) { + cpuinfo_isa->cpuNumber += group.ids.size(); + std::string message = "CPU Group: ["; + for (int v=0; vdot, cpuinfo_isa->fp16arith, cpuinfo_isa->i8mm); + MNN_PRINT("The device supports: i8sdot:%d, fp16:%d, i8mm: %d, sve2: %d\n", cpuinfo_isa->dot, cpuinfo_isa->fp16arith, cpuinfo_isa->i8mm, cpuinfo_isa->sve2); + return; } diff --git a/source/backend/cpu/CPURuntime.hpp b/source/backend/cpu/CPURuntime.hpp index 4376553c7..7155e023b 100644 --- a/source/backend/cpu/CPURuntime.hpp +++ b/source/backend/cpu/CPURuntime.hpp @@ -9,30 +9,24 @@ #define CPURuntime_hpp #include +#include #include "core/Macro.h" -struct cpuinfo_arm_isa { +struct CPUGroup { + uint32_t minFreq; + uint32_t maxFreq; + std::vector ids; +}; +struct MNNCPUInfo { bool fp16arith; bool dot; bool i8mm; + bool sve2; + std::vector groups; + int cpuNumber = 0; }; -/* - CPU thread mode, only effective on HMP(Heterogeneous Multi-Processing)arch CPUs - that have ARM big.LITTLE technology and on Android - */ -typedef enum { - /* Compliance with Operating System Scheduling */ - MNN_CPU_MODE_DEFAULT = 0, - /* Bind threads to CPU IDs according to CPU frequency, but this mode is power-friendly */ - MNN_CPU_MODE_POWER_FRI = 1, - /* Bind threads to little CPUs */ - MNN_CPU_MODE_LITTLE = 2, - /* Bind threads to big CPUs */ - MNN_CPU_MODE_BIG = 3 -} MNNCPUThreadsMode; -int MNNSetCPUThreadsMode(MNNCPUThreadsMode mode); - -float MNNGetCPUFlops(uint32_t number); -void cpuinfo_arm_init(struct cpuinfo_arm_isa* cpuinfo_isa); +int MNNSetSchedAffinity(const int* cpuIDs, int size); +int MNNGetCurrentPid(); +const MNNCPUInfo* MNNGetCPUInfo(); #endif /* CPUInfo_hpp */ diff --git a/source/backend/cpu/CPUSoftMaxInt8.cpp b/source/backend/cpu/CPUSoftMaxInt8.cpp deleted file mode 100644 index 1630ae52c..000000000 --- a/source/backend/cpu/CPUSoftMaxInt8.cpp +++ /dev/null @@ -1,317 +0,0 @@ -// -// CPUSoftMaxInt8.cpp -// MNNCPU -// -// Created by jbyang on 2023/4/22. -// - -#include "CPUSoftMaxInt8.hpp" -#include "backend/cpu/CPUBackend.hpp" -#include "backend/cpu/CPUFixedPoint.hpp" -#include "backend/cpu/CPUQuantizationUtils.hpp" -#include "core/Macro.h" -#include "core/TensorUtils.hpp" -#include "core/Concurrency.h" -#include "CPUTensorConvert.hpp" - -namespace MNN { - -CPUSoftmaxInt8::CPUSoftmaxInt8(Backend* backend, int axis) : Execution(backend), mAxis(axis), mStorage(2), mTempOutput(2), mNeedUnpackC4(false) { - // do nothing. 
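The cpufreq policy files are parsed by scanning for runs of decimal digits, as _readNumber does above. A self-contained equivalent with a usage example on a typical affected_cpus payload:

#include <cstdio>
#include <string>
#include <vector>

// Extract every run of decimal digits from a text buffer, e.g. the contents of
// /sys/devices/system/cpu/cpufreq/policy*/affected_cpus ("4 5 6 7\n" -> {4,5,6,7}).
static std::vector<int> readNumbers(const char* data, int length) {
    std::vector<int> res;
    int current = -1;
    for (int i = 0; i < length; ++i) {
        char c = data[i];
        if (c < '0' || c > '9') {
            if (current >= 0) { res.push_back(current); current = -1; }
            continue;
        }
        current = (current >= 0) ? current * 10 + (c - '0') : (c - '0');
    }
    if (current >= 0) res.push_back(current);
    return res;
}

int main() {
    std::string cpus = "4 5 6 7\n";
    for (int id : readNumbers(cpus.data(), (int)cpus.size())) printf("%d ", id);
    printf("\n");
    return 0;
}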
-} - -const int kScaledDiffIntegerBits = 5; -const int kAccumulationIntegerBits = 12; - -ErrorCode CPUSoftmaxInt8::onResize(const std::vector& inputs, const std::vector& outputs) { - auto input = inputs[0]; - auto output = outputs[0]; - auto inputQuant = TensorUtils::getQuantInfo(input); - float beta = 1.0; - float scale = inputQuant[0]; - PreprocessSoftmaxScaling(beta, scale, kScaledDiffIntegerBits, &mInputMultiplier, &mInputLeftShift); - mDiffMin = -1.0 * CalculateInputRadius(kScaledDiffIntegerBits, mInputLeftShift); - - const auto layout = TensorUtils::getDescribe(input)->dimensionFormat; - mNeedUnpackC4 = layout == MNN_DATA_FORMAT_NC4HW4; - const int dimensions = input->buffer().dimensions; - - int axis = mAxis; - if (axis < 0) { - axis += input->dimensions(); - } - mInside = 1; mOutside = 1; - for (int i = 0; i < axis; ++i) { - mOutside *= input->length(i); - } - mTargetAxis = input->length(axis); - for (int i = axis + 1; i < dimensions; ++i) { - mInside *= input->length(i); - } - - mStorage.buffer().dim[0].extent = input->length(0); - mStorage.buffer().dim[1].extent = input->stride(0); - TensorUtils::getDescribe(&mStorage)->dimensionFormat = MNN_DATA_FORMAT_NHWC; - mStorage.buffer().dimensions = 2; - mStorage.buffer().type = input->getType(); - backend()->onAcquireBuffer(&mStorage, Backend::DYNAMIC); - backend()->onReleaseBuffer(&mStorage, Backend::DYNAMIC); - - if (mNeedUnpackC4) { - mTempOutput.buffer().dim[0].extent = output->length(0); - mTempOutput.buffer().dim[1].extent = output->stride(0); - TensorUtils::getDescribe(&mTempOutput)->dimensionFormat = MNN_DATA_FORMAT_NHWC; - mTempOutput.buffer().dimensions = 2; - mTempOutput.buffer().type = input->getType(); - backend()->onAcquireBuffer(&mTempOutput, Backend::DYNAMIC); - backend()->onReleaseBuffer(&mTempOutput, Backend::DYNAMIC); - } - - return NO_ERROR; -} - -void CPUSoftmaxInt8::QuantizedSoftmax(const uint8_t* inputData, int outerSize, int targetAxis, - int32_t inputBetaMultiplier, int32_t inputBetaLeftShift, - uint8_t* outputData, int threadNum) { - using FixedPointScaledDiff = FixedPoint; - using FixedPointAccum = FixedPoint; - using FixedPoint0 = FixedPoint; - - const int depth = targetAxis; -#ifdef MNN_USE_SSE - int32_t zeroPoint = 128; - int32_t minValue = 0; - int32_t maxValue = 255; - const uint8_t* src_ = inputData; - uint8_t* dst_ = outputData; -#else - int32_t zeroPoint = 0; - int32_t minValue = -128; - int32_t maxValue = 127; - const int8_t* src_ = (int8_t*)inputData; - int8_t* dst_ = (int8_t*)outputData; -#endif - MNN_CONCURRENCY_BEGIN(tId, threadNum) { - auto inputDataPtr = src_ + tId * depth; - uint8_t* outputDataPtr = (uint8_t*)dst_ + tId * depth; - for (int b = (int)tId; b < outerSize; b += threadNum, inputDataPtr += depth * threadNum, outputDataPtr += depth * threadNum) { - // Determine the largest entry in the current row - int8_t maxInRow = -128; - { - int c = 0; -#ifdef MNN_USE_NEON - int8x16_t max16_0 = vdupq_n_s8(0); - int8x16_t max16_1 = vdupq_n_s8(0); - for (; c <= depth - 32; c += 32) { - max16_0 = vmaxq_s8(max16_0, vld1q_s8(inputDataPtr + c + 0)); - max16_1 = vmaxq_s8(max16_1, vld1q_s8(inputDataPtr + c + 16)); - } - int8x16_t max16 = vmaxq_s8(max16_0, max16_1); - if (c <= depth - 16) { - max16 = vmaxq_s8(max16, vld1q_s8(inputDataPtr + c)); - c += 16; - } - int8x8_t max8 = vmax_s8(vget_low_s8(max16), vget_high_s8(max16)); - if (c <= depth - 8) { - max8 = vmax_s8(max8, vld1_s8(inputDataPtr + c)); - c += 8; - } - int8x8_t max4 = vmax_s8(max8, vext_s8(max8, max8, 4)); - int8x8_t max2 = vmax_s8(max4, 
vext_s8(max4, max4, 2)); - int8x8_t max1 = vpmax_s8(max2, max2); - maxInRow = vget_lane_s8(max1, 0); -#endif - for (; c < depth; ++c) { - maxInRow = std::max(maxInRow, static_cast(inputDataPtr[c] - zeroPoint)); - } - } - -#ifdef MNN_USE_NEON - using FixedPointAccumInt32x4 = FixedPoint; - using FixedPointScaledDiffInt32x4 = FixedPoint; - using FixedPoint0Int32x4 = FixedPoint; - FixedPoint0Int32x4 input_beta_multiplier_f0 = FixedPoint0Int32x4::FromScalarRaw(inputBetaMultiplier); - int16x8_t max_in_row_s16 = vdupq_n_s16(maxInRow); -#endif - - FixedPointAccum sumOfExps = FixedPointAccum::Zero(); - { - int c = 0; -#ifdef MNN_USE_NEON - int32x4_t diff_min_s32 = vdupq_n_s32(mDiffMin); - FixedPointAccumInt32x4 sum_of_exps_0 = FixedPointAccumInt32x4::Zero(); - FixedPointAccumInt32x4 sum_of_exps_1 = FixedPointAccumInt32x4::Zero(); - FixedPointAccumInt32x4 zeros = FixedPointAccumInt32x4::Zero(); - for (; c <= depth - 8; c += 8) { - int16x8_t input_s16 = vmovl_s8(vld1_s8(inputDataPtr + c)); - int16x8_t input_diff_s16 = - vsubq_s16(input_s16, max_in_row_s16); - int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); - int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); - int32x4_t mask_0 = - MaskIfGreaterThanOrEqual(input_diff_s32_0, diff_min_s32); - int32x4_t mask_1 = - MaskIfGreaterThanOrEqual(input_diff_s32_1, diff_min_s32); - FixedPointScaledDiffInt32x4 scaled_diff_0 = - input_beta_multiplier_f0 * - FixedPointScaledDiffInt32x4::FromRaw( - ShiftLeft(input_diff_s32_0, inputBetaLeftShift)); - FixedPointScaledDiffInt32x4 scaled_diff_1 = - input_beta_multiplier_f0 * - FixedPointScaledDiffInt32x4::FromRaw( - ShiftLeft(input_diff_s32_1, inputBetaLeftShift)); - FixedPointAccumInt32x4 exps_0 = - Rescale( - exp_on_negative_values(scaled_diff_0)); - FixedPointAccumInt32x4 exps_1 = - Rescale( - exp_on_negative_values(scaled_diff_1)); - FixedPointAccumInt32x4 masked_exps_0 = - SelectUsingMask(mask_0, exps_0, zeros); - FixedPointAccumInt32x4 masked_exps_1 = - SelectUsingMask(mask_1, exps_1, zeros); - sum_of_exps_0 = sum_of_exps_0 + masked_exps_0; - sum_of_exps_1 = sum_of_exps_1 + masked_exps_1; - } - int32x4_t sum_of_exps_reduced_4 = (sum_of_exps_0 + sum_of_exps_1).raw(); - int32x2_t sum_of_exps_reduced_2 = - vadd_s32(vget_low_s32(sum_of_exps_reduced_4), - vget_high_s32(sum_of_exps_reduced_4)); - int32x2_t sum_of_exps_reduced_1 = - vpadd_s32(sum_of_exps_reduced_2, sum_of_exps_reduced_2); - sumOfExps = - FixedPointAccum::FromRaw(vget_lane_s32(sum_of_exps_reduced_1, 0)); -#endif - for (; c < depth; ++c) { - int32_t inputDiff = (inputDataPtr[c] - zeroPoint) - maxInRow; - if (inputDiff >= mDiffMin) { - const int32_t inputDiffRescaled = - MultiplyByQuantizedMultiplierGreaterThanOne(inputDiff, inputBetaMultiplier, inputBetaLeftShift); - const FixedPointScaledDiff scaledDiffF8 = FixedPointScaledDiff::FromRaw(inputDiffRescaled); - sumOfExps = sumOfExps + Rescale(exp_on_negative_values(scaledDiffF8)); - } - } - } - - int fixedSumOfExps = sumOfExps.raw(); - #if defined(_MSC_VER) - int headroomPlusOne; - { - unsigned long leading_zero = 0; - if (_BitScanReverse(&leading_zero, static_cast(fixedSumOfExps))) { - headroomPlusOne = 31 - leading_zero; - } else { - headroomPlusOne = 31; - } - } - #else - int headroomPlusOne = __builtin_clz(static_cast(fixedSumOfExps)); - #endif - - int numBitsOverUnit = kAccumulationIntegerBits - headroomPlusOne; - - if (numBitsOverUnit + 31 - 8 > 31) { - numBitsOverUnit = 8; - } - int32_t shiftedSumMinusOne = static_cast((static_cast(fixedSumOfExps) << 
headroomPlusOne) - - (static_cast(1) << 31)); - FixedPoint0 shiftedScale = one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shiftedSumMinusOne)); - - { - int c = 0; -#ifdef MNN_USE_NEON - int16x8_t diff_min_s16 = vdupq_n_s16(mDiffMin); - for (; c <= depth - 8; c += 8) { - int16x8_t input_s16 = vmovl_s8(vld1_s8(inputDataPtr + c)); - int16x8_t input_diff_s16 = - vsubq_s16(input_s16, max_in_row_s16); - int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); - int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); - uint8x8_t mask = vmovn_u16(vcgeq_s16(input_diff_s16, diff_min_s16)); - FixedPointScaledDiffInt32x4 scaled_diff_0 = - input_beta_multiplier_f0 * - FixedPointScaledDiffInt32x4::FromRaw( - ShiftLeft(input_diff_s32_0, inputBetaLeftShift)); - FixedPointScaledDiffInt32x4 scaled_diff_1 = - input_beta_multiplier_f0 * - FixedPointScaledDiffInt32x4::FromRaw( - ShiftLeft(input_diff_s32_1, inputBetaLeftShift)); - FixedPoint0Int32x4 exp_0 = exp_on_negative_values(scaled_diff_0); - FixedPoint0Int32x4 exp_1 = exp_on_negative_values(scaled_diff_1); - int32x4_t output_s32_0 = RoundingDivideByPOT( - vqrdmulhq_n_s32(exp_0.raw(), shiftedScale.raw()), - numBitsOverUnit + 31 - 8); - int32x4_t output_s32_1 = RoundingDivideByPOT( - vqrdmulhq_n_s32(exp_1.raw(), shiftedScale.raw()), - numBitsOverUnit + 31 - 8); - int16x8_t output_s16 = - vcombine_s16(vqmovn_s32(output_s32_0), vqmovn_s32(output_s32_1)); - uint8x8_t output_s8 = vqmovun_s16(output_s16); - uint8x8_t masked_output = vbsl_u8(mask, output_s8, vdup_n_u8(0)); - vst1_u8(outputDataPtr + c, masked_output); - } -#endif - for (; c < depth; ++c) { - int32_t inputDiff = (inputDataPtr[c] - zeroPoint) - maxInRow; - if (inputDiff >= mDiffMin) { - const int inputDiffRescaled = - MultiplyByQuantizedMultiplierGreaterThanOne(inputDiff, inputBetaMultiplier, inputBetaLeftShift); - const FixedPointScaledDiff scaledDiffF8 = FixedPointScaledDiff::FromRaw(inputDiffRescaled); - FixedPoint0 expIn0 = exp_on_negative_values(scaledDiffF8); - - int unsatOutput = RoundingDivideByPOT((shiftedScale * expIn0).raw(), numBitsOverUnit + 31 - 8) + zeroPoint; - outputDataPtr[c] = std::max(std::min(unsatOutput, maxValue), minValue); - - } - else { - outputDataPtr[c] = zeroPoint; - } - } - } - } - } - MNN_CONCURRENCY_END(); -} - -ErrorCode CPUSoftmaxInt8::onExecute(const std::vector& inputs, - const std::vector& outputs) { - MNN_ASSERT(1 == inputs.size()); - MNN_ASSERT(1 == outputs.size()); - - Tensor* input = inputs[0]; - Tensor* output = outputs[0]; - uint8_t* inputData = input->host(); - uint8_t* outputData = output->host(); - - auto batch = input->batch(); - auto dimentions = input->dimensions(); - int areaInput = 1; - for (int i = 2; i < dimentions; ++i) { - areaInput *= input->length(i); - } - int threadNum = ((CPUBackend *)backend())->threadNumber(); - - uint8_t* tempInputData = mStorage.host(); - auto functions = ((CPUBackend*)backend())->functions(); - if (mNeedUnpackC4) { - uint8_t* tempOutputData = mTempOutput.host(); - CPUTensorConverter::convert(inputData, outputData, MNN_DATA_FORMAT_NC4HW4, MNN_DATA_FORMAT_NCHW, batch, areaInput, input->channel(), 1, functions); - CPUTensorConverter::convert(outputData, tempInputData, MNN_DATA_FORMAT_NCHW, MNN_DATA_FORMAT_NHWC, mOutside, mInside, mTargetAxis, 1, functions); - QuantizedSoftmax(tempInputData, mInside * mOutside, mTargetAxis, mInputMultiplier, mInputLeftShift, tempOutputData, threadNum); - CPUTensorConverter::convert(tempOutputData, tempInputData, MNN_DATA_FORMAT_NHWC, MNN_DATA_FORMAT_NCHW, 
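The removed fixed-point softmax normalizes its exponent sum by the leading-zero count, using __builtin_clz on GCC/Clang and _BitScanReverse on MSVC. A portable stand-alone helper for that pattern, with my own convention for a zero input:

#include <cstdint>
#include <cstdio>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

// Count leading zeros of a 32-bit value; returns 32 for zero.
static int countLeadingZeros(uint32_t x) {
#if defined(_MSC_VER)
    unsigned long index = 0;
    if (_BitScanReverse(&index, x)) {
        return 31 - (int)index;
    }
    return 32;
#else
    return x == 0 ? 32 : __builtin_clz(x);
#endif
}

int main() {
    printf("%d %d %d\n", countLeadingZeros(1u << 31), countLeadingZeros(1u),
           countLeadingZeros(0u)); // 0, 31, 32
    return 0;
}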
mOutside, mInside, mTargetAxis, 1, functions); - CPUTensorConverter::convert(tempInputData, outputData, MNN_DATA_FORMAT_NCHW, MNN_DATA_FORMAT_NC4HW4, batch, areaInput, input->channel(), 1, functions); - } else { - CPUTensorConverter::convert(inputData, outputData, MNN_DATA_FORMAT_NCHW, MNN_DATA_FORMAT_NHWC, mOutside, mInside, mTargetAxis, 1, functions); - QuantizedSoftmax(outputData, mInside * mOutside, mTargetAxis, mInputMultiplier, mInputLeftShift, tempInputData, threadNum); - CPUTensorConverter::convert(tempInputData, outputData, MNN_DATA_FORMAT_NHWC, MNN_DATA_FORMAT_NCHW, mOutside, mInside, mTargetAxis, 1, functions); - } - - return NO_ERROR; -} - -Execution* CPUSoftmaxInt8::create(const MNN::Op *op, Backend *backend) { - auto axis = op->main_as_Axis()->axis(); - return new CPUSoftmaxInt8(backend, axis); -} - -} diff --git a/source/backend/cpu/CPUSoftMaxInt8.hpp b/source/backend/cpu/CPUSoftMaxInt8.hpp deleted file mode 100644 index a1f8e4da4..000000000 --- a/source/backend/cpu/CPUSoftMaxInt8.hpp +++ /dev/null @@ -1,39 +0,0 @@ -// -// CPUSoftMaxInt8.hpp -// MNNCPU -// -// Created by MNN on 2023/4/22. -// - -#ifndef CPUSoftMaxInt8_hpp -#define CPUSoftMaxInt8_hpp -#include "core/Execution.hpp" -#include -namespace MNN { - -class CPUSoftmaxInt8 : public Execution { -public: - CPUSoftmaxInt8(Backend *backend, int axis); - virtual ~CPUSoftmaxInt8() = default; - virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; - virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; - static Execution* create(const MNN::Op *op, Backend *backend); - - void QuantizedSoftmax(const uint8_t *inputData, int outerSize, int targetAxis, int32_t inputBetaMultiplier, - int32_t inputBetaLeftShift, uint8_t *output_data, int threadNum); - -private: - int32_t mInputMultiplier; - int mInputLeftShift; - int mDiffMin; - int mAxis; - int mInside; - int mOutside; - int mTargetAxis; - Tensor mStorage; - Tensor mTempOutput; - bool mNeedUnpackC4; -}; - -} -#endif /* CPUSoftMaxInt8_hpp */ diff --git a/source/backend/cpu/CPUSoftmax.cpp b/source/backend/cpu/CPUSoftmax.cpp index c8cfecede..d4811899a 100644 --- a/source/backend/cpu/CPUSoftmax.cpp +++ b/source/backend/cpu/CPUSoftmax.cpp @@ -8,13 +8,13 @@ #include #include "backend/cpu/CPUSoftmax.hpp" -#include "backend/cpu/CPUSoftMaxInt8.hpp" #include "backend/cpu/CPUBackend.hpp" #include "backend/cpu/compute/CommonOptFunction.h" #include "core/Concurrency.h" #include "core/Macro.h" #include "core/TensorUtils.hpp" #include "CPUTensorConvert.hpp" +#include "CPUCast.hpp" namespace MNN { static void ___MNNSoftmax(float* dest, const float* source, size_t size, MNNBinaryExecute mulfunction) { @@ -71,19 +71,39 @@ int CPUSoftmax::_softmaxCommon(const uint8_t *srcData, uint8_t *dstData) { addFunction = fp32Core->MNNSelectBinaryFunctionForFloat(BinaryOpOperation_ADD); recFunction = fp32Core->MNNSelectUnaryFunctionForFloat(UnaryOpOperation_RECIPROCAL, 1);//Use high precision MNN_CONCURRENCY_BEGIN(tId, threadNumber) { - auto tempInput = (float*)(mTmpInput.ptr() + tId * outsideStride * sizeof(float)); - auto tempOutput = (float*)(mTmpOutput.ptr() + tId * outsideStride * sizeof(float)); + float* tempOutput = nullptr; + float* tempInput = nullptr; + if (mTmpInput.ptr()) { + tempInput = (float*)(mTmpInput.ptr() + tId * outsideStride * sizeof(float)); + } + + if (mTmpOutput.ptr()) { + tempOutput = (float*)(mTmpOutput.ptr() + tId * outsideStride * sizeof(float)); + } + for (int o=tId; oscale, mInQuantAttr->zero, 
mInQuantAttr->min, mInQuantAttr->max, cpuBn); + ::memcpy(tempOutput, tempInput, mInside * 4); + for (int z = 1; z < mChannel; ++z) { + maxFunction(tempOutput, tempOutput, tempInput + z * mInside, mInside, -1); + } + } else { + ::memcpy(tempInput, srcO, mInside * mLowOrInt8); + for (int z = 1; z < mChannel; ++z) { + maxFunction(tempInput, tempInput, srcO + z * mInside * mLowOrInt8, mInside, -1); + } } // Sub Max for (int z=0; zbytes != 4) { + if (mLowOrInt8 != 4) { workSrc = tempInput; workDst = tempOutput; - core->MNNLowpToFp32((int16_t*)(dstO), workSrc, outsideStride); + if (mLowOrInt8 == 2) { + core->MNNLowpToFp32((int16_t*)(dstO), workSrc, outsideStride); + } } // Use Fp32 to compute Begin MNNExp(workDst, workSrc, exprOffset, outsideStride); @@ -113,8 +135,12 @@ int CPUSoftmax::_softmaxCommon(const uint8_t *srcData, uint8_t *dstData) { mulFunction(workDst + z * mInside, workDst + z * mInside, tempInput, mInside, -1); } // Use Fp32 Compute end - if (core->bytes != 4) { + if (mLowOrInt8 == 2) { core->MNNFp32ToLowp(workDst, (int16_t*)(dstO), outsideStride); + } else if (mLowOrInt8 == 1) { + CPUCastCreator::cast(workDst, dstO, CPUCastCreator::FlOAT_TO_INT8, outsideStride, mOutQuantAttr->scale, mOutQuantAttr->zero, mOutQuantAttr->min, mOutQuantAttr->max, cpuBn); + } else { + // do nothing. } } }; @@ -122,19 +148,29 @@ int CPUSoftmax::_softmaxCommon(const uint8_t *srcData, uint8_t *dstData) { return 0; } MNN_CONCURRENCY_BEGIN(tId, threadNumber) { - auto tempInput = (float*)(mTmpInput.ptr() + tId * outsideStride * sizeof(float)); - auto tempOutput = (float*)(mTmpOutput.ptr() + tId * outsideStride * sizeof(float)); + float* tempInput; + float* tempOutput; + if (mTmpInput.ptr()) { + tempInput = (float*)(mTmpInput.ptr() + tId * outsideStride * sizeof(float)); + } + if (mTmpOutput.ptr()) { + tempOutput = (float*)(mTmpOutput.ptr() + tId * outsideStride * sizeof(float)); + } for (int o=tId; oMNNLowpToFp32((int16_t*)(srcO), tempInput, outsideStride); workDst = tempOutput; workSrc = tempInput; + } else if (mLowOrInt8 == 1) { + CPUCastCreator::cast(srcO, tempInput, CPUCastCreator::INT8_TO_FlOAT, outsideStride, mInQuantAttr->scale, mInQuantAttr->zero, mInQuantAttr->min, mInQuantAttr->max, cpuBn); + workDst = tempOutput; + workSrc = tempInput; } } else { int dims[] = { @@ -143,12 +179,17 @@ int CPUSoftmax::_softmaxCommon(const uint8_t *srcData, uint8_t *dstData) { mInside, mChannel }; - if (bytes != 4) { + if (mLowOrInt8 == 2) { MNN_ASSERT(bytes == 2); MNNTranspose16Bit((int16_t*)tempOutput, (int16_t*)(srcO), dims); core->MNNLowpToFp32((int16_t*)tempOutput, tempInput, outsideStride); workDst = tempOutput; workSrc = tempInput; + } else if (mLowOrInt8 == 1) { + CPUCastCreator::cast(srcO, tempOutput, CPUCastCreator::INT8_TO_FlOAT, outsideStride, mInQuantAttr->scale, mInQuantAttr->zero, mInQuantAttr->min, mInQuantAttr->max, cpuBn); + MNNTranspose32Bit((int32_t*)tempInput, (int32_t*)tempOutput, dims); + workDst = tempOutput; + workSrc = tempInput; } else { // Use output to cache transpoe result MNNTranspose32Bit((int32_t*)dstO, (int32_t*)(srcO), dims); @@ -166,8 +207,10 @@ int CPUSoftmax::_softmaxCommon(const uint8_t *srcData, uint8_t *dstData) { } // PostTreat if (1 == mInside) { - if (bytes != 4) { + if (mLowOrInt8 == 2) { core->MNNFp32ToLowp(tempOutput, (int16_t*)(dstO), outsideStride); + } else if (mLowOrInt8 == 1) { + CPUCastCreator::cast(tempOutput, dstO, CPUCastCreator::FlOAT_TO_INT8, outsideStride, mOutQuantAttr->scale, mOutQuantAttr->zero, mOutQuantAttr->min, mOutQuantAttr->max, cpuBn); } } else { 
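With the dedicated int8 softmax removed, the int8 path now dequantizes to float, runs the ordinary max/exp/normalize softmax, and requantizes on the way out. A compact sketch of that flow; the scales and zero points are illustrative, not taken from a real quantAttr:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Dequantize -> float softmax -> requantize over one channel run.
static void softmaxInt8(const int8_t* src, int8_t* dst, int size,
                        float inScale, int inZero, float outScale, int outZero) {
    std::vector<float> tmp(size);
    float maxVal = -INFINITY;
    for (int i = 0; i < size; ++i) {
        tmp[i] = (src[i] - inZero) * inScale;     // INT8 -> float
        maxVal = std::max(maxVal, tmp[i]);
    }
    float sum = 0.f;
    for (int i = 0; i < size; ++i) {
        tmp[i] = std::exp(tmp[i] - maxVal);       // subtract max for stability
        sum += tmp[i];
    }
    for (int i = 0; i < size; ++i) {
        int q = (int)std::round(tmp[i] / sum / outScale) + outZero; // float -> INT8
        dst[i] = (int8_t)std::min(127, std::max(-128, q));
    }
}

int main() {
    int8_t in[4] = {-20, 0, 10, 30}, out[4];
    softmaxInt8(in, out, 4, 0.1f, 0, 1.0f / 255.0f, -128);
    for (int i = 0; i < 4; ++i) printf("%d ", out[i]);
    printf("\n");
    return 0;
}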
int dims[] = { @@ -176,10 +219,13 @@ int CPUSoftmax::_softmaxCommon(const uint8_t *srcData, uint8_t *dstData) { mChannel, mInside }; - if (bytes != 4) { - MNN_ASSERT(bytes == 2); + if (mLowOrInt8 == 2) { + MNN_ASSERT(bytes == 2); core->MNNFp32ToLowp((float*)tempOutput, (int16_t*)tempInput, outsideStride); MNNTranspose16Bit((int16_t*)dstO, (int16_t*)(tempInput), dims); + } else if (mLowOrInt8 == 1) { + MNNTranspose32Bit((int32_t*)tempInput, (int32_t*)tempOutput, dims); + CPUCastCreator::cast(tempInput, dstO, CPUCastCreator::FlOAT_TO_INT8, outsideStride, mOutQuantAttr->scale, mOutQuantAttr->zero, mOutQuantAttr->min, mOutQuantAttr->max, cpuBn); } else { MNNTranspose32Bit((int32_t*)dstO, (int32_t*)(tempInput), dims); } @@ -227,14 +273,24 @@ ErrorCode CPUSoftmax::onResize(const std::vector &inputs, const std::v mInside = inside; mOutside = outside; mChannel = channel; + + mLowOrInt8 = 4; + if (static_cast(backend())->functions()->bytes != 4) { + mLowOrInt8 = 2; + } + if (CPUBackend::getDataType(inputs[0]) == DataType_DT_INT8 || inputs[0]->getType().bytes() == 1) { + mLowOrInt8 = 1; + } + mInQuantAttr = TensorUtils::getDescribe(inputs[0])->quantAttr; + mOutQuantAttr = TensorUtils::getDescribe(outputs[0])->quantAttr; auto cpuBn = static_cast(backend()); - if (inside != 1 || cpuBn->functions()->bytes != 4) { // not run _softmax1, we need maxValue Tensor and sumValue Tensor. + if (inside != 1 || mLowOrInt8 != 4) { // not run _softmax1, we need maxValue Tensor and sumValue Tensor. int threadNum = cpuBn->threadNumber(); auto buf = cpuBn->getBufferAllocator(); threadNum = ALIMIN(threadNum, outside); mTmpInput = buf->alloc(threadNum * inside * channel * sizeof(float)); - if (cpuBn->functions()->bytes != 4) { + if (mLowOrInt8 != 4) { mTmpOutput = buf->alloc(threadNum * inside * channel * sizeof(float)); buf->free(mTmpOutput); } @@ -274,9 +330,9 @@ ErrorCode CPUSoftmax::onExecute(const std::vector &inputs, const std:: return NO_ERROR; } auto functions = static_cast(backend())->functions(); - CPUTensorConverter::convert(inputDataPtr, outputDataPtr, MNN_DATA_FORMAT_NC4HW4, MNN_DATA_FORMAT_NCHW, batch, areaInput, inputTensor->channel(), functions->bytes, functions); + CPUTensorConverter::convert(inputDataPtr, outputDataPtr, MNN_DATA_FORMAT_NC4HW4, MNN_DATA_FORMAT_NCHW, batch, areaInput, inputTensor->channel(), mLowOrInt8, functions); _softmaxCommon((uint8_t*)outputDataPtr, (uint8_t*)tempData); - CPUTensorConverter::convert(tempData, outputDataPtr, MNN_DATA_FORMAT_NCHW, MNN_DATA_FORMAT_NC4HW4, batch, areaInput, inputTensor->channel(), functions->bytes, functions); + CPUTensorConverter::convert(tempData, outputDataPtr, MNN_DATA_FORMAT_NCHW, MNN_DATA_FORMAT_NC4HW4, batch, areaInput, inputTensor->channel(), mLowOrInt8, functions); return NO_ERROR; } @@ -293,11 +349,8 @@ class CPUSoftmaxCreator : public CPUBackend::Creator { public: virtual Execution *onCreate(const std::vector &inputs, const std::vector &outputs, const MNN::Op *op, Backend *backend) const override { - if (CPUBackend::getDataType(inputs[0]) == DataType_DT_INT8 || inputs[0]->getType().bytes() == 1) { - return CPUSoftmaxInt8::create(op, backend); - } else { - return CPUSoftmax::create(op, backend); - } + return CPUSoftmax::create(op, backend); + } }; diff --git a/source/backend/cpu/CPUSoftmax.hpp b/source/backend/cpu/CPUSoftmax.hpp index c76cd7554..ec6f25102 100644 --- a/source/backend/cpu/CPUSoftmax.hpp +++ b/source/backend/cpu/CPUSoftmax.hpp @@ -11,6 +11,7 @@ #include "core/Execution.hpp" #include "core/BufferAllocator.hpp" +#include 
"core/TensorUtils.hpp" namespace MNN { class CPUSoftmax : public Execution { public: @@ -32,6 +33,11 @@ class CPUSoftmax : public Execution { int mInside; int mOutside; int mChannel; + + std::shared_ptr mInQuantAttr; + std::shared_ptr mOutQuantAttr; + + int mLowOrInt8; }; } // namespace MNN diff --git a/source/backend/cpu/ThreadPool.cpp b/source/backend/cpu/ThreadPool.cpp index 75020fdd7..4b489151b 100644 --- a/source/backend/cpu/ThreadPool.cpp +++ b/source/backend/cpu/ThreadPool.cpp @@ -10,15 +10,6 @@ #include #include -//#define MNN_THREAD_LOCK_CPU - -#ifdef MNN_THREAD_LOCK_CPU -#include -#include -#include -#include -#endif - #define MNN_THREAD_POOL_MAX_TASKS 2 namespace MNN { ThreadPool* ThreadPool::gInstance = nullptr; @@ -45,115 +36,13 @@ void ThreadPool::destroy() { gInstance = nullptr; } } -#ifdef MNN_THREAD_LOCK_CPU -static int getNumberOfCPU() { - FILE* fp = fopen("/proc/cpuinfo", "rb"); - if (!fp) { - return 1; - } - int number = 0; - char buffer[1024]; - while (!feof(fp)) { - char* str = fgets(buffer, 1024, fp); - if (!str) { - break; - } - if (memcmp(buffer, "processor", 9) == 0) { - number++; - } - } - fclose(fp); - if (number < 1) { - number = 1; - } - return number; -} - -static int getCPUMaxFreqKHz(int cpuID) { - char path[256]; - sprintf(path, "/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state", cpuID); - FILE* fp = fopen(path, "rb"); - if (!fp) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state", cpuID); - fp = fopen(path, "rb"); - if (!fp) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpuID); - fp = fopen(path, "rb"); - if (!fp) { - return -1; - } - int maxfrequency = -1; - fscanf(fp, "%d", &maxfrequency); - fclose(fp); - return maxfrequency; - } - } - int maxfrequency = 0; - while (!feof(fp)) { - int frequency = 0; - int history = fscanf(fp, "%d %*d", &frequency); - if (history != 1) { - break; - } - if (frequency > maxfrequency) { - maxfrequency = frequency; - } - } - fclose(fp); - return maxfrequency; -} - -static std::vector sortCPUIDByMaxFrequency(int maxNumbers) { - const int cpuNumbers = getNumberOfCPU(); - if (cpuNumbers == 0) { - return {}; - } - std::vector cpuIDs; - std::vector> cpusFrequency; - cpusFrequency.resize(cpuNumbers); - for (int i = 0; i < cpuNumbers; ++i) { - int frequency = getCPUMaxFreqKHz(i); - cpusFrequency[i].first = frequency; - cpusFrequency[i].second = i; - } - maxNumbers = std::min(maxNumbers, cpuNumbers); - std::sort(cpusFrequency.rbegin(), cpusFrequency.rend()); - cpuIDs.resize(maxNumbers); - for (int i = 0; i < maxNumbers; ++i) { - cpuIDs[i] = cpusFrequency[i].second; - } - // FUNC_PRINT(cpusFrequency[0].first); - return cpuIDs; -} - -static int setSchedAffinity(const std::vector& cpuIDs) { -#define __NCPUBITS (8 * sizeof(unsigned long)) - typedef struct { - unsigned long __bits[CPU_SETSIZE / __NCPUBITS]; - } cpu_set_t; - - // set affinity for thread - - pid_t pid = gettid(); - cpu_set_t mask; - CPU_ZERO(&mask); - for (int i = 1; i < (int)cpuIDs.size(); i++) { - CPU_SET(cpuIDs[i], &mask); - } - - int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); - if (syscallret) { - MNN_PRINT("syscall error %d\n", syscallret); - return -1; - } - - return 0; -} -#endif // arch ThreadPool::ThreadPool(int numberThread) { mNumberThread = numberThread; - mActiveCount = 0; + mActiveCount.resize(numberThread); + for (int i=0; i sortedCPUIDs = sortCPUIDByMaxFrequency(numberThread); -#endif for (int i = 1; i < mNumberThread; ++i) { int threadIndex = i; -#ifdef 
MNN_THREAD_LOCK_CPU - mWorkers.emplace_back([this, sortedCPUIDs, threadIndex]() { -#else mWorkers.emplace_back([this, threadIndex]() { -#endif -#ifdef MNN_THREAD_LOCK_CPU - int res = setSchedAffinity(sortedCPUIDs); -#endif while (!mStop) { - while (mActiveCount > 0) { + while (*mActiveCount[threadIndex] > 0) { for (int i = 0; i < MNN_THREAD_POOL_MAX_TASKS; ++i) { if (*mTasks[i].second[threadIndex]) { mTasks[i].first.first(threadIndex); @@ -186,7 +65,7 @@ ThreadPool::ThreadPool(int numberThread) { std::this_thread::yield(); } std::unique_lock _l(mQueueMutex); - mCondition.wait(_l, [this] { return mStop || mActiveCount > 0; }); + mCondition.wait(_l, [this, threadIndex] { return mStop || *mActiveCount[threadIndex] > 0; }); } }); } @@ -206,6 +85,9 @@ ThreadPool::~ThreadPool() { delete c; } } + for (int i=0; imTaskAvailable[index] = true; } -void ThreadPool::active() { +void ThreadPool::active(int threadNumber) { if (nullptr == gInstance) { return; } { std::lock_guard _l(gInstance->mQueueMutex); - gInstance->mActiveCount++; + for (int i=0; imActiveCount[i])++; + } } gInstance->mCondition.notify_all(); } -void ThreadPool::deactive() { +void ThreadPool::deactive(int threadNumber) { if (nullptr == gInstance) { return; } - gInstance->mActiveCount--; + for (int i=0; imActiveCount[i])--; + } } -void ThreadPool::enqueue(TASK&& task, int index) { +void ThreadPool::enqueue(TASK&& task, int index, int threadNumber) { if (1 >= task.second || 0 > index) { for (int i = 0; i < task.second; ++i) { task.first(i); @@ -257,25 +143,24 @@ void ThreadPool::enqueue(TASK&& task, int index) { return; } MNN_ASSERT(nullptr != gInstance); - gInstance->enqueueInternal(std::move(task), index); + gInstance->enqueueInternal(std::move(task), index, threadNumber); } -void ThreadPool::enqueueInternal(TASK&& task, int index) { - if (mActiveCount == 0) { +void ThreadPool::enqueueInternal(TASK&& task, int index, int threadNumber) { + if (threadNumber <= 1) { for (int i = 0; i < task.second; ++i) { task.first(i); } return; } int workSize = task.second; - if (workSize > mNumberThread) { + if (workSize > threadNumber) { mTasks[index].first = std::make_pair( - [workSize, &task, this](int tId) { - for (int v = tId; v < workSize; v += mNumberThread) { + [workSize, &task, threadNumber, this](int tId) { + for (int v = tId; v < workSize; v += threadNumber) { task.first(v); } - }, - mNumberThread); - workSize = mNumberThread; + },threadNumber); + workSize = threadNumber; } else { mTasks[index].first = std::move(task); } diff --git a/source/backend/cpu/ThreadPool.hpp b/source/backend/cpu/ThreadPool.hpp index c491338b5..f93ee9cf8 100644 --- a/source/backend/cpu/ThreadPool.hpp +++ b/source/backend/cpu/ThreadPool.hpp @@ -25,10 +25,10 @@ class MNN_PUBLIC ThreadPool { int number() const { return mNumberThread; } - static void enqueue(TASK&& task, int index); + static void enqueue(TASK&& task, int index, int threadNumber); - static void active(); - static void deactive(); + static void active(int threadNumber); + static void deactive(int threadNumber); static int acquireWorkIndex(); static void releaseWorkIndex(int index); @@ -37,7 +37,7 @@ class MNN_PUBLIC ThreadPool { static void destroy(); private: - void enqueueInternal(TASK&& task, int index); + void enqueueInternal(TASK&& task, int index, int threadNumber); static ThreadPool* gInstance; ThreadPool(int number = 0); @@ -52,7 +52,7 @@ class MNN_PUBLIC ThreadPool { std::mutex mQueueMutex; int mNumberThread = 0; - std::atomic_int mActiveCount = {0}; + std::vector mActiveCount; }; } // namespace 
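enqueueInternal now splits work by the thread count granted to the caller rather than the pool-wide mNumberThread, wrapping oversized tasks so each worker strides through the items. A minimal single-threaded illustration of that wrapping; the TASK layout follows the pair<function, count> shape used above:

#include <cstdio>
#include <functional>
#include <utility>

int main() {
    using TASK = std::pair<std::function<void(int)>, int>; // (body, work count)
    TASK task{[](int v) { printf("item %d\n", v); }, 10};
    int threadNumber = 3;
    int workSize = task.second;
    if (workSize > threadNumber) {
        auto body = task.first; // keep the original body alive inside the wrapper
        task.first = [workSize, threadNumber, body](int tId) {
            for (int v = tId; v < workSize; v += threadNumber) {
                body(v); // worker tId handles items tId, tId+threadNumber, ...
            }
        };
        workSize = threadNumber;
    }
    for (int tId = 0; tId < workSize; ++tId) {
        task.first(tId); // each worker is invoked once with its thread id
    }
    return 0;
}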
MNN #endif diff --git a/source/backend/cpu/arm/CommonNeonBF16.cpp b/source/backend/cpu/arm/CommonNeonBF16.cpp deleted file mode 100644 index abb1bb1be..000000000 --- a/source/backend/cpu/arm/CommonNeonBF16.cpp +++ /dev/null @@ -1,187 +0,0 @@ - - -#if defined(MNN_SUPPORT_BF16) // CmakeList.txt does not work for ios, this file has to be self-filted, MNN.podspec doesnot filter this. - -#include "core/Macro.h" -#include "../compute/CommonOptFunction.h" -#include "./FunctionSummary.hpp" - -// todo: search for proper value for bf16 -void NEON_MNNGetMatMulPackMode_BF16(int* eP, int* lP, int* hP) { - *eP = 12; - *lP = 1; -#ifdef __aarch64__ - *hP = 8; -#else - *hP = 4; -#endif -} - -#ifdef __aarch64__ -#define EP 12 -#define HP 8 -#define LP 4 -void ARMV86_MNNGetMatMulPackMode_BF16(int* eP, int* lP, int* hP) { - *eP = EP; - *hP = HP; - *lP = LP; -} -void ARMV86_MNNPackForMatMul_B_BF16(float* destF, const float* sourceF, size_t h, size_t l, bool transpose) { - // [l, h] -> [h/hp, l/lp, hp, lp] - auto dest = (int16_t*)destF; - auto source = (const int16_t*)sourceF; - auto lCP = UP_DIV(l, LP); - auto hCP = UP_DIV(h, HP); - int sYstride = 1; - int sXstride = h; - if (transpose) { - sYstride = l; - sXstride = 1; - } - ::memset(dest, 0, lCP * hCP * sizeof(int16_t) * HP * LP); - for (int y = 0; y < h; ++y) { - int yC = y / HP; - int yR = y % HP; - for (int x = 0; x < l; ++x) { - int xC = x / LP; - int xR = x % LP; - dest[xR + yR * LP + xC * HP * LP + yC * HP * LP * lCP] = source[sXstride * x + sYstride * y]; - } - } -} -void ARMV86_MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) { - // [l/4, e, 4] -> [l/4, ep, 4] - int number = info[0]; - int eReal = info[1]; - int eDest = info[2]; - int offset = info[3]; - if (1 == number) { - int l = el[1]; - if (l % 8 != 0) { - auto lAigin = UP_DIV(l, LP) * LP; - ::memset(destOrigin, 0, eDest * lAigin * sizeof(int16_t)); - } - } - - for (int n=0; n [l/4, ep, 4] - for (int x = 0; x < lDiv; ++x) { - auto destX = (int64_t*)(dest + x * eDest * 4); - auto srcX = (int64_t*)(source + x * eReal * 4); - for (int y = 0; y < e; ++y) { - destX[y] = srcX[y * offset]; - } - } - continue; - } - for (int x = 0; x < l; ++x) { - auto dl = lOR + x; - auto dlC = dl / LP; - auto dlR = dl % LP; - auto xC = x / LP; - auto xR = x % LP; - auto destX = dest + dlC * eDest * LP + dlR; - auto srcX = source + xC * eReal * LP + xR; - for (int y = 0; y < e; ++y) { - destX[y * 4] = srcX[y * 4 * offset]; - } - } - } -} -#undef EP -#undef HP -#undef LP -void NEON_MNNPackForMatMul_B_BF16(float* destFloat, const float* sourceFloat, size_t h, size_t l, bool transpose) { - auto hP = (int)h / 8; - auto hR = (int)hP * 8; - int16_t* dest = (int16_t*)destFloat; - int16_t* source = (int16_t*)sourceFloat; - if (hR != h) { - ::memset(dest, 0, UP_DIV(h, 8) * 8 * l * sizeof(int16_t)); - } - if (!transpose) { - for (int y = 0; y < hP; ++y) { - auto destY = dest + y * 8 * l; - auto sourceY = source + y * 8; - for (int x = 0; x < l; ++x) { - ::memcpy(destY + 8 * x, sourceY + x * h, 8 * sizeof(int16_t)); - } - } - auto hRemain = h - hR; - if (hRemain > 0) { - auto destY = dest + hP * 8 * l; - auto sourceY = source + hP * 8; - for (int x = 0; x < l; ++x) { - ::memcpy(destY + 8 * x, sourceY + x * h, hRemain * sizeof(int16_t)); - } - } - return; - } - int lC8 = (int)l / 8; - auto lR = lC8 * 8; - if (hP > 0 && lC8 > 0) { - MNNPackC8_BF16(destFloat, sourceFloat, l, h); - } - for (int y = hR; y < h; ++y) { - auto yR = y % 8; - auto yC = hP; - for (int x = 0; x < 
l; ++x) { - dest[x * 8 + yR + yC * 8 * l] = source[x + y * l]; - } - } - for (int y = 0; y < hR; ++y) { - auto yR = y % 8; - auto yC = y / 8; - for (int x = lR; x < l; ++x) { - dest[x * 8 + yR + yC * 8 * l] = source[x + y * l]; - } - } -} - -#else -void NEON_MNNPackForMatMul_B_BF16(float* destFloat, const float* sourceFloat, size_t h, size_t l, bool transpose) { - int16_t* dest = (int16_t*)destFloat; - int16_t* source = (int16_t*)sourceFloat; - if (!transpose) { - auto hP = h / 4; - auto hR = hP * 4; - if (hR != h) { - ::memset(dest, 0, UP_DIV(h, 4) * 4 * l * sizeof(int16_t)); - } - for (int y = 0; y < hP; ++y) { - auto destY = dest + y * 4 * l; - auto sourceY = source + y * 4; - for (int x = 0; x < l; ++x) { - ::memcpy(destY + 4 * x, sourceY + x * h, 4 * sizeof(int16_t)); - } - } - auto hRemain = h - hR; - if (hRemain > 0) { - auto destY = dest + hP * 4 * l; - auto sourceY = source + hP * 4; - for (int x = 0; x < l; ++x) { - ::memcpy(destY + 4 * x, sourceY + x * h, hRemain * sizeof(int16_t)); - } - } - return; - } - int offset[2] = { - (int)l, - (int)l, - }; - MNNPackC4_BF16(destFloat, sourceFloat, l, h, offset); -} -#endif // __aarch64__ -#endif // MNN_SUPPORT_BF16 - diff --git a/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit.S b/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit.S index cdcd1226f..72ff71423 100644 --- a/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit.S +++ b/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit.S @@ -15,14 +15,24 @@ .align 5 asm_function MNNGemmInt8AddBiasScale_16x4_Unit - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; + const float* extraBias = nullptr; +}; +*/ //void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, // size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t real) { @@ -42,23 +52,25 @@ ldr lr, [r6, #4] vpush {q4-q7} -// Branch1: input is int8_t, output is float32, DO NOT USE "scale". -// Branch2: input is int8_t, output is float32. USE "scale", DO NOT USE "minValue" and "maxValue". -// Branch3: input is int8_t, output is int8_t. USE "scale", "minValue" and "maxValue". - ldr r7, [r6, #16] // r7: useInt8 -cmp r7, #1 -beq InitBranch3 - -InitBranch2: -mov r7, #-0x80 // Branch2 do not use "minValue", so set r7 as a flag to decide the branch. 
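The rewritten kernels in this file, and the other int8 GEMM kernels touched by this patch, replace the old three-branch logic with a single post-treatment driven by the extra QuanPostTreatParameters fields documented above. As a reading aid, here is a hedged scalar reconstruction of what the code below does for one tile t and one output channel c; field names follow the struct comment, and the bias == nullptr branch is how later quantization blocks accumulate into the destination. This is a sketch inferred from the assembly, not code from the patch:

    #include <algorithm>
    #include <cstdint>

    static void postTreatOne(float acc,                  // int32 accumulator already converted to float
                             const QuanPostTreatParameters* p,
                             float srcKernelSum,          // per-tile input sum
                             const float* extraScale,     // per-tile pointer, may be null
                             int c, int t,
                             float* dstF, int8_t* dstI8) {
        float v = acc * p->scale[c];                      // per-channel dequant scale
        if (extraScale) {
            v *= extraScale[t];                           // optional per-tile scale
        }
        v += p->weightQuanBias[c] * srcKernelSum;         // weight zero-point compensation
        if (p->useInt8) {                                 // int8 output path
            v += p->biasFloat[c];
            v += (v > 0.f) ? p->roundValuePos : p->roundValueNeg;
            int q = static_cast<int>(v);                  // truncate after +/-0.5, i.e. round half away
            q = std::min(q, static_cast<int>(p->maxValue));
            q = std::max(q, static_cast<int>(p->minValue));
            *dstI8 = static_cast<int8_t>(q);
        } else {                                          // float output path
            if (p->biasFloat) {
                v += p->biasFloat[c];                     // first block adds the bias
            } else {
                v += *dstF;                               // later blocks accumulate into dst
            }
            if (p->fp32minmax) {
                v = std::max(v, p->fp32minmax[0]);
                v = std::min(v, p->fp32minmax[1]);
            }
            *dstF = v;
        }
    }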
-b Start -InitBranch3: -ldr r7, [r6, #8] -ldr r6, [r6, #12] -b Start +ldr r12, [r6, #28] // srcKernelSum +str r12, [sp, #4] +ldr r12, [r6, #32] // weightBias +str r12, [sp, #8] +ldr r12, [r6, #36] // f32minmax +str r12, [sp, #12] +ldr r12, [r6, #8] // int8 max +str r12, [sp, #16] +ldr r12, [r6, #12] // int8 min +str r12, [sp, #20] +ldr r12, [r6, #40] // blockNum +mul r12, r12, r3 // src_depth_quad=src_depth_quad*blockNum +lsl r12, r12, #6 // weight_stride = src_depth_quad*LP*HP +str r12, [sp, #24] +ldr r12, [r6, #48] // extraScale +str r12, [sp, #28] Start: cmp r10, #2 @@ -66,7 +78,7 @@ blt L1LoopDz L2LoopDz: mov r10, r1 - + str r2, [sp, #32] // store weight ptr subs r12, r3, #1 // first four output vld1.8 {q2}, [r1]! @@ -143,8 +155,7 @@ L2LoopDz: L2LoopSzEnd: L2Quan: - vld1.s32 {q4}, [lr]! - vld1.f32 {q5}, [r8]! + vld1.f32 {q5}, [r8]! // scale vpadd.s32 d16, d16, d17 vpadd.s32 d20, d20, d21 @@ -157,31 +168,85 @@ L2LoopDz: vpadd.s32 d30, d30, d31 // q8,q9 - vdup.32 q2, r6 - vdup.32 q3, r7 + vpadd.s32 d16, d16, d18 vpadd.s32 d17, d20, d22 vpadd.s32 d18, d24, d26 vpadd.s32 d19, d28, d30 - vaddq.s32 q0, q8, q4 - vaddq.s32 q1, q9, q4 - - vcvt.f32.s32 q0, q0 - vcvt.f32.s32 q1, q1 + // vaddq.s32 q0, q8, q4 // add bias + // vaddq.s32 q1, q9, q4 - vmov.f32 q10, #0.5 - vmov.f32 q11, #-0.5 + vcvt.f32.s32 q0, q8 + vcvt.f32.s32 q1, q9 - vmulq.f32 q0, q0, q5 + vmulq.f32 q0, q0, q5 // mul scale vmulq.f32 q1, q1, q5 - cmp r7, #-0x80 + // extra scale if has + ldr r6, [sp, #28] + cmp r6, #0 + beq L2_MLA + vld1.f32 {d10[0]}, [r6]! // tile0 + vld1.f32 {d10[1]}, [r6] // tile1 + vmulq.f32 q0, q0, d10[0] + vmulq.f32 q1, q1, d10[1] + + L2_MLA: + ldr r6, [sp, #4] // srcKernelSum + vld1.f32 {d12[0]}, [r6]! // tile 0 + vld1.f32 {d12[1]}, [r6] // tile 1 + ldr r6, [sp, #8] // weightBias + vld1.f32 {q7}, [r6]! + str r6, [sp, #8] // update next 4 weightBias + + vmla.f32 q0, q7, d12[0] + vmla.f32 q1, q7, d12[1] + + cmp r7, #0 bne L2QuanUseInt8 + + L2_ADD_BIAS: + cmp lr, #0 + beq L2_ADD_DSTV + vld1.f32 {q4}, [lr]! // bias + vadd.f32 q0, q0, q4 // bias + vadd.f32 q1, q1, q4 + b L2_POST + + L2_ADD_DSTV: + vld1.f32 {q4, q5}, [r0] + vadd.f32 q0, q0, q4 + vadd.f32 q1, q1, q5 + + L2_POST: + ldr r6, [sp, #12] // fp32 minmax + cmp r6, #0 + beq L2_STORE + vld1.f32 {d20[0]}, [r6]! + vld1.f32 {d22[0]}, [r6] + vdup.f32 q10, d20[0] + vdup.f32 q11, d22[0] + vmax.f32 q0, q0, q10 + vmax.f32 q1, q1, q10 + vmin.f32 q0, q0, q11 + vmin.f32 q1, q1, q11 + + L2_STORE: vst1.f32 {q0, q1}, [r0], r4 b L2LoopCheck L2QuanUseInt8: + vld1.f32 {q4}, [lr]! // bias + vadd.f32 q0, q0, q4 // bias + vadd.f32 q1, q1, q4 + + vmov.f32 q10, #0.5 + vmov.f32 q11, #-0.5 + ldr r6, [sp, #16] + vdup.32 q3, r6 // max + ldr r6, [sp, #20] + vdup.32 q2, r6 // min vcgt.f32 q12, q0, #0 vcgt.f32 q13, q1, #0 vbsl.f32 q12, q10, q11 @@ -201,17 +266,20 @@ L2LoopDz: vqmovn.s16 d6, q2 - vst1.s8 d6, [r0], r4 + vst1.s8 {d6}, [r0], r4 L2LoopCheck: subs r5, r5, #1 mov r1, r10 + ldr r2, [sp, #32] // origin weight ptr + ldr r6, [sp, #24] // weight stride + add r2, r2, r6 // next oc4 weight ptr bne L2LoopDz b End L1LoopDz: mov r10, r1 - + str r2, [sp, #32] // store weight ptr subs r12, r3, #1 // first four output vld1.8 {q2}, [r1]! @@ -259,35 +327,74 @@ L1LoopDz: L1LoopSzEnd: L1Quan: - vld1.s32 {q4}, [lr]! - vld1.f32 {q5}, [r8]! + //vld1.f32 {q4}, [lr]! // bias + vld1.f32 {q5}, [r8]! 
// scale vpadd.s32 d16, d16, d17 vpadd.s32 d20, d20, d21 vpadd.s32 d18, d18, d19 vpadd.s32 d22, d22, d23 - // q8,q9 - vdup.32 q2, r6 - vdup.32 q3, r7 + // q8 vpadd.s32 d16, d16, d18 vpadd.s32 d17, d20, d22 - vaddq.s32 q0, q8, q4 - - vcvt.f32.s32 q0, q0 - - vmov.f32 q10, #0.5 - vmov.f32 q11, #-0.5 - + // vaddq.s32 q0, q8, q4 + vcvt.f32.s32 q0, q8 vmulq.f32 q0, q0, q5 - - cmp r7, #-0x80 + // extra scale if has + ldr r6, [sp, #28] + cmp r6, #0 + beq L1_MLA + vld1.f32 {d10[0]}, [r6] // tile0 + vmulq.f32 q0, q0, d10[0] + + L1_MLA: + ldr r6, [sp, #4] // srcKernelSum + vld1.f32 {d12[0]}, [r6] // tile 0 + ldr r6, [sp, #8] // weightBias + vld1.f32 {q7}, [r6]! + str r6, [sp, #8] // update next 4 weightBias + vmla.f32 q0, q7, d12[0] + //vadd.f32 q0, q0, q4 + + cmp r7, #0 bne L1QuanUseInt8 + + cmp lr, #0 + beq L1_ADD_DSTV + vld1.f32 {q4}, [lr]! // bias + vadd.f32 q0, q0, q4 + b L1_POST + + L1_ADD_DSTV: + vld1.f32 {q4}, [r0] + vadd.f32 q0, q0, q4 + + L1_POST: + ldr r6, [sp, #12] // fp32 minmax + cmp r6, #0 + beq L1_STORE + + vld1.f32 {d20[0]}, [r6]! + vld1.f32 {d22[0]}, [r6] + vdup.f32 q10, d20[0] + vdup.f32 q11, d22[0] + vmax.f32 q0, q0, q10 + vmin.f32 q0, q0, q11 + L1_STORE: vst1.f32 {q0}, [r0], r4 b L1LoopCheck L1QuanUseInt8: + vld1.f32 {q4}, [lr]! // bias + vadd.f32 q0, q0, q4 + vmov.f32 q10, #0.5 + vmov.f32 q11, #-0.5 + ldr r6, [sp, #16] + vdup.32 q3, r6 // max + ldr r6, [sp, #20] + vdup.32 q2, r6 // min vcgt.f32 q12, q0, #0 vbsl.f32 q12, q10, q11 vbsl.f32 q13, q10, q11 @@ -301,10 +408,13 @@ L1LoopDz: vqmovn.s16 d6, q2 - vst1.s32 d6[0], [r0], r4 + vst1.s32 {d6[0]}, [r0], r4 L1LoopCheck: subs r5, r5, #1 mov r1, r10 + ldr r2, [sp, #32] // origin weight ptr + ldr r6, [sp, #24] // weight stride + add r2, r2, r6 // next oc4 weight ptr bne L1LoopDz End: diff --git a/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S b/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S index a77529575..25c9e5359 100644 --- a/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S +++ b/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S @@ -43,22 +43,18 @@ ldr lr, [r6, #4] vpush {q4-q7} -// Branch1: input is int8_t, output is float32, DO NOT USE "scale". -// Branch2: input is int8_t, output is float32. USE "scale", DO NOT USE "minValue" and "maxValue". -// Branch3: input is int8_t, output is int8_t. USE "scale", "minValue" and "maxValue". - -ldr r7, [r6, #16] // r7: useInt8 -cmp r7, #1 -beq InitBranch3 - -InitBranch2: -mov r7, #-0x80 // Branch2 do not use "minValue", so set r7 as a flag to decide the branch. -b Start - -InitBranch3: -ldr r7, [r6, #8] -ldr r6, [r6, #12] -b Start +// Only int8 output use this kernel. + +ldr r12, [r6, #28] // srcKernelSum +str r12, [sp, #4] +ldr r12, [r6, #32] // weightBias +str r12, [sp, #8] +ldr r12, [r6, #36] // f32minmax +str r12, [sp, #12] +ldr r12, [r6, #8] // int8 max +str r12, [sp, #16] +ldr r12, [r6, #12] // int8 min +str r12, [sp, #20] Start: cmp r10, #2 @@ -132,10 +128,11 @@ L2LoopDz: vpaddl.s16 q7, q15 L2Quan: - vld1.s32 {q14}, [lr]! + vld1.f32 {q14}, [lr]! // bias + vld1.f32 {q15}, [r8]! // scale vpadd.s32 d20, d0, d1 vpadd.s32 d21, d2, d3 - vld1.f32 {q15}, [r8]! 
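For readers wondering why every one of these kernels now folds in srcKernelSum times weightQuanBias: with a per-channel weight offset, i.e. real weight = s_w[c] * (q_w[k][c] - z_w[c]), one output expands as

    out[t][c] = sum_k x[t][k] * s_w[c] * (q_w[k][c] - z_w[c])
              = s_w[c] * sum_k x[t][k] * q_w[k][c]  -  s_w[c] * z_w[c] * sum_k x[t][k]

The first term is the int8 dot product times the per-channel scale already applied, and the second is a per-channel constant times the per-tile input sum, which is exactly what the vmla/fmla with weightQuanBias (roughly -s_w[c] * z_w[c]) and srcKernelSum (roughly sum_k x[t][k]) restores. The exact scaling folded into those two buffers is my reading of the code, not something the patch spells out.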
+ vpadd.s32 d22, d4, d5 vpadd.s32 d23, d6, d7 vpadd.s32 d24, d8, d9 @@ -149,24 +146,35 @@ L2LoopDz: vpadd.s32 d18, d24, d25 vpadd.s32 d19, d26, d27 - vaddq.s32 q0, q8, q14 - vaddq.s32 q1, q9, q14 + //vaddq.s32 q0, q8, q14 // add bias + //vaddq.s32 q1, q9, q14 - vcvt.f32.s32 q0, q0 - vcvt.f32.s32 q1, q1 - vmulq.f32 q0, q0, q15 + vcvt.f32.s32 q0, q8 + vcvt.f32.s32 q1, q9 + vmulq.f32 q0, q0, q15 // mul scale vmulq.f32 q1, q1, q15 - cmp r7, #-0x80 - bne L2QuanUseInt8 - vst1.f32 {q0, q1}, [r0], r4 - b L2LoopCheck + ldr r6, [sp, #4] // srcKernelSum + vld1.f32 {d12[0]}, [r6]! // tile 0 + vld1.f32 {d12[1]}, [r6] // tile 1 + ldr r6, [sp, #8] // weightBias + vld1.f32 {q7}, [r6]! + str r6, [sp, #8] // update next 4 weightBias + + vmla.f32 q0, q7, d12[0] // add srcKernelSum x weightBias + vmla.f32 q1, q7, d12[1] + + vadd.f32 q0, q0, q14 // add bias + vadd.f32 q1, q1, q14 + L2QuanUseInt8: vmov.f32 q10, #0.5 vmov.f32 q11, #-0.5 - vdup.32 q2, r6 - vdup.32 q3, r7 + ldr r6, [sp, #16] + vdup.32 q2, r6 // max + ldr r6, [sp, #20] + vdup.32 q3, r6 // min vcgt.f32 q12, q0, #0 vcgt.f32 q13, q1, #0 @@ -177,10 +185,10 @@ L2LoopDz: vcvt.s32.f32 q0, q0 vcvt.s32.f32 q1, q1 - vmax.s32 q0, q2, q0 - vmax.s32 q1, q2, q1 - vmin.s32 q0, q3, q0 - vmin.s32 q1, q3, q1 + vmin.s32 q0, q2, q0 + vmin.s32 q1, q2, q1 + vmax.s32 q0, q3, q0 + vmax.s32 q1, q3, q1 vqmovn.s32 d4, q0 vqmovn.s32 d5, q1 @@ -242,7 +250,7 @@ L1LoopDz: vpaddl.s16 q3, q11 L1Quan: - vld1.s32 {q14}, [lr]! + vld1.f32 {q14}, [lr]! vpadd.s32 d20, d0, d1 vpadd.s32 d21, d2, d3 vld1.f32 {q15}, [r8]! @@ -253,21 +261,26 @@ L1LoopDz: vpadd.s32 d16, d20, d21 vpadd.s32 d17, d22, d23 - vaddq.s32 q0, q8, q14 + //vaddq.s32 q0, q8, q14 - vcvt.f32.s32 q0, q0 - vdup.32 q2, r6 - vdup.32 q3, r7 + vcvt.f32.s32 q0, q8 vmulq.f32 q0, q0, q15 - cmp r7, #-0x80 - bne L1QuanUseInt8 - vst1.f32 {q0, q1}, [r0], r4 - b L1LoopCheck + + ldr r6, [sp, #4] // srcKernelSum + vld1.f32 {d12[0]}, [r6] // tile 0 + ldr r6, [sp, #8] // weightBias + vld1.f32 {q7}, [r6]! + str r6, [sp, #8] // update next 4 weightBias + vmla.f32 q0, q7, d12[0] + vadd.f32 q0, q0, q14 // add bias L1QuanUseInt8: vmov.f32 q10, #0.5 vmov.f32 q11, #-0.5 - + ldr r6, [sp, #16] + vdup.32 q3, r6 // max + ldr r6, [sp, #20] + vdup.32 q2, r6 // min vcgt.f32 q12, q0, #0 vbsl.f32 q12, q10, q11 vbsl.f32 q13, q10, q11 diff --git a/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S b/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S new file mode 100644 index 000000000..6368937de --- /dev/null +++ b/source/backend/cpu/arm/arm32/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S @@ -0,0 +1,392 @@ +// +// MNNGemmInt8AddBiasScale_16x4_w4_Unit.S +// MNN +// +// Created by MNN on 2019/06/11. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifdef __arm__ +#ifndef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +asm_function MNNGemmInt8AddBiasScale_16x4_w4_Unit +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; + const float* extraBias = nullptr; +}; +*/ + +//void MNNGemmInt8AddBiasScale_16x4_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, +// size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t real) { + +//Auto: r0: dst*, r1: src*, r2:weight*, r3: src_depth_quad +// Load from sp: r4: dst_step, r5: dst_depth_quad, r6: post, r10: real +// Load from post: r8: scale, lr: bias, r7: maxValue, r6: minValue + +push {r4-r8, r10, lr} // avoid to touch platform-register r-9 + +ldr r4, [sp, #28] +ldr r5, [sp, #32] +ldr r6, [sp, #36] +ldr r10, [sp, #40] +ldr r8, [r6, #0] +ldr lr, [r6, #4] + +vpush {q4-q7} + +// Branch1: input is int8_t, output is float32, DO NOT USE "scale". +// Branch2: input is int8_t, output is float32. USE "scale", DO NOT USE "minValue" and "maxValue". +// Branch3: input is int8_t, output is int8_t. USE "scale", "minValue" and "maxValue". + + +ldr r7, [r6, #16] // r7: useInt8 + +ldr r12, [r6, #28] // srcKernelSum +str r12, [sp, #4] +ldr r12, [r6, #32] // weightBias +str r12, [sp, #8] +ldr r12, [r6, #36] // f32minmax +str r12, [sp, #12] +ldr r12, [r6, #40] // blockNum +mul r12, r12, r3 // src_depth_quad=src_depth_quad*blockNum +lsl r12, r12, #6 // weight_stride = src_depth_quad*LP*HP +str r12, [sp, #16] +ldr r12, [r6, #48] // extraScale +str r12, [sp, #20] + +Start: +cmp r10, #2 +blt L1LoopDz + +L2LoopDz: + mov r10, r1 + str r2, [sp, #24] // store weight ptr + subs r12, r3, #1 + // first four output + vld1.8 {q2}, [r1]! + vld1.8 {q4}, [r2]! // weight, d8,d9,d10,d11 + // int4->int8 + vmov.i8 q5, #15 + vand.i8 q5, q5, q4 + vshr.u8 q4, q4, #4 + vzip.8 q4, q5 + + vmull.s8 q0, d4, d8 + vmull.s8 q1, d4, d10 + vmlal.s8 q0, d5, d9 + vmlal.s8 q1, d5, d11 + vpaddl.s16 q8, q0 + vpaddl.s16 q9, q1 + vld1.8 {q6}, [r2]! // weight,d12,d13,d14,d15 + // int4->int8 + vmov.i8 q7, #15 + vand.i8 q7, q7, q6 + vshr.u8 q6, q6, #4 + vzip.8 q6, q7 + + vmull.s8 q0, d4, d12 + vmull.s8 q1, d4, d14 + vmlal.s8 q0, d5, d13 + vmlal.s8 q1, d5, d15 + vpaddl.s16 q10, q0 + vld1.8 {q3}, [r1]! + vpaddl.s16 q11, q1 + // second four output + vmull.s8 q0, d6, d8 + vmull.s8 q1, d6, d10 + vmlal.s8 q0, d7, d9 + vmlal.s8 q1, d7, d11 + vpaddl.s16 q12, q0 + vpaddl.s16 q13, q1 + + vmull.s8 q0, d6, d12 + vmull.s8 q1, d6, d14 + vmlal.s8 q0, d7, d13 + vmlal.s8 q1, d7, d15 + vpaddl.s16 q14, q0 + vpaddl.s16 q15, q1 + + beq L2LoopSzEnd + + L2LoopSz: + // first four output + vld1.8 {q2}, [r1]! + vld1.8 {q4}, [r2]! + // int4->int8 + vmov.i8 q5, #15 + vand.i8 q5, q5, q4 + vshr.u8 q4, q4, #4 + vzip.8 q4, q5 + vmull.s8 q0, d4, d8 + vmull.s8 q1, d4, d10 + vmlal.s8 q0, d5, d9 + vmlal.s8 q1, d5, d11 + vld1.8 {q6}, [r2]! + // int4->int8 + vmov.i8 q7, #15 + vand.i8 q7, q7, q6 + vshr.u8 q6, q6, #4 + vzip.8 q6, q7 + vpadal.s16 q8, q0 + vpadal.s16 q9, q1 + + vmull.s8 q0, d4, d12 + vmull.s8 q1, d4, d14 + vmlal.s8 q0, d5, d13 + vmlal.s8 q1, d5, d15 + vld1.8 {q3}, [r1]! 
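The vmov/vand/vshr/vzip sequences above are where the w4 kernel expands two 4-bit weights per byte into int8 lanes before the smull/smlal multiplies. A scalar sketch of that expansion follows; the high-nibble-first order and the fact that the nibbles stay unsigned (0..15), with the zero point handled later through weightQuanBias, are my reading of the vzip.8 result rather than something the patch states:

    #include <cstdint>
    #include <cstddef>

    // Hedged sketch of the int4 -> int8 weight expansion used in the loops above.
    static void unpackInt4Weights(const uint8_t* packed, int8_t* unpacked, size_t bytes) {
        for (size_t i = 0; i < bytes; ++i) {
            unpacked[2 * i + 0] = static_cast<int8_t>(packed[i] >> 4);    // vshr.u8 #4: high nibble
            unpacked[2 * i + 1] = static_cast<int8_t>(packed[i] & 0x0F);  // vand with #15: low nibble
        }
    }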
+ vpadal.s16 q10, q0 + vpadal.s16 q11, q1 + // second four output + vmull.s8 q0, d6, d8 + vmull.s8 q1, d6, d10 + vmlal.s8 q0, d7, d9 + vmlal.s8 q1, d7, d11 + vpadal.s16 q12, q0 + vpadal.s16 q13, q1 + + vmull.s8 q0, d6, d12 + vmull.s8 q1, d6, d14 + vmlal.s8 q0, d7, d13 + vmlal.s8 q1, d7, d15 + vpadal.s16 q14, q0 + vpadal.s16 q15, q1 + + subs r12, r12, #1 + bne L2LoopSz + + L2LoopSzEnd: + + L2Quan: + vld1.f32 {q5}, [r8]! // scale + + vpadd.s32 d16, d16, d17 + vpadd.s32 d20, d20, d21 + vpadd.s32 d18, d18, d19 + vpadd.s32 d22, d22, d23 + + vpadd.s32 d24, d24, d25 + vpadd.s32 d28, d28, d29 + vpadd.s32 d26, d26, d27 + vpadd.s32 d30, d30, d31 + + // q8,q9 + + vpadd.s32 d16, d16, d18 + vpadd.s32 d17, d20, d22 + vpadd.s32 d18, d24, d26 + vpadd.s32 d19, d28, d30 + + // vaddq.s32 q0, q8, q4 // add bias + // vaddq.s32 q1, q9, q4 + + vcvt.f32.s32 q0, q0 + vcvt.f32.s32 q1, q1 + + vmulq.f32 q0, q0, q5 // mul scale + vmulq.f32 q1, q1, q5 + + // extra scale if has + ldr r6, [sp, #20] + cmp r6, #0 + beq L2_MLA + vld1.f32 {d10[0]}, [r6]! // tile0 + vld1.f32 {d10[1]}, [r6] // tile1 + vmulq.f32 q0, q0, d10[0] + vmulq.f32 q1, q1, d10[1] + + L2_MLA: + ldr r6, [sp, #4] // srcKernelSum + vld1.f32 {d12[0]}, [r6]! // tile 0 + vld1.f32 {d12[1]}, [r6] // tile 1 + ldr r6, [sp, #8] // weightBias + vld1.f32 {q7}, [r6]! + str r6, [sp, #8] // update next 4 weightBias + + vmla.f32 q0, q7, d12[0] + vmla.f32 q1, q7, d12[1] + + L2_POST: + ldr r6, [sp, #12] // fp32 minmax + cmp r6, #0 + beq L2_STORE + vld1.f32 {d20[0]}, [r6]! + vld1.f32 {d22[0]}, [r6] + vdup.f32 q10, d20[0] + vdup.f32 q11, d22[0] + vmax.f32 q0, q0, q10 + vmax.f32 q1, q1, q10 + vmin.f32 q0, q0, q11 + vmin.f32 q1, q1, q11 + + L2_STORE: + vst1.f32 {q0, q1}, [r0], r4 + +L2LoopCheck: + subs r5, r5, #1 + mov r1, r10 + ldr r2, [sp, #24] // origin weight ptr + ldr r6, [sp, #16] // weight stride + add r2, r2, r6 // next oc4 weight ptr + bne L2LoopDz + +b End + +L1LoopDz: + mov r10, r1 + str r2, [sp, #24] // store weight ptr + subs r12, r3, #1 + // first four output + vld1.8 {q2}, [r1]! + vld1.8 {q4}, [r2]! + // int4->int8 + vmov.i8 q5, #15 + vand.i8 q5, q5, q4 + vshr.u8 q4, q4, #4 + vzip.8 q4, q5 + + vmull.s8 q0, d4, d8 + vmull.s8 q1, d4, d10 + vmlal.s8 q0, d5, d9 + vmlal.s8 q1, d5, d11 + vpaddl.s16 q8, q0 + vpaddl.s16 q9, q1 + vld1.8 {q6}, [r2]! + // int4->int8 + vmov.i8 q7, #15 + vand.i8 q7, q7, q6 + vshr.u8 q6, q6, #4 + vzip.8 q6, q7 + + vmull.s8 q0, d4, d12 + vmull.s8 q1, d4, d14 + vmlal.s8 q0, d5, d13 + vmlal.s8 q1, d5, d15 + vpaddl.s16 q10, q0 + add r1, r1, #16 + vpaddl.s16 q11, q1 + + beq L1LoopSzEnd + + L1LoopSz: + // first four output + vld1.8 {q2}, [r1]! + vld1.8 {q4}, [r2]! + // int4->int8 + vmov.i8 q5, #15 + vand.i8 q5, q5, q4 + vshr.u8 q4, q4, #4 + vzip.8 q4, q5 + vmull.s8 q0, d4, d8 + vmull.s8 q1, d4, d10 + vmlal.s8 q0, d5, d9 + vmlal.s8 q1, d5, d11 + vld1.8 {q6}, [r2]! + // int4->int8 + vmov.i8 q7, #15 + vand.i8 q7, q7, q6 + vshr.u8 q6, q6, #4 + vzip.8 q6, q7 + vpadal.s16 q8, q0 + vpadal.s16 q9, q1 + + vmull.s8 q0, d4, d12 + vmull.s8 q1, d4, d14 + vmlal.s8 q0, d5, d13 + vmlal.s8 q1, d5, d15 + add r1, r1, #16 + vpadal.s16 q10, q0 + vpadal.s16 q11, q1 + + subs r12, r12, #1 + bne L1LoopSz + + L1LoopSzEnd: + L1Quan: + //vld1.f32 {q4}, [lr]! // bias + vld1.f32 {q5}, [r8]! 
// scale + + vpadd.s32 d16, d16, d17 + vpadd.s32 d20, d20, d21 + vpadd.s32 d18, d18, d19 + vpadd.s32 d22, d22, d23 + + // q8 + vpadd.s32 d16, d16, d18 + vpadd.s32 d17, d20, d22 + + // vaddq.s32 q0, q8, q4 + vcvt.f32.s32 q0, q0 + vmulq.f32 q0, q0, q5 + // extra scale if has + ldr r6, [sp, #20] + cmp r6, #0 + beq L1_MLA + vld1.f32 {d10[0]}, [r6] // tile0 + vmulq.f32 q0, q0, d10[0] + + L1_MLA: + ldr r6, [sp, #4] // srcKernelSum + vld1.f32 {d12[0]}, [r6] // tile 0 + ldr r6, [sp, #8] // weightBias + vld1.f32 {q7}, [r6]! + str r6, [sp, #8] // update next 4 weightBias + vmla.f32 q0, q7, d12[0] + //vadd.f32 q0, q0, q4 + + cmp lr, #0 + beq L1_ADD_DSTV + vld1.f32 {q4}, [lr]! // bias + vadd.f32 q0, q0, q4 + b L1_POST + + L1_ADD_DSTV: + vld1.f32 {q4}, [r0] + vadd.f32 q0, q0, q4 + + L1_POST: + ldr r6, [sp, #12] // fp32 minmax + cmp r6, #0 + beq L1_STORE + + vld1.f32 {d20[0]}, [r6]! + vld1.f32 {d22[0]}, [r6] + vdup.f32 q10, d20[0] + vdup.f32 q11, d22[0] + vmax.f32 q0, q0, q10 + vmin.f32 q0, q0, q11 + L1_STORE: + vst1.f32 {q0}, [r0], r4 + +L1LoopCheck: + subs r5, r5, #1 + mov r1, r10 + ldr r2, [sp, #24] // origin weight ptr + ldr r6, [sp, #16] // weight stride + add r2, r2, r6 // next oc4 weight ptr + bne L1LoopDz + +End: +vpop {q4-q7} +pop {r4-r8, r10, pc} + +#endif +#endif diff --git a/source/backend/cpu/arm/arm32/MNNLineDepthWiseInt8AddBiasScaleUnit.S b/source/backend/cpu/arm/arm32/MNNLineDepthWiseInt8AddBiasScaleUnit.S index e905a3703..55460f637 100644 --- a/source/backend/cpu/arm/arm32/MNNLineDepthWiseInt8AddBiasScaleUnit.S +++ b/source/backend/cpu/arm/arm32/MNNLineDepthWiseInt8AddBiasScaleUnit.S @@ -54,7 +54,7 @@ ldr r11, [r3, #8] vdup.i8 d23, r11 ldr r11, [r3, #12] vdup.i8 d22, r11 -ldr r3, [r3, #4] +ldr r3, [r3, #44] // bias mul r10, r6, r8 sub lr, lr, r10 diff --git a/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx1.S b/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx1.S index 9297eb959..f9e220cb5 100644 --- a/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx1.S +++ b/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx1.S @@ -7,6 +7,24 @@ // // +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + +}; + */ + #ifdef __arm__ #ifndef __aarch64__ @@ -58,7 +76,7 @@ loop_e8: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr lr, [r6], #4 // dataOffset add r1, r1, lr @@ -135,7 +153,7 @@ loop_e4: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr lr, [r6], #4 // dataOffset add r1, r1, lr @@ -196,7 +214,7 @@ loop_e2: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr lr, [r6], #4 // dataOffset add r1, r1, lr @@ -255,7 +273,7 @@ loop_e1: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr lr, [r6], #4 // dataOffset diff --git a/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx4.S b/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx4.S index c6a13b39f..01ce74082 100644 --- a/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx4.S +++ b/source/backend/cpu/arm/arm32/MNNPackedSparseQuantMatMulEpx4.S @@ -96,7 +96,7 @@ loop_e8: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr r10, [r3, #20] // cStride ldr lr, [r6], #4 // dataOffset @@ -175,7 +175,7 @@ loop_e4: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr r10, [r3, #20] // cStride ldr lr, [r6], #4 // dataOffset @@ -233,7 +233,7 @@ loop_e2: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr r10, [r3, #20] // cStride ldr lr, [r6], #4 // dataOffset @@ -294,7 +294,7 @@ loop_e1: ldr r5, [sp, #(push_registers_bytes + 4)] ldr r6, [sp, #(push_registers_bytes + 8)] ldr r7, [r4] - ldr r8, [r4, #4] + ldr r8, [r4, #44] push {r0-r2, r10} ldr r10, [r3, #20] // cStride ldr lr, [r6], #4 // dataOffset diff --git a/source/backend/cpu/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S b/source/backend/cpu/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S index 663ffae68..54744568e 100644 --- a/source/backend/cpu/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S +++ b/source/backend/cpu/arm/arm32/bf16/MNNPackC4ForMatMul_A_BF16.S @@ -25,7 +25,7 @@ ldr r6, [r2, #12] // xOffset // eReal -> eReal * 4 * sizeof(float) // eDest -> eDest * sizeof(float) mov r12, #2 // sizeof(int16_t) -mov lr, #8 // sizeof(int16_t) * 4 +mov lr, #16 // sizeof(float) * 4 mul r4, lr, r4 mul r11, r12, r11 mul r6, lr, r6 @@ -39,7 +39,7 @@ push {r0, r1} ldr r1, [r1, #0] // Compute dest ptr: r0 = r0 + eOffset * sizeof(float) + lOffset * eDest * sizeof(float) -; mov lr, #2 //sizeof(int16_t) + mul r7, r11, r7 mul r8, r12, r8 add r0, r0, r7 @@ -55,18 +55,36 @@ bne Right LoopL4: mov r2, r1 .macro MAIN_TRANSPOSE - vld1.16 {d16}, [r1], r6 // load size: 4 * sizeof(int16_t) - vld1.16 {d19}, [r1], r6 - vld1.16 {d22}, [r1], r6 - vld1.16 {d25}, [r1], r6 - vld1.16 {d17}, [r1], r6 - vld1.16 {d20}, [r1], r6 - vld1.16 {d23}, [r1], r6 - vld1.16 {d26}, [r1], r6 - vld1.16 {d18}, [r1], r6 - vld1.16 
{d21}, [r1], r6 - vld1.16 {d24}, [r1], r6 - vld1.16 {d27}, [r1], r6 + + vld1.32 {q0}, [r1], r6 // load size: 4 * sizeof(float) + vld1.32 {q1}, [r1], r6 + vld1.32 {q2}, [r1], r6 + vld1.32 {q3}, [r1], r6 + + vshrn.i32 d16, q0, #16 + vshrn.i32 d19, q1, #16 + vshrn.i32 d22, q2, #16 + vshrn.i32 d25, q3, #16 + + vld1.32 {q0}, [r1], r6 // load size: 4 * sizeof(float) + vld1.32 {q1}, [r1], r6 + vld1.32 {q2}, [r1], r6 + vld1.32 {q3}, [r1], r6 + + vshrn.i32 d17, q0, #16 + vshrn.i32 d20, q1, #16 + vshrn.i32 d23, q2, #16 + vshrn.i32 d26, q3, #16 + + vld1.32 {q0}, [r1], r6 // load size: 4 * sizeof(float) + vld1.32 {q1}, [r1], r6 + vld1.32 {q2}, [r1], r6 + vld1.32 {q3}, [r1], r6 + + vshrn.i32 d18, q0, #16 + vshrn.i32 d21, q1, #16 + vshrn.i32 d24, q2, #16 + vshrn.i32 d27, q3, #16 // transpose each 4 16-bit elements in 2 d_n vectors, by transpose 16-bit and scale up transpose 32-bit. vtrn.16 d16, d19 @@ -145,7 +163,9 @@ LoopE1: cmp r5, #4 blt LoopE1L3 LoopE1L4: - vld1.16 {d0}, [r1], r4 + vld1.32 {q0}, [r1], r4 + vshrn.i32 d0, q0, #16 + vst1.16 {d0[0]}, [r0], r11 vst1.16 {d0[1]}, [r0], r11 vst1.16 {d0[2]}, [r0], r11 @@ -157,7 +177,9 @@ LoopE1: LoopE1L3: cmp r5, #3 blt LoopE1L2 - vld1.16 {d0}, [r1], r4 + vld1.32 {q0}, [r1], r4 + vshrn.i32 d0, q0, #16 + vst1.16 {d0[0]}, [r0], r11 vst1.16 {d0[1]}, [r0], r11 vst1.16 {d0[2]}, [r0], r11 @@ -167,7 +189,9 @@ LoopE1: LoopE1L2: cmp r5, #2 blt LoopE1L1 - vld1.16 {d0}, [r1], r4 + vld1.32 {q0}, [r1], r4 + vshrn.i32 d0, q0, #16 + vst1.16 {d0[0]}, [r0], r11 vst1.16 {d0[1]}, [r0], r11 sub r5, r5, #2 @@ -175,7 +199,8 @@ LoopE1: LoopE1L1: cmp r5, #1 blt LoopE1End - vld1.16 {d0[0]}, [r1], r4 + vld1.32 {d0}, [r1], r4 + vshrn.i32 d0, q0, #16 vst1.16 {d0[0]}, [r0], r11 LoopE1End: diff --git a/source/backend/cpu/arm/arm32/bf16/MNNPackC4_BF16.S b/source/backend/cpu/arm/arm32/bf16/MNNPackC4_BF16.S index 70b9e61e4..844f32d48 100644 --- a/source/backend/cpu/arm/arm32/bf16/MNNPackC4_BF16.S +++ b/source/backend/cpu/arm/arm32/bf16/MNNPackC4_BF16.S @@ -39,8 +39,8 @@ mul r4, r2, r3 cmp r4, #0 beq UpEnd -//r4: srcDepthOffset:srcArea*sizeof(int16_t) -mov r4, #2 +//r4: srcDepthOffset:srcArea*sizeof(float) +mov r4, #4 mul r4, lr, r4 //r10 -> 4 * (dstArea * sizeof(int16_t) - area * sizeof(int16_t)) @@ -48,8 +48,8 @@ mov r12, #8 sub r10, r10, r2 mul r10, r12, r10 -//lr -> (srcArea * sizeof(int16_t) - area * sizeof(int16_t)) -mov r12, #2 +//lr -> (srcArea * sizeof(float) - area * sizeof(float)) +mov r12, #4 sub lr, lr, r2 mul lr, r12, lr @@ -65,10 +65,15 @@ mov r8, r2 cmp r8, #3 ble UpL4AreaRemain UpL4AreaLoop: -vld1.16 {d0}, [r1]! // load 4 elements of 16-bit into 64bit vector register d0 -vld1.16 {d1}, [r5]! -vld1.16 {d2}, [r6]! -vld1.16 {d3}, [r7]! +vld1.32 {q0}, [r1]! // load 4 elements of 16-bit into 64bit vector register d0 +vld1.32 {q1}, [r5]! +vld1.32 {q2}, [r6]! +vld1.32 {q3}, [r7]! +vshrn.i32 d0, q0, #16 +vshrn.i32 d1, q1, #16 +vshrn.i32 d2, q2, #16 +vshrn.i32 d3, q3, #16 + // transpose // no suitable instruction to transpose int16_t type vst4.16 {d0, d1, d2, d3}, [r0]! sub r8, r8, #4 @@ -79,10 +84,11 @@ UpL4AreaRemain: cmp r8, #0 beq UpL4AreaRemainEnd UpL4AreaRemainLoop: -vld1.16 {d0[0]}, [r1]! -vld1.16 {d0[1]}, [r5]! -vld1.16 {d0[2]}, [r6]! -vld1.16 {d0[3]}, [r7]! +vld1.32 {d0[0]}, [r1]! +vld1.32 {d0[1]}, [r5]! +vld1.32 {d1[0]}, [r6]! +vld1.32 {d1[1]}, [r7]! +vshrn.i32 d0, q0, #16 vst1.16 {d0}, [r0]! @@ -104,10 +110,14 @@ mov r8, r2 cmp r8, #3 ble UpL3AreaRemain UpL3AreaLoop: -vld1.16 {d0}, [r1]! +vld1.32 {q0}, [r1]! +vld1.32 {q1}, [r5]! +vld1.32 {q2}, [r6]! 
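These BF16 pack routines now read float32 input and narrow it on the fly: vshrn.i32 #16 keeps the top 16 bits of each float, which is a plain truncating float32 to bfloat16 conversion, and the vshll by 16 used on the load side elsewhere simply puts those bits back into the high half of a float. A scalar equivalent for reference, a sketch rather than MNN's actual helper:

    #include <cstdint>
    #include <cstring>

    static inline uint16_t floatToBf16(float v) {        // like vshrn.i32 dN, qM, #16
        uint32_t bits;
        std::memcpy(&bits, &v, sizeof(bits));
        return static_cast<uint16_t>(bits >> 16);         // keep sign, exponent, top 7 mantissa bits
    }

    static inline float bf16ToFloat(uint16_t v) {         // like the vshll widen on the load side
        uint32_t bits = static_cast<uint32_t>(v) << 16;   // low 16 mantissa bits become zero
        float out;
        std::memcpy(&out, &bits, sizeof(out));
        return out;
    }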
+vshrn.i32 d0, q0, #16 +vshrn.i32 d1, q1, #16 +vshrn.i32 d2, q2, #16 vmov.i16 d3, #0 -vld1.16 {d1}, [r5]! -vld1.16 {d2}, [r6]! + // transpose // no suitable instruction to transpose int16_t type vst4.16 {d0, d1, d2, d3}, [r0]! sub r8, r8, #4 @@ -117,10 +127,11 @@ bge UpL3AreaLoop cmp r8, #0 beq UpL3AreaRemainEnd UpL3AreaRemain: -vmov.i16 d0, #0 -vld1.16 {d0[0]}, [r1]! -vld1.16 {d0[1]}, [r5]! -vld1.16 {d0[2]}, [r6]! +vmov.i32 q0, #0 +vld1.32 {d0[0]}, [r1]! +vld1.32 {d0[1]}, [r5]! +vld1.32 {d1[0]}, [r6]! +vshrn.i32 d0, q0, #16 vst1.16 {d0}, [r0]! @@ -139,11 +150,13 @@ mov r8, r2 cmp r8, #3 ble UpL2AreaRemain UpL2AreaLoop: -vld1.16 {d0}, [r1]! +vld1.32 {q0}, [r1]! +vld1.32 {q1}, [r5]! +vshrn.i32 d0, q0, #16 +vshrn.i32 d1, q1, #16 + vmov.i16 d3, #0 -vld1.16 {d1}, [r5]! vmov.i16 d2, #0 -// transpose // no suitable instruction to transpose int16_t type vst4.16 {d0, d1, d2, d3}, [r0]! sub r8, r8, #4 cmp r8, #4 @@ -152,9 +165,11 @@ bge UpL2AreaLoop cmp r8, #0 beq UpL2AreaRemainEnd UpL2AreaRemain: -vmov.i16 d0, #0 -vld1.16 {d0[0]}, [r1]! -vld1.16 {d0[1]}, [r5]! +vmov.i32 q0, #0 +vld1.32 {d0[0]}, [r1]! +vld1.32 {d0[1]}, [r5]! + +vshrn.i32 d0, q0, #16 vst1.16 {d0}, [r0]! @@ -171,7 +186,8 @@ mov r8, r2 cmp r8, #3 ble UpL1AreaRemain UpL1AreaLoop: -vld1.16 {d0}, [r1]! +vld1.32 {q0}, [r1]! +vshrn.i32 d0, q0, #16 vmov.i16 d3, #0 vmov.i16 d1, #0 vmov.i16 d2, #0 @@ -184,8 +200,9 @@ bge UpL1AreaLoop cmp r8, #0 beq UpL1AreaRemainEnd UpL1AreaRemain: -vmov.i16 d0, #0 -vld1.16 {d0[0]}, [r1]! +vmov.i16 q0, #0 +vld1.32 {d0[0]}, [r1]! +vshrn.i32 d0, q0, #16 vst1.16 {d0}, [r0]! diff --git a/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S b/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S index 252f1956a..ea64bd0fd 100644 --- a/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S +++ b/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMulRemain_BF16.S @@ -25,6 +25,8 @@ ldr r4, [sp, #32] ldr r6, [sp, #36] ldr r7, [sp, #40] ldr r12, [r4, #0] +// aStride is compute as float, divided 2 to as bf16 +lsr r12, r12, #1 cmp r6, #0 beq Start vld1.32 {q3}, [r6] @@ -61,8 +63,8 @@ LoopE4: bne LoopE4L cmp r6, #0 beq StoreE4 - vld1.16 {d28}, [r7]! // load 4 * sizeof(int16_t) - vshll.s16 q14, d28, #16 // shift left long of each int16_t as float32 + vld1.32 {q14}, [r7]! // load 4 * sizeof(float) + vmla.f32 q8, q14, d6[1] vmla.f32 q9, q14, d6[1] vmla.f32 q10, q14, d6[1] @@ -81,20 +83,17 @@ LoopE4: StoreE4: ldr r8, [r4, #20] + lsr r8, r8, #1 // bExtraStride is compute as float, divide to bf16 add r11, r11, r8 ldr r8, [r4, #12] - vshrn.i32 d16, q8, #16 // shift right 16bit of each float32 as int16_t - vshrn.i32 d17, q9, #16 - vshrn.i32 d18, q10, #16 - vshrn.i32 d19, q11, #16 - vst1.16 {d16, d17}, [lr]! - vst1.16 {d18, d19}, [lr], r8 - sub lr, lr, #16 + vst1.32 {q8, q9}, [lr]! + vst1.32 {q10, q11}, [lr], r8 + sub lr, lr, #32 // revert to next C4 begin subs r5, r5, #1 // move 4 colum along lP dim. lP = l / 4 bne LoopE4H sub r3, r3, #4 // move 4 colum along e dim. - add r0, r0, #32 // move address of 4 * 4 * sizeof(int16_t) + add r0, r0, #64 // move address of 4 * 4 * sizeof(float) add r1, r1, #8 // move address of 4 * sizeof(int16_t) in src tile block cmp r3, #4 pop {r7} @@ -125,8 +124,7 @@ LoopE1: bne LoopE1L cmp r6, #0 beq StoreE1 - vld1.16 {d28}, [r7]! // load 4 * sizeof(int16_t) - vshll.s16 q14, d28, #16 // shift left long of each int16_t as float32 + vld1.32 {q14}, [r7]! 
// load 4 * sizeof(float) vmla.f32 q15, q14, d6[1] PostTreatE1: @@ -135,15 +133,15 @@ LoopE1: StoreE1: ldr r8, [r4, #20] + lsr r8, r8, #1 add r11, r11, r8 ldr r8, [r4, #12] - vshrn.i32 d30, q15, #16 // shift right 16bit of each float32 as int16_t - vst1.16 {d30}, [lr], r8 + vst1.16 {q15}, [lr], r8 subs r5, r5, #1 bne LoopE1H subs r3, r3, #1 - add r0, r0, #8 // move address of 4 * sizeof(int16_t) + add r0, r0, #16 // move address of 4 * sizeof(float) add r1, r1, #2 // move address of 1 * sizeof(int16_t) pop {r7} bne LoopE1 diff --git a/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMul_BF16.S b/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMul_BF16.S index 3b9ab3d48..22719baf4 100644 --- a/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMul_BF16.S +++ b/source/backend/cpu/arm/arm32/bf16/MNNPackedMatMul_BF16.S @@ -30,8 +30,9 @@ add r4, r4, #3 ldr r8, [r3, #12]//cStride ldr r3, [r3, #20]//bExtraStride lsr r4, r4, #2 +lsr r3, r3, #1 //bExtraStride is compute as fp32, turn to bf16 -sub r8, r8, #96 // after segment "Store", total line stride is CStride, all vst. offset is 12 * 4 * size_t(int16_t) = 96byte +sub r8, r8, #192 // after segment "Store", total line stride is CStride, all vst. offset is 12 * 4 * size_t(float) = 192byte vpush {q4-q7} // q0, q1, q2: src @@ -95,10 +96,8 @@ LoopH: cmp r5, #0 beq Store vld1.32 {q0}, [r5] // parameter remains float - cmp r6, #0 - beq LoadOrigin - vld1.16 {d6}, [r6]! // load 4 * sizeof(int16_t) - vshll.s16 q3, d6, #16 // shift left long of each int16_t as int32_t + vld1.32 {q3}, [r6]! // load 4 * sizeof(float) + vmla.f32 q4, q3, d0[1] vmla.f32 q5, q3, d0[1] vmla.f32 q6, q3, d0[1] @@ -114,44 +113,6 @@ LoopH: b PostTreat - LoadOrigin: - mov r11, r0 - vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) - vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t - vshll.s16 q1, d2, #16 - vmla.f32 q4, q1, d0[1] - vmla.f32 q5, q2, d0[1] - - vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) - vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t - vshll.s16 q1, d2, #16 - vmla.f32 q6, q1, d0[1] - vmla.f32 q7, q2, d0[1] - - vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) - vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t - vshll.s16 q1, d2, #16 - vmla.f32 q8, q1, d0[1] - vmla.f32 q9, q2, d0[1] - - vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) - vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t - vshll.s16 q1, d2, #16 - vmla.f32 q10, q1, d0[1] - vmla.f32 q11, q2, d0[1] - - vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) - vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t - vshll.s16 q1, d2, #16 - vmla.f32 q12, q1, d0[1] - vmla.f32 q13, q2, d0[1] - - vld1.16 {d2, d3}, [r11]! // load 2 * 4 * sizeof(int16_t) - vshll.s16 q2, d3, #16 // shift left long of each int16_t as int32_t - vshll.s16 q1, d2, #16 - vmla.f32 q14, q1, d0[1] - vmla.f32 q15, q2, d0[1] - PostTreat: vdup.f32 q2, d1[0] // min vdup.f32 q1, d1[1] // max @@ -183,20 +144,13 @@ LoopH: vmin.f32 q15, q15, q1 Store: - vshrn.i32 d8, q4, #16 // !!caution: these instructions has relying, eg: d10 must be written after reading q5. 
shift right 16bit of each float32 as int16_t - vshrn.i32 d9, q5, #16 - vshrn.i32 d10, q6, #16 - vshrn.i32 d11, q7, #16 - vshrn.i32 d12, q8, #16 - vshrn.i32 d13, q9, #16 - vshrn.i32 d14, q10, #16 - vshrn.i32 d15, q11, #16 - vshrn.i32 d16, q12, #16 - vshrn.i32 d17, q13, #16 - vshrn.i32 d18, q14, #16 - vshrn.i32 d19, q15, #16 - - vstm r0!, {d8, d9, d10, d11, d12, d13, d14, d15, d16, d17, d18, d19} + + vst1.32 {q4, q5}, [r0]! + vst1.32 {q6, q7}, [r0]! + vst1.32 {q8, q9}, [r0]! + vst1.32 {q10, q11}, [r0]! + vst1.32 {q12, q13}, [r0]! + vst1.32 {q14, q15}, [r0]! add r0, r0, r8 add r2, r2, r3 diff --git a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit.S b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit.S index 5621142ff..d31d57ad7 100644 --- a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit.S +++ b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit.S @@ -13,15 +13,81 @@ .text .align 5 +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32_4 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm +.macro ReLU_FP32_3 s0, s1, s2, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s +.endm +.macro ReLU_FP32_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s +.endm +.macro ReLU_FP32_1 s0, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s +.endm +.macro MUL_SCALE4 s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MUL_SCALE3 s, d0, d1, d2 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s +.endm +.macro MUL_SCALE2 s, d0, d1 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s +.endm +.macro MUL_SCALE1 s, d0 + fmul \d0\().4s, \d0\().4s, \s\().4s +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm + asm_function MNNGemmInt8AddBiasScale_16x4_Unit -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum; + const int32_t* bias; + float* extraScale; +}; +*/ //void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, // size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realSize) { @@ -30,7 +96,7 @@ asm_function MNNGemmInt8AddBiasScale_16x4_Unit // x5: dst_depth_quad, x6: post, x7: realSize //Load from post: -// x7: scale, x10: bias, w11: maxValue, w6: minValue, w13: UseInt8 +// x7: scale, x10: bias, w11: maxValue, w6: minValue, w13: UseInt8, x14: srcKernelSum, x12: weightQuantBias mov x8, x7 mov x15, x6 ldr x7, [x15, #0] @@ -38,11 +104,23 @@ ldr x10, [x15, #8] ldr w11, [x15, #16] ldr w6, [x15, #20] ldr w13, [x15, #24] - -stp d14, d15, [sp, #-64]! -stp d12, d13, [sp, #16] -stp d10, d11, [sp, #32] -stp d8, d9, [sp, #48] +ldr x14, [x15, #40] // srcKernelSum +ldr x12, [x15, #48] // weightQuantBias + +stp d14, d15, [sp, #(-16 * 8)]! +stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x19, x20, [sp, #(16 * 4)] +stp x21, x22, [sp, #(16 * 5)] +stp x23, x24, [sp, #(16 * 6)] + +ldr x19, [x15, #56] // fp32 min max +ldr x21, [x15, #64] // blockNum +ldr x23, [x15, #80] // extraScale +mul x21, x21, x3 // blockNum * src_depth_quad_perblock +lsl x21, x21, #6 // src_depth_quad* SRC_UNIT * UNIT * sizeof(int8_t) +add x20, x19, #4 Start: cmp x8, #3 @@ -56,9 +134,10 @@ beq L1Dz cmp w13, #1 bne L4LoopDz -//sub x4, x4, #8 // post->scale != nullptr && post->useInt8 == 1. + L4LoopDz: mov x8, x1 + mov x22, x2 ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64 ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #64 @@ -183,7 +262,6 @@ L4LoopDz: ComputeSum: - ld1 {v0.4s}, [x10], #16 addp v4.4s, v16.4s, v17.4s addp v5.4s, v18.4s, v19.4s addp v6.4s, v20.4s, v21.4s @@ -199,35 +277,69 @@ L4LoopDz: addp v15.4s, v10.4s, v11.4s L4Quan: - ld1 {v1.4s}, [x7], #16 - add v16.4s, v12.4s, v0.4s - add v17.4s, v13.4s, v0.4s - add v18.4s, v14.4s, v0.4s - add v19.4s, v15.4s, v0.4s + ld1 {v1.4s}, [x7], #16 // scalefuse + ld1 {v20.4s}, [x14] // srcKernelSum + ld1 {v21.4s}, [x12], #16 // weightQuanZero - dup v31.16b, w6 // Min - dup v30.16b, w11 // Max + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + scvtf v6.4s, v14.4s + scvtf v7.4s, v15.4s - scvtf v4.4s, v16.4s - scvtf v5.4s, v17.4s - scvtf v6.4s, v18.4s - scvtf v7.4s, v19.4s + cbz x23, TILE4_MUL_OHE_SCALE + ld1 {v2.4s}, [x23] + MUL_EXTRA_SCALE v2, v4, v5, v6, v7 + + TILE4_MUL_OHE_SCALE: + MUL_SCALE4 v1, v4, v5, v6, v7 + + MLA_WEIGHTZERO v4, v20, v21, 0 + MLA_WEIGHTZERO v5, v20, v21, 1 + MLA_WEIGHTZERO v6, v20, v21, 2 + MLA_WEIGHTZERO v7, v20, v21, 3 - fmul v12.4s, v4.4s, v1.4s - fmul v13.4s, v5.4s, v1.4s - fmul v14.4s, v6.4s, v1.4s - fmul v15.4s, v7.4s, v1.4s cmp w13, #1 beq L4QuantUseInt8 - st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], x4 + + L4_Add_BIAS: + cbz x10, L4_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + fadd v6.4s, v6.4s, v0.4s + fadd v7.4s, v7.4s, v0.4s + b L4_POST + + L4_ADD_DSTV: + ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0] + fadd v4.4s, v4.4s, v8.4s + fadd v5.4s, v5.4s, v9.4s + fadd v6.4s, v6.4s, v10.4s + fadd v7.4s, v7.4s, v11.4s + + L4_POST: + cbz x19, L4_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_4 v4, v5, v6, v7, v26, v27 + + L4_STORE: + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], x4 b L4LoopCheck L4QuantUseInt8: + ld1 
{v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + fadd v6.4s, v6.4s, v0.4s + fadd v7.4s, v7.4s, v0.4s - fcvtas v8.4s, v12.4s - fcvtas v9.4s, v13.4s - fcvtas v10.4s, v14.4s - fcvtas v11.4s, v15.4s + dup v31.16b, w6 // Min + dup v30.16b, w11 // Max + fcvtas v8.4s, v4.4s + fcvtas v9.4s, v5.4s + fcvtas v10.4s, v6.4s + fcvtas v11.4s, v7.4s sqxtn v0.4h, v8.4s sqxtn2 v0.8h, v9.4s @@ -243,6 +355,7 @@ L4LoopDz: L4LoopCheck: subs x5, x5, #1 mov x1, x8 + add x2, x22, x21 bne L4LoopDz b End @@ -253,10 +366,11 @@ bne L3LoopDz sub x4, x4, #8 L3LoopDz: mov x8, x1 + mov x22, x2 ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64 ld1 {v4.16b, v5.16b, v6.16b}, [x1], #48 add x1, x1, #16 - + smull v8.8h, v0.8b, v4.8b smull v9.8h, v1.8b, v4.8b smull v10.8h, v2.8b, v4.8b @@ -347,10 +461,6 @@ L3LoopDz: smlal2 v9.8h, v1.16b, v6.16b smlal2 v10.8h, v2.16b, v6.16b smlal2 v11.8h, v3.16b, v6.16b - smlal2 v8.8h, v0.16b, v6.16b - smlal2 v9.8h, v1.16b, v6.16b - smlal2 v10.8h, v2.16b, v6.16b - smlal2 v11.8h, v3.16b, v6.16b sadalp v24.4s, v8.8h sadalp v25.4s, v9.8h @@ -360,7 +470,6 @@ L3LoopDz: bne L3LoopSz L3ComputeSum: - ld1 {v0.4s}, [x10], #16 addp v4.4s, v16.4s, v17.4s addp v5.4s, v18.4s, v19.4s addp v6.4s, v20.4s, v21.4s @@ -374,29 +483,65 @@ L3LoopDz: L3Quan: ld1 {v1.4s}, [x7], #16 - add v16.4s, v12.4s, v0.4s - add v17.4s, v13.4s, v0.4s - add v18.4s, v14.4s, v0.4s - - dup v31.16b, w6 // Min - dup v30.16b, w11 // Max - - scvtf v4.4s, v16.4s - scvtf v5.4s, v17.4s - scvtf v6.4s, v18.4s + ld1 {v20.d}[0], [x14], #8 // srcKernelSum + ld1 {v20.s}[2], [x14] + ld1 {v21.4s}, [x12], #16 // weightQuanZero + + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + scvtf v6.4s, v14.4s + MUL_SCALE3 v1, v4, v5, v6 + + cbz x23, TILE3_MUL_OHE_SCALE + ld1 {v2.d}[0], [x23], #8 + ld1 {v2.s}[2], [x23] + fmul v4.4s, v4.4s, v2.s[0] + fmul v5.4s, v5.4s, v2.s[1] + fmul v6.4s, v6.4s, v2.s[2] + sub x23, x23, #8 + + TILE3_MUL_OHE_SCALE: + sub x14, x14, #8 + MLA_WEIGHTZERO v4, v20, v21, 0 + MLA_WEIGHTZERO v5, v20, v21, 1 + MLA_WEIGHTZERO v6, v20, v21, 2 - fmul v12.4s, v4.4s, v1.4s - fmul v13.4s, v5.4s, v1.4s - fmul v14.4s, v6.4s, v1.4s cmp w13, #1 beq L3QuantUseInt8 - st1 {v12.4s, v13.4s, v14.4s}, [x0], x4 + + L3_ADD_BIAS: + cbz x10, L3_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + fadd v6.4s, v6.4s, v0.4s + b L3_POST + + L3_ADD_DSTV: + ld1 {v0.4s, v1.4s, v2.4s}, [x0] + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v1.4s + fadd v6.4s, v6.4s, v2.4s + + L3_POST: + cbz x19, L3_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_3 v4, v5, v6, v26, v27 + L3_STORE: + st1 {v4.4s, v5.4s, v6.4s}, [x0], x4 b L3LoopCheck L3QuantUseInt8: - fcvtas v8.4s, v12.4s - fcvtas v9.4s, v13.4s - fcvtas v10.4s, v14.4s + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + fadd v6.4s, v6.4s, v0.4s + dup v31.16b, w6 // Min + dup v30.16b, w11 // Max + fcvtas v8.4s, v4.4s + fcvtas v9.4s, v5.4s + fcvtas v10.4s, v6.4s sqxtn v0.4h, v8.4s sqxtn2 v0.8h, v9.4s @@ -417,6 +562,7 @@ L3LoopDz: L3LoopCheck: subs x5, x5, #1 mov x1, x8 + add x2, x22, x21 bne L3LoopDz b End @@ -424,6 +570,7 @@ b End L2Dz: L2LoopDz: mov x8, x1 + mov x22, x2 ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64 ld1 {v4.16b, v5.16b}, [x1], #32 @@ -496,7 +643,6 @@ L2LoopDz: L2ComputeSum: - ld1 {v0.4s}, [x10], #16 addp v4.4s, v16.4s, v17.4s addp v5.4s, v18.4s, v19.4s addp v6.4s, v20.4s, v21.4s @@ -507,25 +653,55 @@ L2LoopDz: L2Quan: ld1 {v1.4s}, [x7], #16 - add v16.4s, v12.4s, v0.4s - add v17.4s, 
v13.4s, v0.4s + ld1 {v20.d}[0], [x14] // srcKernelSum + ld1 {v21.4s}, [x12], #16 // weightQuanZero - dup v31.8b, w6 // Min - dup v30.8b, w11 // Max + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + MUL_SCALE2 v1, v4, v5 - scvtf v4.4s, v16.4s - scvtf v5.4s, v17.4s + cbz x23, TILE2_MUL_OHE_SCALE + ld1 {v2.d}[0], [x23] + fmul v4.4s, v4.4s, v2.s[0] + fmul v5.4s, v5.4s, v2.s[1] + + TILE2_MUL_OHE_SCALE: + MLA_WEIGHTZERO v4, v20, v21, 0 + MLA_WEIGHTZERO v5, v20, v21, 1 - fmul v12.4s, v4.4s, v1.4s - fmul v13.4s, v5.4s, v1.4s cmp w13, #1 beq L2QuantUseInt8 - st1 {v12.4s, v13.4s}, [x0], x4 + + L2_ADD_BIAS: + cbz x10, L2_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + b L2_POST + + L2_ADD_DSTV: + ld1 {v0.4s, v1.4s}, [x0] + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v1.4s + + L2_POST: + cbz x19, L2_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_2 v4, v5, v26, v27 + + L2_STORE: + st1 {v4.4s, v5.4s}, [x0], x4 b L2LoopCheck L2QuantUseInt8: - fcvtas v8.4s, v12.4s - fcvtas v9.4s, v13.4s + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + dup v31.8b, w6 // Min + dup v30.8b, w11 // Max + fcvtas v8.4s, v4.4s + fcvtas v9.4s, v5.4s sqxtn v0.4h, v8.4s sqxtn2 v0.8h, v9.4s @@ -540,6 +716,7 @@ L2LoopDz: L2LoopCheck: subs x5, x5, #1 mov x1, x8 + add x2, x22, x21 bne L2LoopDz b End @@ -547,6 +724,7 @@ b End L1Dz: L1LoopDz: mov x8, x1 + mov x22, x2 ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64 dup v16.4s, wzr dup v17.4s, wzr @@ -599,7 +777,7 @@ L1LoopDz: sadalp v18.4s, v10.8h sadalp v19.4s, v11.8h - ld1 {v0.4s}, [x10], #16 + //ld1 {v0.4s}, [x10], #16 addp v4.4s, v16.4s, v17.4s addp v5.4s, v18.4s, v19.4s @@ -607,22 +785,49 @@ L1LoopDz: L1Quan: ld1 {v1.4s}, [x7], #16 - add v16.4s, v12.4s, v0.4s + ld1 {v20.s}[0], [x14] // srcKernelSum + ld1 {v21.4s}, [x12], #16 // weightQuanZero - dup v31.4s, w6 // Min - dup v30.4s, w11 // Max + scvtf v4.4s, v12.4s + MUL_SCALE1 v1, v4 - scvtf v4.4s, v16.4s + cbz x23, TILE1_MUL_OHE_SCALE + ld1 {v2.s}[0], [x23] + fmul v4.4s, v4.4s, v2.s[0] + + TILE1_MUL_OHE_SCALE: + MLA_WEIGHTZERO v4, v20, v21, 0 - fmul v12.4s, v4.4s, v1.4s cmp w13, #1 beq L1QuantUseInt8 - st1 {v12.4s}, [x0], x4 + + L1_ADD_BIAS: + cbz x10, L1_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + b L1_POST + + L1_ADD_DSTV: + ld1 {v0.4s}, [x0] + fadd v4.4s, v4.4s, v0.4s + + L1_POST: + cbz x19, L1_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_1 v4, v26, v27 + + L1_STORE: + st1 {v4.4s}, [x0], x4 b L1LoopCheck L1QuantUseInt8: + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + dup v31.4s, w6 // Min + dup v30.4s, w11 // Max - fcvtas v8.4s, v12.4s + fcvtas v8.4s, v4.4s smin v8.4s, v30.4s, v8.4s @@ -635,13 +840,17 @@ L1LoopDz: L1LoopCheck: subs x5, x5, #1 mov x1, x8 + add x2, x22, x21 bne L1LoopDz End: -ldp d8, d9, [sp, #48] -ldp d10, d11, [sp, #32] -ldp d12, d13, [sp, #16] -ldp d14, d15, [sp], #64 +ldp x23, x24, [sp, #(16 * 6)] +ldp x21, x22, [sp, #(16 * 5)] +ldp x19, x20, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 8) ret #endif diff --git a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S index a6e0142d1..16b2837b7 100644 --- a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S +++ b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_16x4_Unit_FAST.S @@ -13,14 
+13,54 @@ .text .align 5 +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm +.macro ReLU_FP32_3 s0, s1, s2, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s +.endm +.macro ReLU_FP32_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s +.endm +.macro ReLU_FP32_1 s0, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s +.endm + asm_function MNNGemmInt8AddBiasScale_16x4_Unit_FAST -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -//}; +/* +struct QuanPostTreatParameters { + const float* scale; + const float* bias; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; +}; +*/ //void MNNGemmInt8AddBiasScale_16x4_Unit_FAST(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, // size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t remain) { @@ -30,6 +70,7 @@ asm_function MNNGemmInt8AddBiasScale_16x4_Unit_FAST //Load from post: // x7: scale, x10: bias, w11: maxValue, w13: minValue, w12: useInt8 +// x19: srcKernelSum, x20: weightQuanBias mov x8, x7 ldr x7, [x6, #0] ldr x10, [x6, #8] @@ -37,10 +78,14 @@ ldr w11, [x6, #16] ldr w13, [x6, #20] ldr w12, [x6, #24] -stp d14, d15, [sp, #-64]! -stp d12, d13, [sp, #16] -stp d10, d11, [sp, #32] -stp d8, d9, [sp, #48] +stp d14, d15, [sp, #(-16 * 6)]! 
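The *QuantUseInt8 paths of these arm64 kernels requantize with fcvtas followed by an explicit clamp against the int8 min/max from the struct and a saturating narrow. A scalar sketch of that store path, assuming std::lround's round-half-away-from-zero behaviour matches fcvtas for in-range values; this is an illustration, not MNN code:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Hedged sketch of the arm64 float -> int8 store path (fcvtas + clamp + narrow).
    static inline int8_t requantizeOne(float v, int32_t minValue, int32_t maxValue) {
        int32_t q = static_cast<int32_t>(std::lround(v)); // like fcvtas: nearest, ties away from zero
        q = std::min(q, maxValue);                        // like the smin against the max vector
        q = std::max(q, minValue);                        // like the smax against the min vector
        return static_cast<int8_t>(q);                    // like the sqxtn narrowing
    }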
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +ldr x19, [x6, #40] +ldr x20, [x6, #48] cmp x8, #3 beq L3Dz @@ -183,33 +228,47 @@ L4LoopDz: addp v14.4s, v20.4s, v21.4s addp v15.4s, v22.4s, v23.4s - add v16.4s, v12.4s, v0.4s - add v17.4s, v13.4s, v0.4s + //add v16.4s, v12.4s, v0.4s + //add v17.4s, v13.4s, v0.4s + //add v18.4s, v14.4s, v0.4s + //add v19.4s, v15.4s, v0.4s L4Quan: - ld1 {v1.4s}, [x7], #16 - add v18.4s, v14.4s, v0.4s - add v19.4s, v15.4s, v0.4s - - scvtf v4.4s, v16.4s - scvtf v5.4s, v17.4s - scvtf v6.4s, v18.4s - scvtf v7.4s, v19.4s + ld1 {v1.4s}, [x7], #16 // scale + ld1 {v2.4s}, [x19] // x kernel sum + ld1 {v24.4s}, [x20], #16 // weight quan zeropoint - dup v31.4s, w13 // Min - dup v30.4s, w11 // Max + TILE4_INT2FLOAT: + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + scvtf v6.4s, v14.4s + scvtf v7.4s, v15.4s fmul v12.4s, v4.4s, v1.4s fmul v13.4s, v5.4s, v1.4s fmul v14.4s, v6.4s, v1.4s fmul v15.4s, v7.4s, v1.4s + MLA_WEIGHTZERO v12, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v13, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v14, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v15, v2, v24, 3 // tile:3, oc:0-3 + + + fadd v12.4s, v12.4s, v0.4s + fadd v13.4s, v13.4s, v0.4s + fadd v14.4s, v14.4s, v0.4s + fadd v15.4s, v15.4s, v0.4s + cmp w12, #1 beq L4QuantUseInt8 + ReLU_FP32 v12, v13, v14, v15, v26, v27 st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], x4 b L4LoopCheck L4QuantUseInt8: + dup v31.4s, w13 // Min + dup v30.4s, w11 // Max fcvtas v8.4s, v12.4s fcvtas v9.4s, v13.4s fcvtas v10.4s, v14.4s @@ -243,6 +302,7 @@ L4LoopCheck: b End L3Dz: +add x3, x19, #8 cmp w12, #1 bne L3LoopDz sub x4, x4, #8 @@ -346,35 +406,43 @@ L3LoopDz: addp v19.4s, v9.4s, v8.4s addp v20.4s, v7.4s, v6.4s addp v21.4s, v5.4s, v4.4s + addp v12.4s, v16.4s, v17.4s addp v13.4s, v18.4s, v19.4s - ld1 {v0.4s}, [x10], #16 addp v14.4s, v20.4s, v21.4s - - add v16.4s, v12.4s, v0.4s - add v17.4s, v13.4s, v0.4s + ld1 {v0.4s}, [x10], #16 L3Quan: ld1 {v1.4s}, [x7], #16 - add v18.4s, v14.4s, v0.4s + ld1 {v2.d}[0], [x19] // x kernel sum + ld1 {v2.s}[2], [x6] + ld1 {v24.4s}, [x20], #16 // weight quan zeropoint - scvtf v4.4s, v16.4s - scvtf v5.4s, v17.4s - scvtf v6.4s, v18.4s - - dup v31.4s, w13 // Min - dup v30.4s, w11 // Max + TILE3_INT2FLOAT: + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + scvtf v6.4s, v14.4s fmul v12.4s, v4.4s, v1.4s fmul v13.4s, v5.4s, v1.4s fmul v14.4s, v6.4s, v1.4s + MLA_WEIGHTZERO v12, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v13, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v14, v2, v24, 2 // tile:2, oc:0-3 + + + fadd v12.4s, v12.4s, v0.4s + fadd v13.4s, v13.4s, v0.4s + fadd v14.4s, v14.4s, v0.4s cmp w12, #1 beq L3QuantUseInt8 + ReLU_FP32_3 v12, v13, v14, v26, v27 st1 {v12.4s, v13.4s, v14.4s}, [x0], x4 b L3LoopCheck L3QuantUseInt8: - + dup v31.4s, w13 // Min + dup v30.4s, w11 // Max fcvtas v8.4s, v12.4s fcvtas v9.4s, v13.4s fcvtas v10.4s, v14.4s @@ -480,29 +548,33 @@ L2LoopDz: addp v19.4s, v9.4s, v8.4s addp v12.4s, v16.4s, v17.4s addp v13.4s, v18.4s, v19.4s - ld1 {v0.4s}, [x10], #16 - - add v16.4s, v12.4s, v0.4s - add v17.4s, v13.4s, v0.4s L2Quan: ld1 {v1.4s}, [x7], #16 + ld1 {v2.d}[0], [x19] // x kernel sum + ld1 {v24.4s}, [x20], #16 // weight quan zeropoint + ld1 {v0.4s}, [x10], #16 - scvtf v4.4s, v16.4s - scvtf v5.4s, v17.4s - - dup v31.4s, w13 // Min - dup v30.4s, w11 // Max + TILE2_INT2FLOAT: + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s fmul v12.4s, v4.4s, v1.4s fmul v13.4s, v5.4s, v1.4s + 
MLA_WEIGHTZERO v12, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v13, v2, v24, 1 // tile:1, oc:0-3 + fadd v12.4s, v12.4s, v0.4s + fadd v13.4s, v13.4s, v0.4s + cmp w12, #1 beq L2QuantUseInt8 + ReLU_FP32_2 v12, v13, v26, v27 st1 {v12.4s, v13.4s}, [x0], x4 b L2LoopCheck L2QuantUseInt8: - + dup v31.4s, w13 // Min + dup v30.4s, w11 // Max fcvtas v8.4s, v12.4s fcvtas v9.4s, v13.4s @@ -580,25 +652,27 @@ L1LoopDz: addp v12.4s, v16.4s, v17.4s ld1 {v0.4s}, [x10], #16 - add v16.4s, v12.4s, v0.4s - L1Quan: ld1 {v1.4s}, [x7], #16 + ld1 {v2.s}[0], [x19] // x kernel sum + ld1 {v24.4s}, [x20], #16 // weight quan zeropoint - scvtf v4.4s, v16.4s - - dup v31.4s, w13 // Min - dup v30.4s, w11 // Max - + TILE1_INT2FLOAT: + scvtf v4.4s, v12.4s fmul v12.4s, v4.4s, v1.4s + MLA_WEIGHTZERO v12, v2, v24, 0 // tile:0, oc:0-3 + fadd v12.4s, v12.4s, v0.4s + cmp w12, #1 beq L1QuantUseInt8 + ReLU_FP32_1 v12, v26, v27 st1 {v12.4s}, [x0], x4 b L1LoopCheck L1QuantUseInt8: - + dup v31.4s, w13 // Min + dup v30.4s, w11 // Max fcvtas v8.4s, v12.4s smin v8.4s, v30.4s, v8.4s @@ -615,10 +689,12 @@ L1LoopCheck: bne L1LoopDz End: +ldp x19, x20, [sp, #80] +ldp x21, x22, [sp, #64] ldp d8, d9, [sp, #48] ldp d10, d11, [sp, #32] ldp d12, d13, [sp, #16] -ldp d14, d15, [sp], #64 +ldp d14, d15, [sp], #96 ret #endif diff --git a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV82_Unit.S b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV82_Unit.S index 943e5655f..d1fdd68bd 100644 --- a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV82_Unit.S +++ b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV82_Unit.S @@ -12,11 +12,25 @@ .text .align 5 -.macro SET_BIAS s, d0, d1, d2, d3 - mov \d0\().16b, \s\().16b - mov \d1\().16b, \s\().16b - mov \d2\().16b, \s\().16b - mov \d3\().16b, \s\().16b +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm + +.macro ADD_FLOAT d0, d1, d2, d3, s0, s1, s2, s3 + fadd \d0\().4s, \d0\().4s, \s0\().4s + fadd \d1\().4s, \d1\().4s, \s1\().4s + fadd \d2\().4s, \d2\().4s, \s2\().4s + fadd \d3\().4s, \d3\().4s, \s3\().4s +.endm + +.macro SET_BIAS d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 .endm .macro Int32ToFloat z0, z1, z2, z3 scvtf \z0\().4s, \z0\().4s @@ -30,6 +44,12 @@ fmul \d2\().4s, \d2\().4s, \s\().4s fmul \d3\().4s, \d3\().4s, \s\().4s .endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm .macro FloatToInt32 z0, z1, z2, z3 fcvtas \z0\().4s, \z0\().4s fcvtas \z1\().4s, \z1\().4s @@ -50,15 +70,38 @@ Int16ToInt8_ONE \s0, \s1, \d0 Int16ToInt8_ONE \s2, \s3, \d1 .endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm asm_function MNNGemmInt8AddBiasScale_ARMV82_Unit - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t 
minValue; -//}; +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum; + const int32_t* bias; + float* extraScale; +}; +*/ //void MNNGemmInt8AddBiasScale_ARMV82_Unit(int8_t* dst, const int8_t* src, // const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, @@ -67,29 +110,37 @@ asm_function MNNGemmInt8AddBiasScale_ARMV82_Unit //Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step //x5:dst_depth_quad, x6: parameters, x7: realDstCount -//Load from x7: x8: scale, x9: bias, w12: maxValue, w13: minValue, w28: useInt8 +//Load from x6: x8: scale, x9: bias, w28: useInt8, x25: xKernelSum, x26: weightQuantBias, x23: fp32minmax +// x24: extraScale ldr x8, [x6, #0] ldr x9, [x6, #8] -ldr w12, [x6, #16] -ldr w13, [x6, #20] -stp d14, d15, [sp, #(-16 * 7)]! +stp d14, d15, [sp, #(-16 * 9)]! stp d12, d13, [sp, #(16 * 1)] stp d10, d11, [sp, #(16 * 2)] stp d8, d9, [sp, #(16 * 3)] stp x21, x22, [sp, #(16 * 4)] stp x19, x20, [sp, #(16 * 5)] stp x27, x28, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x23, x24, [sp, #(16 * 8)] + +ldr x27, [x6, #64] // blockNum +mul x27, x27, x3 // blockNum * src_depth_quad_perblock +lsl x15, x27, #4 // x15 = src_depth_quad * UNIT * SRC_UNIT + ldr w28, [x6, #24] // useInt8 +ldr x25, [x6, #40] // xKernelSum +ldr x26, [x6, #48] // weightQuantBias +ldr x24, [x6, #80] // extraScale +add x23, x6, #16 // int8 max ptr mov x21, #4 // sizeof(int8_t) * UNIT cbnz w28, Start mov x21, #16 // sizeof(float) * UNIT +ldr x23, [x6, #56] // fp32minmax Start: -lsl x15, x3, #4 // x15 = src_depth_quad * UNIT * SRC_UNIT mov x22, #48 // src_steps -dup v7.16b, w12 // max -dup v6.16b, w13 // min TILE_12: cmp x7, #12 @@ -97,16 +148,18 @@ TILE_12: cmp x5, #2 blt L4LoopDz_TILE_12 L8LoopDz_TILE_12: - ld1 {v0.4s, v1.4s}, [x9], #32 // bias + //ld1 {v0.4s, v1.4s}, [x9], #32 // bias mov x11, x1 mov x13, x3 + mov x20, x0 // tag dst address + mov x27, x2 - SET_BIAS v0, v8, v9, v10, v11 - SET_BIAS v0, v12, v13, v14, v15 - SET_BIAS v0, v16, v17, v18, v19 - SET_BIAS v1, v20, v21, v22, v23 - SET_BIAS v1, v24, v25, v26, v27 - SET_BIAS v1, v28, v29, v30, v31 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + SET_BIAS v24, v25, v26, v27 + SET_BIAS v28, v29, v30, v31 L8LoopSz_TILE_12: ld1 {v3.16b}, [x2], x15 // weight @@ -141,26 +194,108 @@ L8LoopDz_TILE_12: bne L8LoopSz_TILE_12 L8LoopSzEnd_TILE_12: - add x2, x2, x15 + // add x2, x2, x15 + add x2, x27, x15, LSL #1 sub x5, x5, #2 L8Tile12Quan: ld1 {v0.4s, v1.4s}, [x8], #32 // scale + ld1 {v2.4s, v3.4s, v4.4s}, [x25] // x kernel sum + ld1 {v5.4s, v6.4s}, [x26], #32 // weight quan zeropoint Int32ToFloat v8, v9, v10, v11 Int32ToFloat v12, v13, v14, v15 Int32ToFloat v16, v17, v18, v19 Int32ToFloat v20, v21, v22, v23 Int32ToFloat v24, v25, v26, v27 Int32ToFloat v28, v29, v30, v31 + MUL_SCALE v0, v8, v9, v10, v11 MUL_SCALE v0, v12, v13, v14, v15 MUL_SCALE v0, v16, v17, v18, v19 MUL_SCALE v1, v20, v21, v22, v23 MUL_SCALE v1, v24, v25, v26, v27 MUL_SCALE v1, v28, v29, v30, v31 + + cbz x24, TILE12_L8_MLA + ld1 {v0.4s, v1.4s}, [x24], #32 + ld1 {v7.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v7, v16, v17, v18, v19 + 
MUL_EXTRA_SCALE v0, v20, v21, v22, v23 + MUL_EXTRA_SCALE v1, v24, v25, v26, v27 + MUL_EXTRA_SCALE v7, v28, v29, v30, v31 + sub x24, x24, #32 + + TILE12_L8_MLA: + MLA_WEIGHTZERO v8, v2, v5, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v5, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v5, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v5, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v5, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v5, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v5, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v5, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v4, v5, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v4, v5, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v4, v5, 2 // tile:10, oc:0-3 + MLA_WEIGHTZERO v19, v4, v5, 3 // tile:11, oc:0-3 + + MLA_WEIGHTZERO v20, v2, v6, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v21, v2, v6, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v22, v2, v6, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v23, v2, v6, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v24, v3, v6, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v25, v3, v6, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v26, v3, v6, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v27, v3, v6, 3 // tile:7, oc:4-7 + MLA_WEIGHTZERO v28, v4, v6, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v29, v4, v6, 1 // tile:9, oc:4-7 + MLA_WEIGHTZERO v30, v4, v6, 2 // tile:10, oc:4-7 + MLA_WEIGHTZERO v31, v4, v6, 3 // tile:11, oc:4-7 + cmp w28, #1 beq L8Tile12QuanUseInt8 sub x4, x4, #128 + + cbz x9, TILE12_ADD_DSTV + TILE12_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x9], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + ADD_BIAS_FLOAT v24, v25, v26, v27, v1 + ADD_BIAS_FLOAT v28, v29, v30, v31, v1 + b TILE12_POST + + TILE12_ADD_DSTV: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x20], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x20], #64 + ADD_FLOAT v8, v9, v10, v11, v0, v1, v2, v3 + ADD_FLOAT v12, v13, v14, v15, v4, v5, v6, v7 + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x20], x4 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x20], #64 + ADD_FLOAT v16, v17, v18, v19, v0, v1, v2, v3 + ADD_FLOAT v20, v21, v22, v23, v4, v5, v6, v7 + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x20], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x20] + ADD_FLOAT v24, v25, v26, v27, v0, v1, v2, v3 + ADD_FLOAT v28, v29, v30, v31, v4, v5, v6, v7 + + TILE12_POST: + cbz x23, TILE12_STORE + ld1r {v0.4s}, [x23], #4 // f32 min + ld1r {v1.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v0, v1 + ReLU_FP32 v12, v13, v14, v15, v0, v1 + ReLU_FP32 v16, v17, v18, v19, v0, v1 + ReLU_FP32 v20, v21, v22, v23, v0, v1 + ReLU_FP32 v24, v25, v26, v27, v0, v1 + ReLU_FP32 v28, v29, v30, v31, v0, v1 + sub x23, x23, #4 + + TILE12_STORE: st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0], #64 st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], #64 st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0], x4 @@ -171,6 +306,19 @@ L8LoopDz_TILE_12: b L8Tile12LoopCheck L8Tile12QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v0.4s, v1.4s}, [x9], #32 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + ADD_BIAS_FLOAT v24, v25, v26, v27, v1 + ADD_BIAS_FLOAT v28, v29, v30, v31, v1 + + sub x23, x23, #4 FloatToInt32 v8, v9, v10, v11 FloatToInt32 v12, v13, v14, v15 FloatToInt32 v16, v17, v18, v19 @@ -207,11 +355,9 @@ L8LoopDz_TILE_12: blt End L4LoopDz_TILE_12: - ld1 {v0.4s}, [x9] // bias - - SET_BIAS v0, v8, v9, v10, 
v11 - SET_BIAS v0, v12, v13, v14, v15 - SET_BIAS v0, v16, v17, v18, v19 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 L4LoopSz_TILE_12: ld1 {v3.16b}, [x2], #16 // weight @@ -235,15 +381,66 @@ L4LoopDz_TILE_12: L4Tile12Quan: ld1 {v0.4s}, [x8] // scale + ld1 {v2.4s, v3.4s, v4.4s}, [x25]// x kernel sum + ld1 {v5.4s}, [x26], #16 // weight quan zeropoint Int32ToFloat v8, v9, v10, v11 Int32ToFloat v12, v13, v14, v15 Int32ToFloat v16, v17, v18, v19 MUL_SCALE v0, v8, v9, v10, v11 MUL_SCALE v0, v12, v13, v14, v15 MUL_SCALE v0, v16, v17, v18, v19 + + cbz x24, TILE12_L4_MLA + ld1 {v0.4s, v1.4s}, [x24], #32 + ld1 {v7.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v7, v16, v17, v18, v19 + sub x24, x24, #32 + + TILE12_L4_MLA: + MLA_WEIGHTZERO v8, v2, v5, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v5, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v5, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v5, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v5, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v5, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v5, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v5, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v4, v5, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v4, v5, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v4, v5, 2 // tile:10, oc:0-3 + MLA_WEIGHTZERO v19, v4, v5, 3 // tile:11, oc:0-3 cmp w28, #1 beq L4Tile12QuanUseInt8 sub x4, x4, #128 + + TILE12_L4_ADD_BIAS: + cbz x9, TILE12_L4_ADD_DSTV + ld1 {v0.4s}, [x9] // bias + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + b TILE12_L4_POST + + TILE12_L4_ADD_DSTV: + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x0] + sub x0, x0, #128 + ADD_FLOAT v8, v9, v10, v11, v20, v21, v22, v23 + ADD_FLOAT v12, v13, v14, v15, v24, v25, v26, v27 + ADD_FLOAT v16, v17, v18, v19, v28, v29, v30, v31 + + TILE12_L4_POST: + cbz x23, TILE12_L4_STORE + ld1r {v6.4s}, [x23], #4 // f32 min + ld1r {v7.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v6, v7 + ReLU_FP32 v12, v13, v14, v15, v6, v7 + ReLU_FP32 v16, v17, v18, v19, v6, v7 + sub x23, x23, #4 + TILE12_L4_STORE: st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0], #64 st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], #64 st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0], x4 @@ -251,6 +448,15 @@ L4LoopDz_TILE_12: b End L4Tile12QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v0.4s}, [x9] // bias + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + sub x23, x23, #4 FloatToInt32 v8, v9, v10, v11 FloatToInt32 v12, v13, v14, v15 FloatToInt32 v16, v17, v18, v19 @@ -276,17 +482,19 @@ TILE_8: mov x14, x5 mov x19, x8 // scale mov x20, x9 // bias + mov x6, x26 // weightQuantBias cmp x5, #2 blt L4LoopDz_TILE_8 L8LoopDz_TILE_8: - ld1 {v0.4s, v1.4s}, [x20], #32 // bias + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias mov x11, x1 mov x13, x3 + mov x27, x12 - SET_BIAS v0, v8, v9, v10, v11 - SET_BIAS v0, v12, v13, v14, v15 - SET_BIAS v1, v16, v17, v18, v19 - SET_BIAS v1, v20, v21, v22, v23 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 L8LoopSz_TILE_8: ld1 {v3.16b}, [x12], x15 // weight @@ -313,11 +521,14 @@ L8LoopDz_TILE_8: bne L8LoopSz_TILE_8 
L8LoopSzEnd_TILE_8: - add x12, x12, x15 + //add x12, x12, x15 + add x12, x27, x15, LSL #1 sub x14, x14, #2 L8Tile8Quan: ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s, v3.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint Int32ToFloat v8, v9, v10, v11 Int32ToFloat v12, v13, v14, v15 Int32ToFloat v16, v17, v18, v19 @@ -326,9 +537,68 @@ L8LoopDz_TILE_8: MUL_SCALE v0, v12, v13, v14, v15 MUL_SCALE v1, v16, v17, v18, v19 MUL_SCALE v1, v20, v21, v22, v23 + + cbz x24, TILE8_L8_MLA + ld1 {v0.4s, v1.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v0, v16, v17, v18, v19 + MUL_EXTRA_SCALE v1, v20, v21, v22, v23 + + TILE8_L8_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v24, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v24, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v24, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v24, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v17, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v18, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v19, v2, v25, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v20, v3, v25, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v21, v3, v25, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v22, v3, v25, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v23, v3, v25, 3 // tile:7, oc:4-7 + cmp w28, #1 beq L8Tile8QuanUseInt8 sub x4, x4, #64 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v1 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + b TILE8_POST + + TILE8_ADD_DSTV: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x10], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x10], x4 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x10], #64 + ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x10] + ADD_FLOAT v8, v9, v10, v11, v0, v1, v2, v3 + ADD_FLOAT v12, v13, v14, v15, v4, v5, v6, v7 + ADD_FLOAT v16, v17, v18, v19, v24, v25, v26, v27 + ADD_FLOAT v20, v21, v22, v23, v28, v29, v30, v31 + sub x10, x10, #128 + sub x10, x10, x4 + + TILE8_POST: + cbz x23, TILE8_STORE + ld1r {v0.4s}, [x23], #4 // f32 min + ld1r {v1.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v0, v1 + ReLU_FP32 v12, v13, v14, v15, v0, v1 + ReLU_FP32 v16, v17, v18, v19, v0, v1 + ReLU_FP32 v20, v21, v22, v23, v0, v1 + sub x23, x23, #4 + + TILE8_STORE: st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], #64 st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10], #64 @@ -337,6 +607,16 @@ L8LoopDz_TILE_8: b L8Tile8LoopCheck L8Tile8QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v0.4s, v1.4s}, [x20], #32 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v1 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + sub x23, x23, #4 FloatToInt32 v8, v9, v10, v11 FloatToInt32 v12, v13, v14, v15 FloatToInt32 v16, v17, v18, v19 @@ -364,12 +644,12 @@ L8LoopDz_TILE_8: cbz x14, Tile8End L4LoopDz_TILE_8: - ld1 {v0.4s}, [x20], #16 // bias + //ld1 {v0.4s}, [x20], #16 // bias mov x11, x1 mov x13, x3 - SET_BIAS v0, v8, v9, v10, v11 - SET_BIAS v0, v12, v13, v14, v15 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 L4LoopSz_TILE_8: ld1 {v3.16b}, 
[x12], #16 // weight @@ -388,20 +668,69 @@ L4LoopDz_TILE_8: L4LoopSzEnd_TILE_8: L4Tile8Quan: - ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v0.4s}, [x19], #16 // scale + ld1 {v2.4s, v3.4s}, [x25] // x kernel sum + ld1 {v24.4s}, [x6], #16 // weight quan zeropoint Int32ToFloat v8, v9, v10, v11 Int32ToFloat v12, v13, v14, v15 MUL_SCALE v0, v8, v9, v10, v11 MUL_SCALE v0, v12, v13, v14, v15 + + cbz x24, TILE8_L4_MLA + ld1 {v0.4s, v1.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + + TILE8_L4_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v24, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v24, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v24, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v24, 3 // tile:7, oc:0-3 cmp w28, #1 beq L4Tile8QuanUseInt8 sub x4, x4, #64 + + cbz x9, TILE8_L4_ADD_DSTV + TILE8_L4_ADD_BIAS: + ld1 {v4.4s}, [x20], #16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v4 + ADD_BIAS_FLOAT v12, v13, v14, v15, v4 + b TILE8_L4_POST + + TILE8_L4_ADD_DSTV: + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x10], #64 + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10] + sub x10, x10, #64 + ADD_FLOAT v8, v9, v10, v11, v4, v5, v6, v7 + ADD_FLOAT v12, v13, v14, v15, v16, v17, v18, v19 + + TILE8_L4_POST: + cbz x23, TILE8_L4_STORE + ld1r {v0.4s}, [x23], #4 // f32 min + ld1r {v1.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v0, v1 + ReLU_FP32 v12, v13, v14, v15, v0, v1 + sub x23, x23, #4 + + TILE8_L4_STORE: st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], #64 st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 add x4, x4, #64 b Tile8End L4Tile8QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v4.4s}, [x20], #16 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + ADD_BIAS_FLOAT v8, v9, v10, v11, v4 + ADD_BIAS_FLOAT v12, v13, v14, v15, v4 + sub x23, x23, #4 FloatToInt32 v8, v9, v10, v11 FloatToInt32 v12, v13, v14, v15 Int32ToInt16 v8, v9, v10, v11, v0, v1 @@ -412,11 +741,15 @@ L4LoopDz_TILE_8: smin v16.16b, v7.16b, v16.16b smin v17.16b, v7.16b, v17.16b st1 {v16.16b, v17.16b}, [x10], x4 - Tile8End: +cbz x24, Tile8_End_Offset +add x24, x24, #32 + +Tile8_End_Offset: sub x7, x7, #8 add x0, x0, x21, LSL #3 add x1, x1, #32 + add x25, x25, #32 TILE_4: cmp x7, #4 @@ -426,15 +759,17 @@ TILE_4: mov x14, x5 mov x19, x8 mov x20, x9 + mov x6, x26 // weightQuantBias cmp x5, #2 blt L4LoopDz_TILE_4 L8LoopDz_TILE_4: - ld1 {v0.4s, v1.4s}, [x20], #32 // bias + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias mov x11, x1 mov x13, x3 + mov x27, x12 - SET_BIAS v0, v8, v9, v10, v11 - SET_BIAS v1, v12, v13, v14, v15 + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 L8LoopSz_TILE_4: ld1 {v3.16b}, [x12], x15 // weight @@ -453,22 +788,73 @@ L8LoopDz_TILE_4: bne L8LoopSz_TILE_4 L8LoopSzEnd_TILE_4: - add x12, x12, x15 + //add x12, x12, x15 + add x12, x27, x15, LSL #1 sub x14, x14, #2 L8Tile4Quan: ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint Int32ToFloat v8, v9, v10, v11 Int32ToFloat v12, v13, v14, v15 MUL_SCALE v0, v8, v9, v10, v11 MUL_SCALE v1, v12, v13, v14, v15 + + cbz x24, TILE4_L8_MLA + ld1 {v0.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v0, v12, v13, v14, v15 + + TILE4_L8_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 
+ MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v13, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v14, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v15, v2, v25, 3 // tile:3, oc:4-7 + cmp w28, #1 beq L8Tile4QuanUseInt8 + + cbz x9, TILE4_ADD_DSTV + TILE4_ADD_BIAS: + ld1 {v4.4s, v5.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v4 + ADD_BIAS_FLOAT v12, v13, v14, v15, v5 + b TILE4_POST + + TILE4_ADD_DSTV: + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x10], x4 + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10] + sub x10, x10, x4 + ADD_FLOAT v8, v9, v10, v11, v4, v5, v6, v7 + ADD_FLOAT v12, v13, v14, v15, v16, v17, v18, v19 + + TILE4_POST: + cbz x23, TILE4_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v26, v27 + ReLU_FP32 v12, v13, v14, v15, v26, v27 + sub x23, x23, #4 + + TILE4_STORE: st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], x4 st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 b L8Tile4LoopCheck L8Tile4QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v4.4s, v5.4s}, [x20], #32 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + ADD_BIAS_FLOAT v8, v9, v10, v11, v4 + ADD_BIAS_FLOAT v12, v13, v14, v15, v5 + sub x23, x23, #4 FloatToInt32 v8, v9, v10, v11 FloatToInt32 v12, v13, v14, v15 Int32ToInt16 v8, v9, v10, v11, v0, v1 @@ -487,10 +873,10 @@ L8LoopDz_TILE_4: cbz x14, Tile4End L4LoopDz_TILE_4: - ld1 {v0.4s}, [x20], #16 // bias + //ld1 {v0.4s}, [x20], #16 // bias mov x11, x1 mov x13, x3 - SET_BIAS v0, v8, v9, v10, v11 + SET_BIAS v8, v9, v10, v11 L4LoopSz_TILE_4: ld1 {v3.16b}, [x12], #16 // weight @@ -506,25 +892,68 @@ L4LoopDz_TILE_4: L4Tile4Quan: ld1 {v0.4s}, [x19], #16 // scale + ld1 {v2.4s}, [x25] // x kernel sum + ld1 {v24.4s}, [x6], #16 // weight quan zeropoint Int32ToFloat v8, v9, v10, v11 MUL_SCALE v0, v8, v9, v10, v11 + + cbz x24, TILE4_L4_MLA + ld1 {v0.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + + TILE4_L4_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + cmp w28, #1 beq L4Tile4QuanUseInt8 + + cbz x9, TILE4_L4_ADD_DSTV + TILE4_L4_ADD_BIAS: + ld1 {v3.4s}, [x20], #16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v3 + b TILE4_L4_POST + + TILE4_L4_ADD_DSTV: + ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10] + ADD_FLOAT v8, v9, v10, v11, v12, v13, v14, v15 + + TILE4_L4_POST: + cbz x23, TILE4_L4_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v26, v27 + sub x23, x23, #4 + + TILE4_L4_STORE: st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], x4 b Tile4End L4Tile4QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v3.4s}, [x20], #16 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + ADD_BIAS_FLOAT v8, v9, v10, v11, v3 + sub x23, x23, #4 FloatToInt32 v8, v9, v10, v11 Int32ToInt16 v8, v9, v10, v11, v0, v1 Int16ToInt8_ONE v0, v1, v16 smax v16.16b, v6.16b, v16.16b smin v16.16b, v7.16b, v16.16b st1 {v16.16b}, [x10], x4 - Tile4End: +cbz x24, Tile4_End_Offset +add x24, x24, #16 + +Tile4_End_Offset: sub x7, x7, #4 add x0, x0, x21, LSL #2 add x1, x1, #16 + add x25, x25, #16 TILE_1: cbz x7, End @@ -533,14 +962,17 @@ TILE_1: mov x14, x5 mov x19, x8 mov x20, x9 + mov x6, x26 // weightQuantBias cmp x5, #2 blt L4LoopDz_TILE_1 L8LoopDz_TILE_1: - ld1 {v0.4s, v1.4s}, 
[x20], #32 // bias + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias mov x11, x1 mov x13, x3 - mov v8.16b, v0.16b - mov v9.16b, v1.16b + mov x27, x12 + + movi v8.16b, #0 + movi v9.16b, #0 L8LoopSz_TILE_1: ld1 {v3.16b}, [x12], x15 // weight ld1 {v0.s}[0], [x11], x22 // src @@ -552,22 +984,68 @@ L8LoopDz_TILE_1: bne L8LoopSz_TILE_1 L8LoopSzEnd_TILE_1: - add x12, x12, x15 + add x12, x27, x15, LSL #1 sub x14, x14, #2 L8Tile1Quan: ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.s}[0], [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint scvtf v8.4s, v8.4s scvtf v9.4s, v9.4s fmul v8.4s, v8.4s, v0.4s fmul v9.4s, v9.4s, v1.4s + + cbz x24, TILE1_L8_MLA + ld1 {v0.s}[0], [x24] + fmul v8.4s, v8.4s, v0.s[0] + fmul v9.4s, v9.4s, v0.s[0] + + TILE1_L8_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v25, 0 // tile:0, oc:4-7 + cmp w28, #1 beq L8Tile1QuanUseInt8 + + cbz x9, TILE1_ADD_DSTV + TILE1_ADD_BIAS: + ld1 {v10.4s, v11.4s}, [x20], #32 + fadd v8.4s, v8.4s, v10.4s + fadd v9.4s, v9.4s, v11.4s + b TILE1_POST + + TILE1_ADD_DSTV: + ld1 {v10.4s}, [x10], x4 + ld1 {v11.4s}, [x10] + sub x10, x10, x4 + fadd v8.4s, v8.4s, v10.4s + fadd v9.4s, v9.4s, v11.4s + + TILE1_POST: + cbz x23, TILE1_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + sub x23, x23, #4 + fmin v8.4s, v8.4s, v27.4s + fmin v9.4s, v9.4s, v27.4s + fmax v8.4s, v8.4s, v26.4s + fmax v9.4s, v9.4s, v26.4s + + TILE1_STORE: st1 {v8.4s}, [x10], x4 st1 {v9.4s}, [x10], x4 b L8Tile1LoopCheck L8Tile1QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v10.4s, v11.4s}, [x20], #32 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] + fadd v8.4s, v8.4s, v10.4s + fadd v9.4s, v9.4s, v11.4s + sub x23, x23, #4 fcvtas v8.4s, v8.4s fcvtas v9.4s, v9.4s sqxtn v0.4h, v8.4s @@ -584,10 +1062,10 @@ L8LoopDz_TILE_1: cbz x14, Tile1End L4LoopDz_TILE_1: - ld1 {v0.4s}, [x20], #16 // bias + //ld1 {v0.4s}, [x20], #16 // bias mov x11, x1 mov x13, x3 - mov v8.16b, v0.16b + movi v8.16b, #0 L4LoopSz_TILE_1: ld1 {v3.16b}, [x12], #16 // weight ld1 {v0.s}[0], [x11], x22 // src @@ -599,14 +1077,49 @@ L4LoopDz_TILE_1: L4Tile1Quan: ld1 {v0.4s}, [x19], #16 // scale + ld1 {v2.s}[0], [x25] // x kernel sum + ld1 {v24.4s}, [x6], #16 // weight quan zeropoint scvtf v8.4s, v8.4s fmul v8.4s, v8.4s, v0.4s + + cbz x24, TILE1_L4_MLA + ld1 {v0.s}[0], [x24] + fmul v8.4s, v8.4s, v0.s[0] + + TILE1_L4_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 cmp w28, #1 beq L4Tile1QuanUseInt8 + + cbz x9, TILE1_L4_ADD_DSTV + TILE1_L4_ADD_BIAS: + ld1 {v4.4s}, [x20], #16 + fadd v8.4s, v8.4s, v4.4s + b TILE1_L4_POST + + TILE1_L4_ADD_DSTV: + ld1 {v4.4s}, [x10] + fadd v8.4s, v8.4s, v4.4s + + TILE1_L4_POST: + cbz x23, TILE1_L4_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + sub x23, x23, #4 + fmax v8.4s, v8.4s, v26.4s + fmin v8.4s, v8.4s, v27.4s + TILE1_L4_STORE: st1 {v8.4s}, [x10], x4 b Tile1End L4Tile1QuanUseInt8: + ld1r {v7.4s}, [x23], #4 // int8 max + ld1r {v6.4s}, [x23] // int8 min + ld1 {v4.4s}, [x20], #16 + fadd v8.4s, v8.4s, v4.4s + sub x23, x23, #4 + dup v7.16b, v7.b[0] + dup v6.16b, v6.b[0] fcvtas v8.4s, v8.4s sqxtn v0.4h, v8.4s sqxtn v16.8b, v0.8h @@ -615,19 +1128,26 @@ L4LoopDz_TILE_1: st1 {v16.s}[0], [x10], x4 Tile1End: +cbz x24, Tile1_End_Offset +add x24, x24, #4 + +Tile1_End_Offset: sub x7, x7, #1 add x0, x0, x21 add x1, x1, #4 + add x25, x25, #4 b TILE_1 End: +ldp x23, x24, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] ldp x27, x28, [sp, #(16 * 6)] ldp x19, 
x20, [sp, #(16 * 5)] ldp x21, x22, [sp, #(16 * 4)] ldp d8, d9, [sp, #(16 * 3)] ldp d10, d11, [sp, #(16 * 2)] ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 7) +ldp d14, d15, [sp], #(16 * 9) ret #endif // __aarch64__ diff --git a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV86_Unit.S b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV86_Unit.S index a6d4af6a3..eda852364 100644 --- a/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV86_Unit.S +++ b/source/backend/cpu/arm/arm64/MNNGemmInt8AddBiasScale_ARMV86_Unit.S @@ -12,11 +12,30 @@ .text .align 5 -.macro SET_BIAS s, d0, d1, d2, d3 - mov \d0\().16b, \s\().16b - mov \d1\().16b, \s\().16b - mov \d2\().16b, \s\().16b - mov \d3\().16b, \s\().16b +.macro SET_0_5 d0, d1, d2, d3, d4 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 + movi \d4\().16b, #0 +.endm +.macro SET_0_4 d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm +.macro ADD_FLOAT d0, d1, d2, d3, s0, s1, s2, s3 + fadd \d0\().4s, \d0\().4s, \s0\().4s + fadd \d1\().4s, \d1\().4s, \s1\().4s + fadd \d2\().4s, \d2\().4s, \s2\().4s + fadd \d3\().4s, \d3\().4s, \s3\().4s .endm .macro Int32ToFloat z0, z1, z2, z3 scvtf \z0\().4s, \z0\().4s @@ -30,6 +49,12 @@ fmul \d2\().4s, \d2\().4s, \s\().4s fmul \d3\().4s, \d3\().4s, \s\().4s .endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm .macro FloatToInt32 z0, z1, z2, z3 fcvtas \z0\().4s, \z0\().4s fcvtas \z1\().4s, \z1\().4s @@ -50,16 +75,43 @@ Int16ToInt8_ONE \s0, \s1, \d0 Int16ToInt8_ONE \s2, \s3, \d1 .endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm +.macro ReLU_FP32_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s +.endm asm_function MNNGemmInt8AddBiasScale_ARMV86_Unit - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -//}; - +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum; + float* extraScale; +}; +*/ //void MNNGemmInt8AddBiasScale_ARMV86_Unit(int8_t* dst, const int8_t* src, // const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, // const QuanPostTreatParameters* parameters, size_t realDstCount); @@ -67,13 +119,13 @@ asm_function MNNGemmInt8AddBiasScale_ARMV86_Unit //Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step //x5:dst_depth_quad, x6: parameters, x7: realDstCount -//Load from x7: x8: scale, x9: bias, w12: maxValue, w13: minValue, w23: useInt8 +//Load from x6: x8: scale, x9: bias, w23: useInt8, x27: srcKernelSum, x28: weightQuanBias, +// EP=10,LP=8,HP=8 + ldr x8, [x6, #0] ldr x9, [x6, #8] -ldr w10, [x6, #16] -ldr w14, [x6, #20] -stp d14, d15, [sp, #(-16 * 8)]! +stp d14, d15, [sp, #(-16 * 10)]! stp d12, d13, [sp, #(16 * 1)] stp d10, d11, [sp, #(16 * 2)] stp d8, d9, [sp, #(16 * 3)] @@ -81,341 +133,744 @@ stp x21, x22, [sp, #(16 * 4)] stp x19, x20, [sp, #(16 * 5)] stp x23, x24, [sp, #(16 * 6)] stp x25, x26, [sp, #(16 * 7)] +stp x27, x28, [sp, #(16 * 8)] ldr w23, [x6, #24] +ldr x27, [x6, #40] // srcKernelSum +ldr x28, [x6, #48] // weightQuanBias -mov x21, #4 // sizeof(int8_t) * UNIT -mov x22, #160 // GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT = 20 * 8 = 160 +ldr x22, [x6, #64] // blockNum +mul x22, x22, x3 // UP_DIV(ic*ky*kx, SRC_UNIT) = blockNum * src_depth_quad_per_block +lsl x15, x22, #6 // x15 = src_depth_quad * UNIT * UNIT_SRC = src_depth_quad * 64 = src_depth_quad << 6 + +ldr x10, [x6, #80] // extra scale +mov x21, #4 // sizeof(int8_t) * pack +add x14, x6, #16 // int8 max ptr cbnz w23, Start -mov x21, #16 // sizeof(float) * UNIT +mov x21, #16 // sizeof(float) * pack +ldr x14, [x6, #56] // float32 maxmin ptr Start: -lsl x15, x3, #5 // x15 = src_depth_quad * UNIT * UNIT_SRC = src_depth_quad * 32 = src_depth_quad << 5 +mov x22, #80 // GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT = 10 * 8 = 80 + +TILE_10: + cmp x7, #10 + blt TILE_8 + sub x4, x4, #32 // For int8 output, x4-64 + cbnz w23, TILE10_DZ + sub x4, x4, #96 // For float32 output, x4-32-96=x4-128 -TILE_20: - cmp x7, #20 - blt TILE_16 -LoopDz_TILE_20: - ld1 {v0.4s}, [x9], #16 // bias +TILE10_DZ: +cmp x5, #2 +blt LoopDz4_TILE_10 + +LoopDz8_TILE_10: mov x11, x1 // src mov x12, x2 // weight mov x13, x3 // src_depth_quad - mov v1.16b, v0.16b - uzp1 v12.2d, v0.2d, v1.2d // bias_0, bias_1, bias_0, bias_1 - uzp2 v13.2d, v0.2d, v1.2d // bias_2, bias_3, bias_2, bias_3 - mov v14.16b, v12.16b - mov v15.16b, v13.16b - SET_BIAS v14, v16, v18, v20, v22 - SET_BIAS v14, v24, v26, v28, v30 - SET_BIAS v15, v17, v19, v21, v23 - SET_BIAS v15, v25, v27, v29, v31 -LoopSz_TILE_20: - // src : 10 x [2 x 8] : v2-11 - // weight : 2 x [2 x 8] : v0-1 - // dst : 10 x 2 x [4] : v12-v31 - ld1 {v0.16b, v1.16b}, [x12], #32 // weight - ld1 {v2.16b, v3.16b, v4.16b, v5.16b}, [x11], #64 // src - .inst 0x4e80a44c // smmla v12.4s, v2.16b, v0.16b - .inst 0x4e81a44d // smmla v13.4s, v2.16b, v1.16b - .inst 0x4e80a46e // smmla v14.4s, v3.16b, v0.16b - .inst 0x4e81a46f // smmla v15.4s, v3.16b, v1.16b - ld1 {v6.16b, v7.16b, v8.16b, v9.16b}, [x11], #64 - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e80a4b2 // smmla v18.4s, v5.16b, v0.16b - .inst 0x4e81a4b3 // smmla v19.4s, v5.16b, v1.16b - ld1 {v10.16b, v11.16b}, [x11], #32 - .inst 0x4e80a4d4 // smmla v20.4s, v6.16b, v0.16b - .inst 0x4e81a4d5 // 
smmla v21.4s, v6.16b, v1.16b - .inst 0x4e80a4f6 // smmla v22.4s, v7.16b, v0.16b - .inst 0x4e81a4f7 // smmla v23.4s, v7.16b, v1.16b - .inst 0x4e80a518 // smmla v24.4s, v8.16b, v0.16b - .inst 0x4e81a519 // smmla v25.4s, v8.16b, v1.16b - .inst 0x4e80a53a // smmla v26.4s, v9.16b, v0.16b - .inst 0x4e81a53b // smmla v27.4s, v9.16b, v1.16b - .inst 0x4e80a55c // smmla v28.4s, v10.16b, v0.16b - .inst 0x4e81a55d // smmla v29.4s, v10.16b, v1.16b + + SET_0_5 v12, v16, v20, v24, v28 // oc:0,1,0,1 + SET_0_5 v13, v17, v21, v25, v29 // oc:2,3,2,3 + SET_0_5 v14, v18, v22, v26, v30 // oc:4,5,4,5 + SET_0_5 v15, v19, v23, v27, v31 // oc:6,7,6,7 + +LoopSz_TILE_10: + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], #64 // src: E0-E9 + ld1 {v7.16b}, [x11], #16 subs x13, x13, #1 - .inst 0x4e80a57e // smmla v30.4s, v11.16b, v0.16b - .inst 0x4e81a57f // smmla v31.4s, v11.16b, v1.16b - bne LoopSz_TILE_20 -LoopSzEnd_TILE_20: + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + + .inst 0x4e88a4fc // smmla v28.4s, v7.16b, v8.16b // tile8-oc0, tile8-oc1, tile9-oc0, tile9-oc1 + .inst 0x4e89a4fd // smmla v29.4s, v7.16b, v9.16b // tile8-oc2, tile8-oc3, tile9-oc2, tile9-oc3 + .inst 0x4e8aa4fe // smmla v30.4s, v7.16b, v10.16b // tile8-oc4, tile8-oc5, tile9-oc4, tile9-oc5 + .inst 0x4e8ba4ff // smmla v31.4s, v7.16b, v11.16b // tile8-oc6, tile8-oc7, tile9-oc6, tile9-oc7 + bne LoopSz_TILE_10 +LoopSzEnd_TILE_10: add x2, x2, x15 // weight += dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); - sub x5, x5, #1 // dz-- + sub x5, x5, #2 // dz-2 // transpose - uzp1 v11.2d, v12.2d, v13.2d - uzp2 v12.2d, v12.2d, v13.2d - uzp1 v13.2d, v14.2d, v15.2d - uzp2 v14.2d, v14.2d, v15.2d - uzp1 v15.2d, v16.2d, v17.2d - uzp2 v16.2d, v16.2d, v17.2d - uzp1 v17.2d, v18.2d, v19.2d - uzp2 v18.2d, v18.2d, v19.2d - uzp1 v19.2d, v20.2d, v21.2d - uzp2 v20.2d, v20.2d, v21.2d - uzp1 v21.2d, v22.2d, v23.2d - uzp2 v22.2d, v22.2d, v23.2d - uzp1 v23.2d, v24.2d, v25.2d - uzp2 v24.2d, v24.2d, v25.2d - uzp1 
v25.2d, v26.2d, v27.2d - uzp2 v26.2d, v26.2d, v27.2d - uzp1 v27.2d, v28.2d, v29.2d - uzp2 v28.2d, v28.2d, v29.2d - uzp1 v29.2d, v30.2d, v31.2d - uzp2 v30.2d, v30.2d, v31.2d - Int32ToFloat v11, v12, v13, v14 - Int32ToFloat v15, v16, v17, v18 - Int32ToFloat v19, v20, v21, v22 - Int32ToFloat v23, v24, v25, v26 - Int32ToFloat v27, v28, v29, v30 - -Tile20Quan: - ld1 {v0.4s}, [x8], #16 // scale - MUL_SCALE v0, v11, v12, v13, v14 - MUL_SCALE v0, v15, v16, v17, v18 - MUL_SCALE v0, v19, v20, v21, v22 - MUL_SCALE v0, v23, v24, v25, v26 - MUL_SCALE v0, v27, v28, v29, v30 - cmp w23, #1 - beq Tile20QuanUseInt8 - sub x4, x4, #256 - st1 {v11.4s, v12.4s, v13.4s, v14.4s}, [x0], #64 - st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0], #64 - st1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x0], #64 - st1 {v23.4s, v24.4s, v25.4s, v26.4s}, [x0], #64 - st1 {v27.4s, v28.4s, v29.4s, v30.4s}, [x0], x4 - add x4, x4, #256 - b Tile20LoopCheck - - Tile20QuanUseInt8: - FloatToInt32 v11, v12, v13, v14 - FloatToInt32 v15, v16, v17, v18 - FloatToInt32 v19, v20, v21, v22 - FloatToInt32 v23, v24, v25, v26 - FloatToInt32 v27, v28, v29, v30 - Int32ToInt16 v11, v12, v13, v14, v0, v1 - Int32ToInt16 v15, v16, v17, v18, v2, v3 - Int32ToInt16 v19, v20, v21, v22, v4, v5 - Int32ToInt16 v23, v24, v25, v26, v6, v7 - Int32ToInt16 v27, v28, v29, v30, v8, v9 - Int16ToInt8 v0, v1, v2, v3, v16, v17 - Int16ToInt8 v4, v5, v6, v7, v18, v19 - Int16ToInt8_ONE v8, v9, v20 - dup v11.16b, w10 // max - dup v10.16b, w14 // min - smax v16.16b, v10.16b, v16.16b - smax v17.16b, v10.16b, v17.16b - smax v18.16b, v10.16b, v18.16b - smax v19.16b, v10.16b, v19.16b - smax v20.16b, v10.16b, v20.16b - smin v16.16b, v11.16b, v16.16b - smin v17.16b, v11.16b, v17.16b - smin v18.16b, v11.16b, v18.16b - smin v19.16b, v11.16b, v19.16b - smin v20.16b, v11.16b, v20.16b - sub x4, x4, #64 - st1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x0], #64 - st1 {v20.16b}, [x0], x4 // dst += dz * dst_step; - add x4, x4, #64 -Tile20LoopCheck: - cmp x5, #1 - bge LoopDz_TILE_20 - b End + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + uzp1 v4.2d, v20.2d, v21.2d + uzp2 v5.2d, v20.2d, v21.2d + uzp1 v6.2d, v24.2d, v25.2d + uzp2 v7.2d, v24.2d, v25.2d + uzp1 v8.2d, v28.2d, v29.2d + uzp2 v9.2d, v28.2d, v29.2d -TILE_16: - dup v11.16b, w10 // max - dup v10.16b, w14 // min - sub x10, x22, #64 - cmp x7, #16 - blt TILE_8 - mov x24, x5 // dst_depth_quad - mov x26, x0 // dst - mov x25, x2 // weight - mov x19, x8 // scale - mov x20, x9 // bias -LoopDz_TILE_16: // while (dz = dst_depth_quad) - ld1 {v0.4s}, [x20], #16 // bias + uzp1 v10.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v11.2d, v14.2d, v15.2d // E1: oc:4-7 + uzp1 v12.2d, v18.2d, v19.2d + uzp2 v13.2d, v18.2d, v19.2d + uzp1 v14.2d, v22.2d, v23.2d + uzp2 v15.2d, v22.2d, v23.2d + uzp1 v16.2d, v26.2d, v27.2d + uzp2 v17.2d, v26.2d, v27.2d + uzp1 v18.2d, v30.2d, v31.2d + uzp2 v19.2d, v30.2d, v31.2d + + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + +Tile10Quan: + ld1 {v20.4s, v21.4s}, [x8], #32 // scale + ld1 {v22.4s, v23.4s}, [x27], #32 // x kernel sum + ld1 {v24.d}[0], [x27] + ld1 {v25.4s, v26.4s}, [x28], #32 // weight quan zeropoint + sub x27, x27, #32 + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + MUL_SCALE v21, v10, v11, v12, v13 + MUL_SCALE v21, v14, v15, v16, v17 + fmul v8.4s, v8.4s, v20.4s + fmul v9.4s, v9.4s, v20.4s + fmul v18.4s, v18.4s, 
v21.4s + fmul v19.4s, v19.4s, v21.4s + + cbz x10, TILE10_MLA + ld1 {v27.4s, v28.4s}, [x10], #32 + ld1 {v29.d}[0], [x10] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v28, v4, v5, v6, v7 + MUL_EXTRA_SCALE v27, v10, v11, v12, v13 + MUL_EXTRA_SCALE v28, v14, v15, v16, v17 + fmul v8.4s, v8.4s, v29.s[0] + fmul v9.4s, v9.4s, v29.s[1] + fmul v18.4s, v18.4s, v29.s[0] + fmul v19.4s, v19.4s, v29.s[1] + sub x10, x10, #32 + + TILE10_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v11, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v13, v22, v26, 3 // tile:3, oc:4-7 + + MLA_WEIGHTZERO v4, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v5, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v23, v26, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v15, v23, v26, 1 // tile:5, oc:4-7 + + MLA_WEIGHTZERO v6, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v7, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v23, v26, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v17, v23, v26, 3 // tile:7, oc:4-7 + + MLA_WEIGHTZERO v8, v24, v25, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v9, v24, v25, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v24, v26, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v19, v24, v26, 1 // tile:9, oc:4-7 + + cbnz w23, Tile10QuanUseInt8 + + TILE10_ADD_BIAS: + cbz x9, TILE10_ADD_DSTV + ld1 {v20.4s, v21.4s}, [x9], #32 // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v20 + ADD_BIAS_FLOAT v4, v5, v6, v7, v20 + ADD_BIAS_FLOAT v10, v11, v12, v13, v21 + ADD_BIAS_FLOAT v14, v15, v16, v17, v21 + fadd v8.4s, v8.4s, v20.4s + fadd v9.4s, v9.4s, v20.4s + fadd v18.4s, v18.4s, v21.4s + fadd v19.4s, v19.4s, v21.4s + b TILE10_POST + + TILE10_ADD_DSTV: + // first batch10 + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s}, [x0], x4 + ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + fadd v8.4s, v8.4s, v28.4s + fadd v9.4s, v9.4s, v29.4s + + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s}, [x0] + ADD_FLOAT v10, v11, v12, v13, v20, v21, v22, v23 + ADD_FLOAT v14, v15, v16, v17, v24, v25, v26, v27 + fadd v18.4s, v18.4s, v28.4s + fadd v19.4s, v19.4s, v29.4s + + sub x0, x0, #256 + sub x0, x0, x4 + + TILE10_POST: + cbz x14, TILE10_STORE + ld1r {v30.4s}, [x14], #4 // f32 min + ld1r {v31.4s}, [x14] // f32 max + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + ReLU_FP32 v8, v9, v10, v11, v30, v31 + ReLU_FP32 v12, v13, v14, v15, v30, v31 + ReLU_FP32 v16, v17, v18, v19, v30, v31 + sub x14, x14, #4 + + TILE10_STORE: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], #64 + st1 {v8.4s, v9.4s}, [x0], x4 + st1 {v10.4s, v11.4s, v12.4s, v13.4s}, [x0], #64 + st1 {v14.4s, v15.4s, v16.4s, v17.4s}, [x0], #64 + st1 {v18.4s, v19.4s}, [x0], x4 + b Tile10LoopCheck + + Tile10QuanUseInt8: + ld1 {v20.4s, v21.4s}, [x9], #32 // bias + ld1r {v31.4s}, [x14], #4 // int8 max + ld1r {v30.4s}, [x14] // int8 min + ADD_BIAS_FLOAT v0, v1, v2, v3, v20 + ADD_BIAS_FLOAT v4, v5, v6, v7, v20 + ADD_BIAS_FLOAT v10, v11, v12, v13, v21 + ADD_BIAS_FLOAT v14, v15, v16, v17, v21 + fadd v8.4s, v8.4s, v20.4s + fadd v9.4s, v9.4s, v20.4s + fadd v18.4s, v18.4s, v21.4s + fadd 
v19.4s, v19.4s, v21.4s + + sub x14, x14, #4 + dup v31.16b, v31.b[0] + dup v30.16b, v30.b[0] + + FloatToInt32 v0, v1, v2, v3 + FloatToInt32 v4, v5, v6, v7 + FloatToInt32 v10, v11, v12, v13 + FloatToInt32 v14, v15, v16, v17 + FloatToInt32 v8, v9, v18, v19 + + Int32ToInt16 v0, v1, v2, v3, v20, v21 + Int32ToInt16 v4, v5, v6, v7, v22, v23 + sqxtn v24.4h, v8.4s + sqxtn2 v24.8h, v9.4s + Int32ToInt16 v10, v11, v12, v13, v25, v26 + Int32ToInt16 v14, v15, v16, v17, v27, v28 + sqxtn v29.4h, v18.4s + sqxtn2 v29.8h, v19.4s + + Int16ToInt8 v20, v21, v22, v23, v0, v1 + sqxtn v2.8b, v24.8h + Int16ToInt8 v25, v26, v27, v28, v3, v4 + sqxtn v5.8b, v29.8h + + smax v0.16b, v30.16b, v0.16b + smax v1.16b, v30.16b, v1.16b + smax v2.8b, v30.8b, v2.8b + smax v3.16b, v30.16b, v3.16b + smax v4.16b, v30.16b, v4.16b + smax v5.8b, v30.8b, v5.8b + + smin v0.16b, v31.16b, v0.16b + smin v1.16b, v31.16b, v1.16b + smin v2.8b, v31.8b, v2.8b + smin v3.16b, v31.16b, v3.16b + smin v4.16b, v31.16b, v4.16b + smin v5.8b, v31.8b, v5.8b + + st1 {v0.16b, v1.16b}, [x0], #32 + st1 {v2.8b}, [x0], x4 + st1 {v3.16b, v4.16b}, [x0], #32 + st1 {v5.8b}, [x0], x4 + +Tile10LoopCheck: + cmp x5, #2 + bge LoopDz8_TILE_10 + cbz x5, End + +LoopDz4_TILE_10: mov x11, x1 // src - mov x12, x25 // weight + mov x12, x2 // weight mov x13, x3 // src_depth_quad - mov v1.16b, v0.16b - uzp1 v2.2d, v0.2d, v1.2d // bias_0, bias_1, bias_0, bias_1 - uzp2 v3.2d, v0.2d, v1.2d // bias_2, bias_3, bias_2, bias_3 - SET_BIAS v2, v16, v18, v20, v22 - SET_BIAS v2, v24, v26, v28, v30 - SET_BIAS v3, v17, v19, v21, v23 - SET_BIAS v3, v25, v27, v29, v31 -LoopSz_TILE_16: - // src : 8 x [2 x 8] : v2-9 - // weight : 2 x [2 x 8] : v0-1 - // dst : 8 x 2 x [4] : v16-v31 - ld1 {v0.16b, v1.16b}, [x12], #32 // weight - ld1 {v2.16b, v3.16b, v4.16b, v5.16b}, [x11], #64 // src - .inst 0x4e80a450 // smmla v16.4s, v2.16b, v0.16b - .inst 0x4e81a451 // smmla v17.4s, v2.16b, v1.16b - .inst 0x4e80a472 // smmla v18.4s, v3.16b, v0.16b - .inst 0x4e81a473 // smmla v19.4s, v3.16b, v1.16b - ld1 {v6.16b, v7.16b, v8.16b, v9.16b}, [x11], x10 - .inst 0x4e80a494 // smmla v20.4s, v4.16b, v0.16b - .inst 0x4e81a495 // smmla v21.4s, v4.16b, v1.16b - .inst 0x4e80a4b6 // smmla v22.4s, v5.16b, v0.16b - .inst 0x4e81a4b7 // smmla v23.4s, v5.16b, v1.16b - .inst 0x4e80a4d8 // smmla v24.4s, v6.16b, v0.16b - .inst 0x4e81a4d9 // smmla v25.4s, v6.16b, v1.16b - .inst 0x4e80a4fa // smmla v26.4s, v7.16b, v0.16b - .inst 0x4e81a4fb // smmla v27.4s, v7.16b, v1.16b + + SET_0_5 v12, v13, v16, v17, v20 + SET_0_5 v21, v24, v25, v28, v29 + +LoopSz4_TILE_10: + ld1 {v8.16b, v9.16b}, [x12] // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], #64 // src: E0-E9 + ld1 {v7.16b}, [x11], #16 subs x13, x13, #1 - .inst 0x4e80a51c // smmla v28.4s, v8.16b, v0.16b - .inst 0x4e81a51d // smmla v29.4s, v8.16b, v1.16b - .inst 0x4e80a53e // smmla v30.4s, v9.16b, v0.16b - .inst 0x4e81a53f // smmla v31.4s, v9.16b, v1.16b - bne LoopSz_TILE_16 -LoopSzEnd_TILE_16: - add x25, x25, x15 // weight += dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); - sub x24, x24, #1 // dz-- + add x12, x12, #64 // x12+lp*hp + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b 
// tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + + .inst 0x4e88a4fc // smmla v28.4s, v7.16b, v8.16b // tile8-oc0, tile8-oc1, tile9-oc0, tile9-oc1 + .inst 0x4e89a4fd // smmla v29.4s, v7.16b, v9.16b // tile8-oc2, tile8-oc3, tile9-oc2, tile9-oc3 + bne LoopSz4_TILE_10 +LoopSz4End_TILE_10: // transpose - uzp1 v15.2d, v16.2d, v17.2d - uzp2 v16.2d, v16.2d, v17.2d - uzp1 v17.2d, v18.2d, v19.2d - uzp2 v18.2d, v18.2d, v19.2d - uzp1 v19.2d, v20.2d, v21.2d - uzp2 v20.2d, v20.2d, v21.2d - uzp1 v21.2d, v22.2d, v23.2d - uzp2 v22.2d, v22.2d, v23.2d - uzp1 v23.2d, v24.2d, v25.2d - uzp2 v24.2d, v24.2d, v25.2d - uzp1 v25.2d, v26.2d, v27.2d - uzp2 v26.2d, v26.2d, v27.2d - uzp1 v27.2d, v28.2d, v29.2d - uzp2 v28.2d, v28.2d, v29.2d - uzp1 v29.2d, v30.2d, v31.2d - uzp2 v30.2d, v30.2d, v31.2d - Int32ToFloat v15, v16, v17, v18 - Int32ToFloat v19, v20, v21, v22 - Int32ToFloat v23, v24, v25, v26 - Int32ToFloat v27, v28, v29, v30 - -Tile16Quan: - ld1 {v0.4s}, [x19], #16 // scale - MUL_SCALE v0, v15, v16, v17, v18 - MUL_SCALE v0, v19, v20, v21, v22 - MUL_SCALE v0, v23, v24, v25, v26 - MUL_SCALE v0, v27, v28, v29, v30 - cmp w23, #1 - beq Tile16QuanUseInt8 - sub x4, x4, #192 - st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x26], #64 - st1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x26], #64 - st1 {v23.4s, v24.4s, v25.4s, v26.4s}, [x26], #64 - st1 {v27.4s, v28.4s, v29.4s, v30.4s}, [x26], x4 - add x4, x4, #192 - b Tile16LoopCheck - - Tile16QuanUseInt8: - FloatToInt32 v15, v16, v17, v18 - FloatToInt32 v19, v20, v21, v22 - FloatToInt32 v23, v24, v25, v26 - FloatToInt32 v27, v28, v29, v30 - Int32ToInt16 v15, v16, v17, v18, v0, v1 - Int32ToInt16 v19, v20, v21, v22, v2, v3 - Int32ToInt16 v23, v24, v25, v26, v4, v5 - Int32ToInt16 v27, v28, v29, v30, v6, v7 - Int16ToInt8 v0, v1, v2, v3, v16, v17 - Int16ToInt8 v4, v5, v6, v7, v18, v19 - smax v16.16b, v10.16b, v16.16b - smax v17.16b, v10.16b, v17.16b - smax v18.16b, v10.16b, v18.16b - smax v19.16b, v10.16b, v19.16b - smin v16.16b, v11.16b, v16.16b - smin v17.16b, v11.16b, v17.16b - smin v18.16b, v11.16b, v18.16b - smin v19.16b, v11.16b, v19.16b - st1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x26], x4 // dst += dz * dst_step; -Tile16LoopCheck: - cmp x24, #1 - bge LoopDz_TILE_16 -Tile16End: - sub x7, x7, #16 - add x0, x0, x21, LSL #4 - add x1, x1, #128 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + uzp1 v4.2d, v20.2d, v21.2d + uzp2 v5.2d, v20.2d, v21.2d + uzp1 v6.2d, v24.2d, v25.2d + uzp2 v7.2d, v24.2d, v25.2d + uzp1 v8.2d, v28.2d, v29.2d + uzp2 v9.2d, v28.2d, v29.2d + + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + scvtf v8.4s, v8.4s + scvtf v9.4s, v9.4s + +Tile10Quan_L4: + ld1 {v20.4s}, [x8] // scale + ld1 {v22.4s, v23.4s}, [x27], #32 // x kernel sum + ld1 {v24.d}[0], [x27] + ld1 {v25.4s}, [x28] // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + fmul v8.4s, v8.4s, v20.4s + fmul v9.4s, v9.4s, v20.4s + + cbz x10, TILE10_MLA_L4 + ld1 {v27.4s, v28.4s}, [x10], #32 + ld1 {v29.d}[0], [x10] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v28, v4, v5, v6, v7 + fmul v8.4s, v8.4s, v29.s[0] + fmul v9.4s, v9.4s, v29.s[1] + + TILE10_MLA_L4: + MLA_WEIGHTZERO v0, v22, 
v25, 0 // tile:0, oc:0-3
+ MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3
+ MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3
+ MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3
+ MLA_WEIGHTZERO v4, v23, v25, 0 // tile:4, oc:0-3
+ MLA_WEIGHTZERO v5, v23, v25, 1 // tile:5, oc:0-3
+ MLA_WEIGHTZERO v6, v23, v25, 2 // tile:6, oc:0-3
+ MLA_WEIGHTZERO v7, v23, v25, 3 // tile:7, oc:0-3
+ MLA_WEIGHTZERO v8, v24, v25, 0 // tile:8, oc:0-3
+ MLA_WEIGHTZERO v9, v24, v25, 1 // tile:9, oc:0-3
+ //sub x4, x4, #128
+
+ cbnz w23, Tile10QuanUseInt8_L4
+
+ TILE10_ADD_BIAS_L4:
+ cbz x9, TILE10_ADD_DSTV_L4
+ ld1 {v20.4s}, [x9] // bias
+ ADD_BIAS_FLOAT v0, v1, v2, v3, v20
+ ADD_BIAS_FLOAT v4, v5, v6, v7, v20
+ fadd v8.4s, v8.4s, v20.4s
+ fadd v9.4s, v9.4s, v20.4s
+ b TILE10_POST_L4
+
+ TILE10_ADD_DSTV_L4:
+ // first batch10
+ ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64
+ ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64
+ ld1 {v28.4s, v29.4s}, [x0]
+ ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23
+ ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27
+ fadd v8.4s, v8.4s, v28.4s
+ fadd v9.4s, v9.4s, v29.4s
+
+ sub x0, x0, #128
+
+ TILE10_POST_L4:
+ cbz x14, TILE10_STORE_L4
+ ld1r {v30.4s}, [x14], #4 // f32 min
+ ld1r {v31.4s}, [x14] // f32 max
+ ReLU_FP32 v0, v1, v2, v3, v30, v31
+ ReLU_FP32 v4, v5, v6, v7, v30, v31
+ fmax v8.4s, v8.4s, v30.4s
+ fmax v9.4s, v9.4s, v30.4s
+ fmin v8.4s, v8.4s, v31.4s
+ fmin v9.4s, v9.4s, v31.4s
+ sub x14, x14, #4
+
+ TILE10_STORE_L4:
+ st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64
+ st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], #64
+ st1 {v8.4s, v9.4s}, [x0], x4
+ b End
+
+ Tile10QuanUseInt8_L4:
+ ld1 {v20.4s}, [x9] // bias
+ ld1r {v31.4s}, [x14], #4 // int8 max
+ ld1r {v30.4s}, [x14] // int8 min
+ ADD_BIAS_FLOAT v0, v1, v2, v3, v20
+ ADD_BIAS_FLOAT v4, v5, v6, v7, v20
+ fadd v8.4s, v8.4s, v20.4s
+ fadd v9.4s, v9.4s, v20.4s
+
+ sub x14, x14, #4
+ dup v31.16b, v31.b[0]
+ dup v30.16b, v30.b[0]
+
+ FloatToInt32 v0, v1, v2, v3
+ FloatToInt32 v4, v5, v6, v7
+ fcvtas v8.4s, v8.4s
+ fcvtas v9.4s, v9.4s
+
+ Int32ToInt16 v0, v1, v2, v3, v16, v17
+ Int32ToInt16 v4, v5, v6, v7, v18, v19
+ sqxtn v24.4h, v8.4s
+ sqxtn2 v24.8h, v9.4s
+
+ Int16ToInt8 v16, v17, v18, v19, v21, v22
+ sqxtn v23.8b, v24.8h
+
+ smax v21.16b, v30.16b, v21.16b
+ smax v22.16b, v30.16b, v22.16b
+ smax v23.8b, v30.8b, v23.8b
+
+ smin v21.16b, v31.16b, v21.16b
+ smin v22.16b, v31.16b, v22.16b
+ smin v23.8b, v31.8b, v23.8b
+
+ st1 {v21.16b, v22.16b}, [x0], #32
+ st1 {v23.8b}, [x0], x4
+ b End

TILE_8:
+ // post parameters initialize
+ cbnz w23, INT8_POST_INIT
+ cbz x14, TILE_Remain
+ ld1r {v30.4s}, [x14], #4 // f32 min
+ ld1r {v31.4s}, [x14] // f32 max
+ b TILE_Remain
+
+ INT8_POST_INIT:
+ ld1r {v31.4s}, [x14], #4 // int8 max
+ ld1r {v30.4s}, [x14] // int8 min
+ dup v31.16b, v31.b[0]
+ dup v30.16b, v30.b[0]
+
+ TILE_Remain:
 cmp x7, #8
 blt TILE_4
+ cbnz w23, TILE8_START
+ sub x4, x4, #64 // For float32 output, add #64 back when TILE_8 ends.
+ + TILE8_START: mov x24, x5 // dst_depth_quad mov x26, x0 // dst mov x25, x2 // weight mov x19, x8 // scale mov x20, x9 // bias + mov x6, x28 // weightQuanBias +cmp x5, #2 +blt LoopDz4_TILE_8 LoopDz_TILE_8: - ld1 {v0.4s}, [x20], #16 // bias mov x11, x1 // src mov x12, x25 // weight mov x13, x3 // src_depth_quad - mov v1.16b, v0.16b - uzp1 v2.2d, v0.2d, v1.2d // bias_0, bias_1, bias_0, bias_1 - uzp2 v3.2d, v0.2d, v1.2d // bias_2, bias_3, bias_2, bias_3 - SET_BIAS v2, v24, v26, v28, v30 - SET_BIAS v3, v25, v27, v29, v31 + SET_0_4 v12, v16, v20, v24 + SET_0_4 v13, v17, v21, v25 + SET_0_4 v14, v18, v22, v26 + SET_0_4 v15, v19, v23, v27 LoopSz_TILE_8: - // src : 4 x [2 x 8] : v2-5 - // weight : 2 x [2 x 8] : v0-1 - // dst : 4 x 2 x [4] : v24-v31 - ld1 {v0.16b, v1.16b}, [x12], #32 // weight - ld1 {v2.16b, v3.16b, v4.16b, v5.16b}, [x11], x22 // src - .inst 0x4e80a458 // smmla v24.4s, v2.16b, v0.16b - .inst 0x4e81a459 // smmla v25.4s, v2.16b, v1.16b - .inst 0x4e80a47a // smmla v26.4s, v3.16b, v0.16b - .inst 0x4e81a47b // smmla v27.4s, v3.16b, v1.16b - .inst 0x4e80a49c // smmla v28.4s, v4.16b, v0.16b - .inst 0x4e81a49d // smmla v29.4s, v4.16b, v1.16b - .inst 0x4e80a4be // smmla v30.4s, v5.16b, v0.16b - .inst 0x4e81a4bf // smmla v31.4s, v5.16b, v1.16b + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], x22 // src: E0-E7 subs x13, x13, #1 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 bne LoopSz_TILE_8 + LoopSzEnd_TILE_8: add x25, x25, x15 - sub x24, x24, #1 - uzp1 v23.2d, v24.2d, v25.2d - uzp2 v24.2d, v24.2d, v25.2d - uzp1 v25.2d, v26.2d, v27.2d - uzp2 v26.2d, v26.2d, v27.2d - uzp1 v27.2d, v28.2d, v29.2d - uzp2 v28.2d, v28.2d, v29.2d - uzp1 v29.2d, v30.2d, v31.2d - uzp2 v30.2d, v30.2d, v31.2d - Int32ToFloat v23, v24, v25, v26 - Int32ToFloat v27, v28, v29, v30 + sub x24, x24, #2 // dz-2 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v8.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v9.2d, v14.2d, v15.2d // E1: 
oc:4-7 + + uzp1 v2.2d, v16.2d, v17.2d // E2: oc:0-3 + uzp2 v3.2d, v16.2d, v17.2d // E3: oc:0-3 + uzp1 v10.2d, v18.2d, v19.2d // E2: oc:4-7 + uzp2 v11.2d, v18.2d, v19.2d // E3: oc:4-7 + + uzp1 v4.2d, v20.2d, v21.2d // E4: oc:0-3 + uzp2 v5.2d, v20.2d, v21.2d // E5: oc:0-3 + uzp1 v12.2d, v22.2d, v23.2d // E4: oc:4-7 + uzp2 v13.2d, v22.2d, v23.2d // E5: oc:4-7 + + uzp1 v6.2d, v24.2d, v25.2d // E6: oc:0-3 + uzp2 v7.2d, v24.2d, v25.2d // E7: oc:0-3 + uzp1 v14.2d, v26.2d, v27.2d // E6: oc:4-7 + uzp2 v15.2d, v26.2d, v27.2d // E7: oc:4-7 + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 Tile8Quan: - ld1 {v0.4s}, [x19], #16 // scale - MUL_SCALE v0, v23, v24, v25, v26 - MUL_SCALE v0, v27, v28, v29, v30 - cmp w23, #1 - beq Tile8QuanUseInt8 - sub x4, x4, #64 - st1 {v23.4s, v24.4s, v25.4s, v26.4s}, [x26], #64 - st1 {v27.4s, v28.4s, v29.4s, v30.4s}, [x26], x4 - add x4, x4, #64 + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s, v23.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + MUL_SCALE v21, v8, v9, v10, v11 + MUL_SCALE v21, v12, v13, v14, v15 + + cbz x10, TILE8_MLA + ld1 {v27.4s, v28.4s}, [x10] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v28, v4, v5, v6, v7 + MUL_EXTRA_SCALE v27, v8, v9, v10, v11 + MUL_EXTRA_SCALE v28, v12, v13, v14, v15 + + TILE8_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 + MLA_WEIGHTZERO v1, v22, v25, 1 + MLA_WEIGHTZERO v2, v22, v25, 2 + MLA_WEIGHTZERO v3, v22, v25, 3 + MLA_WEIGHTZERO v4, v23, v25, 0 + MLA_WEIGHTZERO v5, v23, v25, 1 + MLA_WEIGHTZERO v6, v23, v25, 2 + MLA_WEIGHTZERO v7, v23, v25, 3 + + MLA_WEIGHTZERO v8, v22, v26, 0 + MLA_WEIGHTZERO v9, v22, v26, 1 + MLA_WEIGHTZERO v10, v22, v26, 2 + MLA_WEIGHTZERO v11, v22, v26, 3 + MLA_WEIGHTZERO v12, v23, v26, 0 + MLA_WEIGHTZERO v13, v23, v26, 1 + MLA_WEIGHTZERO v14, v23, v26, 2 + MLA_WEIGHTZERO v15, v23, v26, 3 + + cbnz w23, Tile8QuanUseInt8 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v17 + ADD_BIAS_FLOAT v12, v13, v14, v15, v17 + b TILE8_POST + + TILE8_ADD_DSTV: + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x26], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x26], x4 + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x26], #64 + ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x26] + ADD_FLOAT v8, v9, v10, v11, v16, v17, v18, v19 + ADD_FLOAT v12, v13, v14, v15, v20, v21, v22, v23 + sub x26, x26, x4 + sub x26, x26, #128 + + TILE8_POST: + cbz x14, TILE8_STORE + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + ReLU_FP32 v8, v9, v10, v11, v30, v31 + ReLU_FP32 v12, v13, v14, v15, v30, v31 + + TILE8_STORE: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x26], x4 + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x26], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x26], x4 b Tile8LoopCheck Tile8QuanUseInt8: - FloatToInt32 v23, v24, v25, v26 - FloatToInt32 v27, v28, v29, v30 - Int32ToInt16 v23, v24, v25, v26, v4, v5 - Int32ToInt16 v27, v28, v29, v30, v6, v7 - Int16ToInt8 v4, v5, v6, v7, v18, v19 - smax v18.16b, v10.16b, v18.16b - smax v19.16b, v10.16b, v19.16b - smin v18.16b, v11.16b, v18.16b - smin v19.16b, v11.16b, v19.16b + ld1 {v16.4s, v17.4s}, [x20], #32 + ADD_BIAS_FLOAT v0, v1, v2, 
v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v17 + ADD_BIAS_FLOAT v12, v13, v14, v15, v17 + + FloatToInt32 v0, v1, v2, v3 + FloatToInt32 v4, v5, v6, v7 + FloatToInt32 v8, v9, v10, v11 + FloatToInt32 v12, v13, v14, v15 + + Int32ToInt16 v0, v1, v2, v3, v20, v21 + Int32ToInt16 v4, v5, v6, v7, v22, v23 + Int32ToInt16 v8, v9, v10, v11, v24, v25 + Int32ToInt16 v12, v13, v14, v15, v26, v27 + + Int16ToInt8 v20, v21, v22, v23, v28, v29 + Int16ToInt8 v24, v25, v26, v27, v18, v19 + smax v28.16b, v30.16b, v28.16b + smax v29.16b, v30.16b, v29.16b + smax v18.16b, v30.16b, v18.16b + smax v19.16b, v30.16b, v19.16b + smin v28.16b, v31.16b, v28.16b + smin v29.16b, v31.16b, v29.16b + smin v18.16b, v31.16b, v18.16b + smin v19.16b, v31.16b, v19.16b + st1 {v28.16b, v29.16b}, [x26], x4 st1 {v18.16b, v19.16b}, [x26], x4 // dst += dz * dst_step Tile8LoopCheck: - cmp x24, #1 + cmp x24, #2 bge LoopDz_TILE_8 + cbz x24, Tile8Check + +LoopDz4_TILE_8: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v13, v16, v17 + SET_0_4 v20, v21, v24, v25 +LoopSz4_TILE_8: + ld1 {v8.16b, v9.16b}, [x12] // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], x22 // src: E0-E7 + subs x13, x13, #1 + add x12, x12, #64 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + bne LoopSz4_TILE_8 + +LoopSz4End_TILE_8: + add x25, x25, x15 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d // E2: oc:0-3 + uzp2 v3.2d, v16.2d, v17.2d // E3: oc:0-3 + uzp1 v4.2d, v20.2d, v21.2d // E4: oc:0-3 + uzp2 v5.2d, v20.2d, v21.2d // E5: oc:0-3 + uzp1 v6.2d, v24.2d, v25.2d // E6: oc:0-3 + uzp2 v7.2d, v24.2d, v25.2d // E7: oc:0-3 + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + +Tile8Quan_L4: + ld1 {v20.4s}, [x19] // scale + ld1 {v22.4s, v23.4s}, [x27] // x kernel sum + ld1 {v25.4s}, [x6] // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + + cbz x10, TILE8_MLA_L4 + ld1 {v27.4s, v28.4s}, [x10] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v28, v4, v5, v6, v7 + + TILE8_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 + MLA_WEIGHTZERO v1, v22, v25, 1 + MLA_WEIGHTZERO v2, v22, v25, 2 + MLA_WEIGHTZERO v3, v22, v25, 3 + MLA_WEIGHTZERO v4, v23, v25, 0 + MLA_WEIGHTZERO v5, v23, v25, 1 + MLA_WEIGHTZERO v6, v23, v25, 2 + MLA_WEIGHTZERO v7, v23, v25, 3 + + cbnz w23, Tile8QuanUseInt8_L4 + + cbz x9, TILE8_ADD_DSTV_L4 + TILE8_ADD_BIAS_L4: + ld1 {v16.4s}, [x20] + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v16 + b TILE8_POST_L4 + + TILE8_ADD_DSTV_L4: + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x26], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x26] + ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + sub 
x26, x26, #64 + + TILE8_POST_L4: + cbz x14, TILE8_STORE_L4 + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + + TILE8_STORE_L4: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x26], x4 + b Tile8Check + + Tile8QuanUseInt8_L4: + ld1 {v16.4s}, [x20] + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v16 + + FloatToInt32 v0, v1, v2, v3 + FloatToInt32 v4, v5, v6, v7 + + Int32ToInt16 v0, v1, v2, v3, v20, v21 + Int32ToInt16 v4, v5, v6, v7, v22, v23 + + Int16ToInt8 v20, v21, v22, v23, v16, v17 + smax v16.16b, v30.16b, v16.16b + smax v17.16b, v30.16b, v17.16b + smin v16.16b, v31.16b, v16.16b + smin v17.16b, v31.16b, v17.16b + st1 {v16.16b, v17.16b}, [x26], x4 + +Tile8Check: +cbz x10, Tile8End +add x10, x10, #32 + Tile8End: sub x7, x7, #8 add x0, x0, x21, LSL #3 add x1, x1, #64 + add x27, x27, #32 + add x4, x4, #64 // Revert x4 for following tile. TILE_4: cmp x7, #4 @@ -425,59 +880,193 @@ TILE_4: mov x25, x2 // weight mov x19, x8 // scale mov x20, x9 // bias + mov x6, x28 // weightQuanBias +cmp x5, #2 +blt LoopDz4_TILE_4 LoopDz_TILE_4: - ld1 {v0.4s}, [x20], #16 // bias mov x11, x1 // src mov x12, x25 // weight mov x13, x3 // src_depth_quad - mov v1.16b, v0.16b - uzp1 v28.2d, v0.2d, v1.2d // bias_0, bias_1, bias_0, bias_1 - uzp2 v29.2d, v0.2d, v1.2d // bias_2, bias_3, bias_2, bias_3 - mov v30.16b, v28.16b - mov v31.16b, v29.16b + SET_0_4 v12, v13, v14, v15 + SET_0_4 v16, v17, v18, v19 + LoopSz_TILE_4: - // src : 2 x [2 x 8] : v2-3 - // weight : 2 x [2 x 8] : v0-1 - // dst : 2 x 2 x [4] : v28-v31 - ld1 {v0.16b, v1.16b}, [x12], #32 // weight - ld1 {v2.16b, v3.16b}, [x11], x22 // src - .inst 0x4e80a45c // smmla v28.4s, v2.16b, v0.16b - .inst 0x4e81a45d // smmla v29.4s, v2.16b, v1.16b - .inst 0x4e80a47e // smmla v30.4s, v3.16b, v0.16b - .inst 0x4e81a47f // smmla v31.4s, v3.16b, v1.16b + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight + ld1 {v4.16b, v5.16b}, [x11], x22 // src subs x13, x13, #1 + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a4b0 // smmla v16.4s, v5.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a4b1 // smmla v17.4s, v5.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa4b2 // smmla v18.4s, v5.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba4b3 // smmla v19.4s, v5.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 bne LoopSz_TILE_4 LoopSzEnd_TILE_4: add x25, x25, x15 - sub x24, x24, #1 - uzp1 v27.2d, v28.2d, v29.2d - uzp2 v28.2d, v28.2d, v29.2d - uzp1 v29.2d, v30.2d, v31.2d - uzp2 v30.2d, v30.2d, v31.2d - Int32ToFloat v27, v28, v29, v30 + sub x24, x24, #2 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v4.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v5.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 Tile4Quan: - ld1 {v0.4s}, [x19], #16 // scale - MUL_SCALE v0, v27, v28, v29, v30 - cmp w23, #1 - beq Tile4QuanUseInt8 - st1 {v27.4s, v28.4s, v29.4s, v30.4s}, [x26], x4 + 
ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v21, v4, v5, v6, v7 + + cbz x10, TILE4_MLA + ld1 {v27.4s}, [x10] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v27, v4, v5, v6, v7 + + TILE4_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v4, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v5, v22, v26, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + cbnz w23, Tile4QuanUseInt8 + + TILE4_ADD_BIAS: + cbz x9, TILE4_ADD_DSTV + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v17 + b TILE4_POST + + TILE4_ADD_DSTV: + ld1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x26], x4 + ld1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x26] + ADD_FLOAT v0, v1, v2, v3, v15, v16, v17, v18 + ADD_FLOAT v4, v5, v6, v7, v19, v20, v21, v22 + sub x26, x26, x4 + + TILE4_POST: + cbz x14, TILE4_STORE + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + + TILE4_STORE: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], x4 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x26], x4 b Tile4LoopCheck Tile4QuanUseInt8: - FloatToInt32 v27, v28, v29, v30 - Int32ToInt16 v27, v28, v29, v30, v6, v7 - Int16ToInt8_ONE v6, v7, v19 - smax v19.16b, v10.16b, v19.16b - smin v19.16b, v11.16b, v19.16b + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v17 + FloatToInt32 v0, v1, v2, v3 + FloatToInt32 v4, v5, v6, v7 + Int32ToInt16 v0, v1, v2, v3, v8, v9 + Int32ToInt16 v4, v5, v6, v7, v10, v11 + Int16ToInt8_ONE v8, v9, v19 + Int16ToInt8_ONE v10, v11, v20 + smax v19.16b, v30.16b, v19.16b + smin v19.16b, v31.16b, v19.16b + smax v20.16b, v30.16b, v20.16b + smin v20.16b, v31.16b, v20.16b st1 {v19.16b}, [x26], x4 // dst += dz * dst_step + st1 {v20.16b}, [x26], x4 Tile4LoopCheck: - cmp x24, #1 + cmp x24, #2 bge LoopDz_TILE_4 + cbz x24, Tile4Check + +LoopDz4_TILE_4: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v13, v16, v17 +LoopSz4_TILE_4: + ld1 {v8.16b, v9.16b}, [x12] // weight + ld1 {v4.16b, v5.16b}, [x11], x22 // src + subs x13, x13, #1 + add x12, x12, #64 + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + + .inst 0x4e88a4b0 // smmla v16.4s, v5.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a4b1 // smmla v17.4s, v5.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + bne LoopSz4_TILE_4 +LoopSz4End_TILE_4: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + Int32ToFloat v0, v1, v2, v3 + +Tile4Quan_L4: + ld1 {v20.4s}, [x19] // scale + ld1 {v22.4s}, [x27] // x kernel sum + ld1 {v25.4s}, [x6] // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + + cbz x10, TILE4_MLA_L4 + ld1 {v27.4s}, [x10] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + + TILE4_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + 
MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + + cbnz w23, Tile4QuanUseInt8_L4 + + TILE4_ADD_BIAS_L4: + cbz x9, TILE4_ADD_DSTV_L4 + ld1 {v16.4s}, [x20] // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + b TILE4_POST_L4 + + TILE4_ADD_DSTV_L4: + ld1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x26] + ADD_FLOAT v0, v1, v2, v3, v15, v16, v17, v18 + + TILE4_POST_L4: + cbz x14, TILE4_STORE_L4 + ReLU_FP32 v0, v1, v2, v3, v30, v31 + + TILE4_STORE_L4: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], x4 + b Tile4Check + + Tile4QuanUseInt8_L4: + ld1 {v16.4s}, [x20] // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + FloatToInt32 v0, v1, v2, v3 + Int32ToInt16 v0, v1, v2, v3, v8, v9 + Int16ToInt8_ONE v8, v9, v19 + smax v19.16b, v30.16b, v19.16b + smin v19.16b, v31.16b, v19.16b + st1 {v19.16b}, [x26], x4 // dst += dz * dst_step + +Tile4Check: +cbz x10, Tile4End +add x10, x10, #16 Tile4End: sub x7, x7, #4 add x0, x0, x21, LSL #2 add x1, x1, #32 + add x27, x27, #16 TILE_2: cmp x7, #2 @@ -487,57 +1076,189 @@ TILE_2: mov x25, x2 // weight mov x19, x8 // scale mov x20, x9 // bias + mov x6, x28 // weightQuanBias +cmp x5, #2 +blt LoopDz4_TILE_2 LoopDz_TILE_2: - ld1 {v0.4s}, [x20], #16 // bias mov x11, x1 // src mov x12, x25 // weight mov x13, x3 // src_depth_quad - mov v1.16b, v0.16b - uzp1 v30.2d, v0.2d, v1.2d // bias_0, bias_1, bias_0, bias_1 - uzp2 v31.2d, v0.2d, v1.2d // bias_2, bias_3, bias_2, bias_3 + SET_0_4 v12, v13, v14, v15 LoopSz_TILE_2: - // src : 1 x [2 x 8] : v2 - // weight : 2 x [2 x 8] : v0-1 - // dst : 1 x 2 x [4] : v30-v31 - ld1 {v0.16b, v1.16b}, [x12], #32 // weight - ld1 {v2.16b}, [x11], x22 // src - .inst 0x4e80a45e // smmla v30.4s, v2.16b, v0.16b - .inst 0x4e81a45f // smmla v31.4s, v2.16b, v1.16b + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 + ld1 {v4.16b}, [x11], x22 // src + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 subs x13, x13, #1 bne LoopSz_TILE_2 LoopSzEnd_TILE_2: add x25, x25, x15 - sub x24, x24, #1 - uzp1 v29.2d, v30.2d, v31.2d - uzp2 v30.2d, v30.2d, v31.2d - scvtf v29.4s, v29.4s - scvtf v30.4s, v30.4s + sub x24, x24, #2 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + Int32ToFloat v0, v1, v2, v3 Tile2Quan: - ld1 {v0.4s}, [x19], #16 // scale - fmul v29.4s, v29.4s, v0.4s - fmul v30.4s, v30.4s, v0.4s - cmp w23, #1 - beq Tile2QuanUseInt8 - st1 {v29.4s, v30.4s}, [x26], x4 + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.d}[0], [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + fmul v0.4s, v0.4s, v20.4s + fmul v1.4s, v1.4s, v20.4s + fmul v2.4s, v2.4s, v21.4s + fmul v3.4s, v3.4s, v21.4s + + cbz x10, TILE2_MLA + ld1 {v27.d}[0], [x10] + fmul v0.4s, v0.4s, v27.s[0] + fmul v1.4s, v1.4s, v27.s[1] + fmul v2.4s, v2.4s, v27.s[0] + fmul v3.4s, v3.4s, v27.s[1] + + TILE2_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + cbnz w23, Tile2QuanUseInt8 + + TILE2_ADD_BIAS: + cbz x9, TILE2_ADD_DSTV + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v0.4s, 
v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + fadd v2.4s, v2.4s, v17.4s + fadd v3.4s, v3.4s, v17.4s + b TILE2_POST + + TILE2_ADD_DSTV: + ld1 {v18.4s, v19.4s}, [x26], x4 + ld1 {v20.4s, v21.4s}, [x26] + fadd v0.4s, v0.4s, v18.4s + fadd v1.4s, v1.4s, v19.4s + fadd v2.4s, v2.4s, v20.4s + fadd v3.4s, v3.4s, v21.4s + sub x26, x26, x4 + + TILE2_POST: + cbz x14, TILE2_STORE + ReLU_FP32 v0, v1, v2, v3, v30, v31 + TILE2_STORE: + st1 {v0.4s, v1.4s}, [x26], x4 + st1 {v2.4s, v3.4s}, [x26], x4 b Tile2LoopCheck + Tile2QuanUseInt8: - fcvtas v29.4s, v29.4s - fcvtas v30.4s, v30.4s - sqxtn v6.4h, v29.4s - sqxtn2 v6.8h, v30.4s + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + fadd v2.4s, v2.4s, v17.4s + fadd v3.4s, v3.4s, v17.4s + fcvtas v0.4s, v0.4s + fcvtas v1.4s, v1.4s + fcvtas v2.4s, v2.4s + fcvtas v3.4s, v3.4s + sqxtn v6.4h, v0.4s + sqxtn2 v6.8h, v1.4s + sqxtn v7.4h, v2.4s + sqxtn2 v7.8h, v3.4s sqxtn v19.8b, v6.8h - smax v19.16b, v10.16b, v19.16b - smin v19.16b, v11.16b, v19.16b + sqxtn v20.8b, v7.8h + smax v19.8b, v30.8b, v19.8b + smin v19.8b, v31.8b, v19.8b + smax v20.8b, v30.8b, v20.8b + smin v20.8b, v31.8b, v20.8b st1 {v19.8b}, [x26], x4 // dst += dz * dst_step + st1 {v20.8b}, [x26], x4 Tile2LoopCheck: - cmp x24, #1 + cmp x24, #2 bge LoopDz_TILE_2 + cbz x24, Tile2Check +LoopDz4_TILE_2: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + movi v12.4s, #0 + movi v13.4s, #0 +LoopSz4_TILE_2: + ld1 {v8.16b, v9.16b}, [x12] + ld1 {v4.16b}, [x11], x22 // src + + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + subs x13, x13, #1 + add x12, x12, #64 + bne LoopSz4_TILE_2 +LoopSz4End_TILE_2: + add x25, x25, x15 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + scvtf v0.4s, v0.4s + scvtf v1.4s, v1.4s + +Tile2Quan_L4: + ld1 {v20.4s}, [x19] + ld1 {v22.d}[0], [x27] // x kernel sum + ld1 {v25.4s}, [x6] // weight quan zeropoint + fmul v0.4s, v0.4s, v20.4s + fmul v1.4s, v1.4s, v20.4s + + cbz x10, TILE2_MLA_L4 + ld1 {v27.d}[0], [x10] + fmul v0.4s, v0.4s, v27.s[0] + fmul v1.4s, v1.4s, v27.s[1] + + TILE2_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + + cbnz w23, Tile2QuanUseInt8_L4 + + TILE2_ADD_BIAS_L4: + cbz x9, TILE2_ADD_DSTV_L4 + ld1 {v16.4s}, [x20] // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + b TILE2_POST_L4 + + TILE2_ADD_DSTV_L4: + ld1 {v18.4s, v19.4s}, [x26] + fadd v0.4s, v0.4s, v18.4s + fadd v1.4s, v1.4s, v19.4s + + TILE2_POST_L4: + cbz x14, TILE2_STORE_L4 + ReLU_FP32_2 v0, v1, v30, v31 + TILE2_STORE_L4: + st1 {v0.4s, v1.4s}, [x26], x4 + b Tile2Check + + Tile2QuanUseInt8_L4: + ld1 {v16.4s}, [x20] // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + fcvtas v0.4s, v0.4s + fcvtas v1.4s, v1.4s + sqxtn v6.4h, v0.4s + sqxtn2 v6.8h, v1.4s + sqxtn v19.8b, v6.8h + smax v19.8b, v30.8b, v19.8b + smin v19.8b, v31.8b, v19.8b + st1 {v19.8b}, [x26], x4 // dst += dz * dst_step + +Tile2Check: +cbz x10, Tile2End +add x10, x10, #8 Tile2End: sub x7, x7, #2 add x0, x0, x21, LSL #1 add x1, x1, #16 + add x27, x27, #8 TILE_1: cmp x7, #1 @@ -547,55 +1268,168 @@ TILE_1: mov x25, x2 // weight mov x19, x8 // scale mov x20, x9 // bias + mov x6, x28 // weightQuanBias +cmp x5, #2 +blt LoopDz4_TILE_1 LoopDz_TILE_1: - ld1 {v0.4s}, [x20], #16 // bias - mov x11, x1 // src - mov x12, x25 // weight 
- mov x13, x3 // src_depth_quad - mov v1.16b, v0.16b - uzp1 v30.2d, v0.2d, v1.2d // bias_0, bias_1, bias_0, bias_1 - uzp2 v31.2d, v0.2d, v1.2d // bias_2, bias_3, bias_2, bias_3 + //ld1 {v0.4s}, [x20], #16 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + + movi v16.4s, #0 + movi v17.4s, #0 + movi v18.4s, #0 + movi v19.4s, #0 LoopSz_TILE_1: - // src : 1 x [1 x 8] : v2 - // weight : 2 x [2 x 8] : v0-1 - // dst : 1 x 2 x [2] : v30-v31 - ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ld1 {v8.16b, v9.16b, v10.16b, v11.16b}, [x12], #64 // weight ld1 {v2.8b}, [x11], x22 // src - .inst 0x4e80a45e // smmla v30.4s, v2.16b, v0.16b - .inst 0x4e81a45f // smmla v31.4s, v2.16b, v1.16b subs x13, x13, #1 + + .inst 0x4e88a450 // smmla v16.4s, v2.16b, v8.16b + .inst 0x4e89a451 // smmla v17.4s, v2.16b, v9.16b + .inst 0x4e8aa452 // smmla v18.4s, v2.16b, v10.16b + .inst 0x4e8ba453 // smmla v19.4s, v2.16b, v11.16b bne LoopSz_TILE_1 LoopSzEnd_TILE_1: add x25, x25, x15 - sub x24, x24, #1 - uzp1 v29.2d, v30.2d, v31.2d - uzp2 v30.2d, v30.2d, v31.2d - scvtf v29.4s, v29.4s - scvtf v30.4s, v30.4s + sub x24, x24, #2 + uzp1 v27.2d, v16.2d, v17.2d + uzp1 v26.2d, v18.2d, v19.2d + scvtf v27.4s, v27.4s + scvtf v26.4s, v26.4s Tile1Quan: - ld1 {v0.4s}, [x19], #16 // scale - fmul v29.4s, v29.4s, v0.4s - fmul v30.4s, v30.4s, v0.4s - cmp w23, #1 - beq Tile1QuanUseInt8 - st1 {v29.4s, v30.4s}, [x26], x4 + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v6.s}[0], [x27] // x kernel sum + ld1 {v8.4s, v9.4s}, [x6], #32 // weight quan zeropoint + fmul v27.4s, v27.4s, v0.4s + fmul v26.4s, v26.4s, v1.4s + + cbz x10, TILE1_MLA + ld1 {v10.s}[0], [x10] + fmul v27.4s, v27.4s, v10.s[0] + fmul v26.4s, v26.4s, v10.s[0] + + TILE1_MLA: + MLA_WEIGHTZERO v27, v6, v8, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v26, v6, v9, 0 // tile:0, oc:4-7 + + cbnz w23, Tile1QuanUseInt8 + + TILE1_ADD_BIAS: + cbz x9, TILE1_ADD_DSTV + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + b TILE1_POST + + TILE1_ADD_DSTV: + ld1 {v16.4s}, [x26], x4 + ld1 {v17.4s}, [x26] + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + sub x26, x26, x4 + + TILE1_POST: + cbz x14, TILE1_STORE + fmin v27.4s, v27.4s, v31.4s + fmax v27.4s, v27.4s, v30.4s + fmin v26.4s, v26.4s, v31.4s + fmax v26.4s, v26.4s, v30.4s + + TILE1_STORE: + st1 {v27.4s}, [x26], x4 + st1 {v26.4s}, [x26], x4 b Tile1LoopEnd + Tile1QuanUseInt8: - fcvtas v29.4s, v29.4s - fcvtas v30.4s, v30.4s - sqxtn v6.4h, v29.4s - sqxtn2 v6.8h, v30.4s - sqxtn v19.8b, v6.8h - smax v19.16b, v10.16b, v19.16b - smin v19.16b, v11.16b, v19.16b - st1 {v19.s}[0], [x26], x4 // dst += dz * dst_step + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + fcvtas v27.4s, v27.4s + fcvtas v26.4s, v26.4s + sqxtn v6.4h, v27.4s + sqxtn v7.4h, v26.4s + sqxtn v6.8b, v6.8h + sqxtn v7.8b, v7.8h + smax v6.16b, v30.16b, v6.16b + smin v6.16b, v31.16b, v6.16b + smax v7.16b, v30.16b, v7.16b + smin v7.16b, v31.16b, v7.16b + st1 {v6.s}[0], [x26], x4 // dst += dz * dst_step + st1 {v7.s}[0], [x26], x4 Tile1LoopEnd: - cmp x24, #1 + cmp x24, #2 bge LoopDz_TILE_1 + cbz x24, End + +LoopDz4_TILE_1: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + + movi v16.4s, #0 + movi v17.4s, #0 +LoopSz4_TILE_1: + ld1 {v8.16b, v9.16b}, [x12] // weight + ld1 {v2.8b}, [x11], x22 // src + subs x13, x13, #1 + add x12, x12, #64 + .inst 0x4e88a450 // smmla v16.4s, v2.16b, v8.16b + .inst 0x4e89a451 // smmla v17.4s, v2.16b, 
v9.16b + bne LoopSz4_TILE_1 +LoopSz4End_TILE_1: + add x25, x25, x15 + uzp1 v27.2d, v16.2d, v17.2d + scvtf v27.4s, v27.4s + +Tile1Quan_L4: + ld1 {v0.4s}, [x19] // scale + ld1 {v6.s}[0], [x27] // x kernel sum + ld1 {v8.4s}, [x6] // weight quan zeropoint + fmul v27.4s, v27.4s, v0.4s + cbz x10, TILE1_MLA_L4 + ld1 {v10.s}[0], [x10] + fmul v27.4s, v27.4s, v10.s[0] + + TILE1_MLA_L4: + MLA_WEIGHTZERO v27, v6, v8, 0 // tile:0, oc:0-3 + + cbnz w23, Tile1QuanUseInt8_L4 + + TILE1_ADD_BIAS_L4: + cbz x9, TILE1_ADD_DSTV_L4 + ld1 {v16.4s}, [x20] // bias + fadd v27.4s, v27.4s, v16.4s + b TILE1_POST_L4 + + TILE1_ADD_DSTV_L4: + ld1 {v16.4s}, [x26] + fadd v27.4s, v27.4s, v16.4s + + TILE1_POST_L4: + cbz x14, TILE1_STORE_L4 + fmin v27.4s, v27.4s, v31.4s + fmax v27.4s, v27.4s, v30.4s + + TILE1_STORE_L4: + st1 {v27.4s}, [x26], x4 + b End + + Tile1QuanUseInt8_L4: + ld1 {v16.4s}, [x20] // bias + fadd v27.4s, v27.4s, v16.4s + fcvtas v27.4s, v27.4s + sqxtn v6.4h, v27.4s + sqxtn v6.8b, v6.8h + smax v6.8b, v30.8b, v6.8b + smin v6.8b, v31.8b, v6.8b + st1 {v6.s}[0], [x26], x4 // dst += dz * dst_step End: +ldp x27, x28, [sp, #(16 * 8)] ldp x25, x26, [sp, #(16 * 7)] ldp x23, x24, [sp, #(16 * 6)] ldp x19, x20, [sp, #(16 * 5)] @@ -603,7 +1437,7 @@ ldp x21, x22, [sp, #(16 * 4)] ldp d8, d9, [sp, #(16 * 3)] ldp d10, d11, [sp, #(16 * 2)] ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 8) +ldp d14, d15, [sp], #(16 * 10) ret #endif // __aarch64__ diff --git a/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScaleUnit.S b/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScaleUnit.S index 7b7f141bd..44c7766c5 100644 --- a/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScaleUnit.S +++ b/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScaleUnit.S @@ -38,7 +38,7 @@ asm_function MNNLineDepthWiseInt8AddBiasScaleUnit ldr x8, [sp, #0] ldr x9, [sp, #8] -str d14, [sp, #(-16 * 9)]! +str d14, [sp, #(-16 * 10)]! stp d12, d13, [sp, #(16 * 1)] stp d10, d11, [sp, #(16 * 2)] stp d8, d9, [sp, #(16 * 3)] @@ -53,7 +53,7 @@ ldr w11, [x3, #16] dup v30.16b, w11 // max ldr w11, [x3, #20] dup v31.16b, w11 // min -ldr x3, [x3, #8] +ldr x3, [x3, #72] mul x10, x6, x8 sub x9, x9, x10 @@ -711,7 +711,7 @@ ldp x27, x28, [sp, #(16 * 4)] ldp d8, d9, [sp, #(16 * 3)] ldp d10, d11, [sp, #(16 * 2)] ldp d12, d13, [sp, #(16 * 1)] -ldr d14, [sp], #(16 * 9) +ldr d14, [sp], #(16 * 10) ret #endif diff --git a/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3.S b/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3.S index 362b863ca..27a59d20d 100644 --- a/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3.S +++ b/source/backend/cpu/arm/arm64/MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3.S @@ -19,15 +19,22 @@ asm_function MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3 // size_t dilateY_step, int8_t* idx) { // kernelx=3, kernely=3,dilatex=1,dilatey=1 - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// float roundValuePos = 0.5f; -// float roundValueNeg = -0.5f; -//}; +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + +};*/ // Auto Load: // x0: dst*, x1: src*, x2: weight*, x3: parameters* @@ -50,7 +57,7 @@ stp x19, x20, [sp, #(16 * 5)] ldr x19, [x3, #0] // scale ldr w20, [x3, #16] // max ldr w15, [x3, #20] // min -ldr x3, [x3, #8] // bias +ldr x3, [x3, #72] // bias ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [x14] ld1 {v0.16b, v1.16b}, [x2], #32 // v0,v1:weight ld1 {v12.s}[0], [x2] // weight: k:8 @@ -684,4 +691,4 @@ ldp d12, d13, [sp, #(16 * 1)] ldp d14, d15, [sp], #(16 * 6) ret -#endif \ No newline at end of file +#endif diff --git a/source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM82.S b/source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM82.S new file mode 100644 index 000000000..db81f8a03 --- /dev/null +++ b/source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM82.S @@ -0,0 +1,202 @@ +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SET_0 s0, s1, s2, s3 + movi \s0\().4s, #0 + movi \s1\().4s, #0 + movi \s2\().4s, #0 + movi \s3\().4s, #0 +.endm + +/* +struct SumByAxisParams { + ssize_t kernelCountUnitDouble; + ssize_t col_buffer_unit_size; + ssize_t DST_XUNIT; + ssize_t SRC_UNIT; + ssize_t blockNum; + ssize_t oneScale; +}; + */ + +asm_function MNNSumByAxisLForMatmul_A_ARM82 +// MNNSumByAxisLForMatmul_A_ARM82(float_t* dest, int8_t* source, float* dequantScale, ssize_t realDstCount, +// ssize_t kernelCountUnitDouble, ssize_t col_buffer_unit_size, ssize_t EP, ssize_t LP, ssize_t blockNum, ssize_t oneScale); +// x0: dest, x1: source, x2: dequantScale, x3: realDstCount, x4: sumParams +// x4: kernelCountUnitDouble, x5: col_buffer_unit_size +// Load from sp: x8: blockNum + +ldr x8, [x4, #32] // blockNum +ldr x5, [x4, #40] // oneScale +ldr x4, [x4, #0] // kernelCountUnitDouble + +//ldr x8, [sp, #0] // blockNum + +stp d14, d15, [sp, #(-16 * 4)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] + +movi v31.16b, #1 +ld1r {v30.4s}, [x2] // Dequant scale +mov x6, #48 // EP*LP +sdiv x4, x4, x8 // src_depth_quad per block + +TILE_12: +cmp x3, #12 +blt Remain + +mov x9, x8 // blockNum +cbnz x5, TILE12_BLOCK_NUM +ld1 {v13.4s, v14.4s, v15.4s}, [x2], #48 // batch quant scale + +TILE12_BLOCK_NUM: +mov x15, x4 // kernelCountUnitDouble + +movi v10.4s, #0 +movi v11.4s, #0 +movi v12.4s, #0 + +TILE12_BLOCK_INNER: + +ld1 {v0.16b, v1.16b, v2.16b}, [x1], #48 // E: 0,1,2,3,...,11 +subs x15, x15, #1 + +.inst 0x4e8097ea // sdot v10.4s, v31.16b, v0.16b // sum LP axis for E0, E1, E2, E3 +.inst 0x4e8197eb // sdot v11.4s, v31.16b, v1.16b +.inst 0x4e8297ec // sdot v12.4s, v31.16b, v2.16b + +bne TILE12_BLOCK_INNER + +TILE12_BLOCK_INNER_END: +subs x9, x9, #1 // blockNum-- + +scvtf v10.4s, v10.4s +scvtf v11.4s, v11.4s +scvtf v12.4s, v12.4s + +cbnz x5, TILE12_MUL_ONE_SCALE +fmul v10.4s, v10.4s, v13.4s +fmul v11.4s, v11.4s, v14.4s +fmul v12.4s, v12.4s, v15.4s +b TILE12_STORE + +TILE12_MUL_ONE_SCALE: +fmul v10.4s, v10.4s, v30.4s +fmul v11.4s, v11.4s, v30.4s +fmul v12.4s, v12.4s, v30.4s + +TILE12_STORE: +st1 {v10.4s, v11.4s, v12.4s}, [x0], #48 +bne TILE12_BLOCK_NUM + +TILE12_END: +subs x3, x3, #12 // realDstCount-=12 +bne TILE_12 + + +Remain: // remain realDstCount < EP +cbz x3, End +/* x11: Remain dstCount step for each block */ +lsl x11, x3, #2 + +TILE_2: // realDstCount >= 1 +cmp x3, #2 +blt TILE_1 + +mov x7, x1 +mov x9, x8 // blockNum +mov x10, x0 // tag dst address + +cbnz x5, TILE2_BLOCK_NUM +ld1 {v13.d}[0], [x2], #8 // batch quant scale + +TILE2_BLOCK_NUM: +mov x15, x4 // kernelCountUnitDouble +movi v10.4s, #0 + +TILE2_BLOCK_INNER: +ld1 {v0.d}[0], [x7] // E: 0,1 +add x7, x7, x6 +subs x15, x15, #1 +.inst 0x4e8097ea // sdot v10.4s, v31.16b, v0.16b // sum LP axis for E0 +bne TILE2_BLOCK_INNER + +TILE2_BLOCK_INNER_ENd: +scvtf v10.4s, v10.4s + +cbnz x5, TILE2_MUL_ONE_SCALE +fmul v10.4s, v10.4s, v13.4s +b TILE2_STORE + +TILE2_MUL_ONE_SCALE: +fmul v10.4s, v10.4s, v30.4s + +TILE2_STORE: +subs x9, x9, #1 // blockNum-- +st1 {v10.d}[0], [x10], x11 +bne TILE2_BLOCK_NUM + +TILE2_END: +sub x3, x3, #2 // realDstCount-=2 +add x1, x1, #8 // LP * 2 +add x0, x0, #8 // finish remain 2 +b TILE_2 + + +TILE_1: // realDstCount >= 1 +cmp x3, #1 +blt End + +mov x7, x1 +mov x9, x8 // blockNum +mov x10, x0 + +cbnz x5, TILE1_BLOCK_NUM +ld1 {v13.s}[0], [x2], #4 // batch quant scale + +TILE1_BLOCK_NUM: +mov x15, x4 // kernelCountUnitDouble +movi v10.4s, #0 + +TILE1_BLOCK_INNER: +ld1 {v0.s}[0], [x7] // E: 0 +subs x15, x15, #1 +add x7, x7, x6 +.inst 0x4e8097ea // sdot v10.4s, v31.16b, v0.16b // sum LP axis for E0 + +bne TILE1_BLOCK_INNER + +TILE1_BLOCK_INNER_END: +scvtf v10.4s, v10.4s + +cbnz x5, TILE1_MUL_ONE_SCALE +fmul v10.4s, v10.4s, v13.4s +b TILE1_STORE + +TILE1_MUL_ONE_SCALE: +fmul v10.4s, v10.4s, v30.4s + +TILE1_STORE: +subs x9, x9, #1 // blockNum-- +st1 {v10.s}[0], [x10], x11 +bne TILE1_BLOCK_NUM + +TILE1_END: +sub x3, x3, #1 // realDstCount-=1 +add x1, x1, #4 // LP * 1 +add x0, x0, #4 // finish remain 1 + +b TILE_1 + +End: +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 4) +ret +#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM86.S b/source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM86.S new file mode 100644 index 000000000..803166f17 --- /dev/null +++ 
b/source/backend/cpu/arm/arm64/MNNPackC4Int8ForMatMulA_ARM86.S @@ -0,0 +1,318 @@ +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SET_0 s0, s1, s2, s3 + movi \s0\().4s, #0 + movi \s1\().4s, #0 + movi \s2\().4s, #0 + movi \s3\().4s, #0 +.endm + +/* +struct SumByAxisParams { + ssize_t kernelCountUnitDouble; + ssize_t col_buffer_unit_size; + ssize_t DST_XUNIT; + ssize_t SRC_UNIT; + ssize_t blockNum; + ssize_t oneScale; +}; + */ + +asm_function MNNSumByAxisLForMatmul_A_ARM86 +// MNNSumByAxisLForMatmul_A_ARM86(float* dest, int8_t* source, const float* dequantScale, ssize_t realDstCount, SumByAxisParams sumParams); +// x0: dest, x1: source, x2: dequantScale, x3: realDstCount, x4: sumParams +// Load from sp: x6: blockNum + +ldr x6, [x4, #32] // blockNum +ldr x12, [x4, #40] // oneScale +ldr x5, [x4, #0] // kernelCountUnitDouble + +stp d14, d15, [sp, #(-16 * 4)]! +stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] + +movi v31.16b, #1 +ld1r {v30.4s}, [x2] // dequant scale +mov x8, #80 // EP*LP +sdiv x5, x5, x6 // src_depth_quad_per_block + +START: +lsl x11, x3, #2 + +cmp x3, #1 +beq TILE_1 + +TILE_10: // realDstCount >= EP(10) +cmp x3, #10 +blt Remain +mov x9, x6 // blockNum + +cbnz x12, TILE10_BLOCK_NUM +ld1 {v5.4s, v6.4s}, [x2], #32 +ld1 {v7.d}[0], [x2] +sub x2, x2, #32 + +TILE10_BLOCK_NUM: +cbz x9, TILE10_END + +mov x15, x5 // kernelCountUnitDouble of a block +SET_0 v10, v11, v12, v13 +movi v14.4s, #0 + +TILE10_BLOCK_SRC_QUAD: + +//Loop_EPxLP: // EP*LP=10*8 +ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x1], #64 // E: 0,1,...,7 +ld1 {v4.16b}, [x1], #16 // E: 8,9 +subs x15, x15, #1 + +.inst 0x4e80a7ea // smmla v10.4s, v31.16b, v0.16b // sum LP axis for E0 and E1 +.inst 0x4e81a7eb // smmla v11.4s, v31.16b, v1.16b +.inst 0x4e82a7ec // smmla v12.4s, v31.16b, v2.16b +.inst 0x4e83a7ed // smmla v13.4s, v31.16b, v3.16b +.inst 0x4e84a7ee // smmla v14.4s, v31.16b, v4.16b + +bne TILE10_BLOCK_SRC_QUAD + +TILE10_PER_BLOCK_END: +sub x9, x9, #1 // blockNum-- + +trn1 v20.2d, v10.2d, v11.2d +trn1 v21.2d, v12.2d, v13.2d + +scvtf v20.4s, v20.4s +scvtf v21.4s, v21.4s +scvtf v14.4s, v14.4s + +cbnz x12, TILE10_ONE_SCALE +fmul v20.4s, v20.4s, v5.4s +fmul v21.4s, v21.4s, v6.4s +fmul v14.4s, v14.4s, v7.4s +b TILE10_STORE + +TILE10_ONE_SCALE: +fmul v20.4s, v20.4s, v30.4s +fmul v21.4s, v21.4s, v30.4s +fmul v14.4s, v14.4s, v30.4s + +TILE10_STORE: +st1 {v20.4s, v21.4s}, [x0], #32 +st1 {v14.d}[0], [x0], #8 +b TILE10_BLOCK_NUM // Finish one block + +TILE10_END: +sub x3, x3, #10 // realDstCount-=10 +b TILE_10 + + +Remain: // remain realDstCount < EP +cbz x3, End + +lsl x11, x3, #2 +/* For remain dstCount, each E's block step is x11. 
*/ +TILE_8: // realDstCount >= 8 +cmp x3, #8 +blt TILE_4 + +mov x7, x1 // tag begin src address for Remain8 +mov x10, x0 // tag begin dst address for Remain8 +mov x9, x6 // blockNum + +cbnz x12, TILE8_BLOCK_NUM +ld1 {v5.4s, v6.4s}, [x2], #32 + +TILE8_BLOCK_NUM: +cbz x9, TILE8_END +mov x15, x5 // kernelCountUnitDouble + +SET_0 v10, v11, v12, v13 + +TILE8_BLOCK_SRC_QUAD: + +ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x7] // E: 0,1,...,7 +subs x15, x15, #1 +add x7, x7, x8 // x7=x7+EP*LP +.inst 0x4e80a7ea // smmla v10.4s, v31.16b, v0.16b // sum LP axis for E0 and E1 +.inst 0x4e81a7eb // smmla v11.4s, v31.16b, v1.16b +.inst 0x4e82a7ec // smmla v12.4s, v31.16b, v2.16b +.inst 0x4e83a7ed // smmla v13.4s, v31.16b, v3.16b + +bne TILE8_BLOCK_SRC_QUAD + +TILE8_PER_BLOCK_END: +sub x9, x9, #1 // blockNum-- + +trn1 v20.2d, v10.2d, v11.2d +trn1 v21.2d, v12.2d, v13.2d + +scvtf v20.4s, v20.4s +scvtf v21.4s, v21.4s + +cbnz x12, TILE8_ONE_SCALE +fmul v20.4s, v20.4s, v5.4s +fmul v21.4s, v21.4s, v6.4s +b TILE8_STORE + +TILE8_ONE_SCALE: +fmul v20.4s, v20.4s, v30.4s +fmul v21.4s, v21.4s, v30.4s + +TILE8_STORE: +st1 {v20.4s, v21.4s}, [x10], x11 // Go to next block for this 8 remain. +b TILE8_BLOCK_NUM + +TILE8_END: +add x0, x0, #32 // finish 8 dstCount * sizeof(float) +sub x3, x3, #8 // realDstCount-=8 +add x1, x1, #64 // LP*8 + + +TILE_4: // realDstCount >= 4 +cmp x3, #4 +blt TILE_2 + +mov x7, x1 // tag begin src address for Remain4 +mov x10, x0 // tag begin dst address for Remain4 +mov x9, x6 // blockNum + +cbnz x12, TILE4_BLOCK_NUM +ld1 {v5.4s}, [x2], #16 + +TILE4_BLOCK_NUM: +cbz x9, TILE4_END +mov x15, x5 // kernelCountUnitDouble +movi v10.4s, #0 +movi v11.4s, #0 + +TILE4_BLOCK_SRC_QUAD: + +ld1 {v0.16b, v1.16b}, [x7] // E: 0,1,2,3 +subs x15, x15, #1 +add x7, x7, x8 +.inst 0x4e80a7ea // smmla v10.4s, v31.16b, v0.16b // sum LP axis for E0 and E1 +.inst 0x4e81a7eb // smmla v11.4s, v31.16b, v1.16b + +bne TILE4_BLOCK_SRC_QUAD + +TILE4_PER_BLOCK_END: +sub x9, x9, #1 // blockNum-- + +trn1 v20.2d, v10.2d, v11.2d +scvtf v20.4s, v20.4s + +cbnz x12, TILE4_ONE_SCALE +fmul v20.4s, v20.4s, v5.4s +b TILE4_STORE +TILE4_ONE_SCALE: +fmul v20.4s, v20.4s, v30.4s +TILE4_STORE: +st1 {v20.4s}, [x10], x11 +b TILE4_BLOCK_NUM + +TILE4_END: +add x0, x0, #16 // finish 4 dstCount * sizeof(float) +sub x3, x3, #4 // realDstCount-=4 +add x1, x1, #32 // LP*4 + +TILE_2: // realDstCount >= 2 +cmp x3, #2 +blt TILE_1 + +mov x7, x1 // tag begin src address for Remain8 +mov x10, x0 // tag begin dst address for Remain8 +mov x9, x6 // blockNum + +cbnz x12, TILE2_BLOCK_NUM +ld1 {v5.d}[0], [x2], #8 +TILE2_BLOCK_NUM: +cbz x9, TILE2_END +mov x15, x5 // kernelCountUnitDouble + +movi v10.4s, #0 + +TILE2_BLOCK_SRC_QUAD: + +ld1 {v0.16b}, [x7] // E: 0,1 +subs x15, x15, #1 +add x7, x7, x8 + +.inst 0x4e80a7ea // smmla v10.4s, v31.16b, v0.16b // sum LP axis for E0 and E1 + +bne TILE2_BLOCK_SRC_QUAD + +TILE2_PER_BLOCK_END: +sub x9, x9, #1 // blockNum-- + +scvtf v10.4s, v10.4s +cbnz x12, TILE2_ONE_SCALE +fmul v10.4s, v10.4s, v5.4s +b TILE2_STORE +TILE2_ONE_SCALE: +fmul v10.4s, v10.4s, v30.4s +TILE2_STORE: +st1 {v10.d}[0], [x10], x11 +b TILE2_BLOCK_NUM + +TILE2_END: +add x0, x0, #8 // finish 2 dstCount: 2 * sizeof(float32) +sub x3, x3, #2 // realDstCount-=2 +add x1, x1, #16 // LP * 2 * sizeof(int8_t) + +TILE_1: // realDstCount >= 1 +cmp x3, #1 +blt End + +mov x7, x1 // tag begin src address for Remain4 +mov x10, x0 // tag begin dst address for Remain4 +mov x9, x6 // blockNum + +cbnz x12, TILE1_BLOCK_NUM +ld1 {v5.s}[0], [x2], #4 + +TILE1_BLOCK_NUM: +cbz x9, TILE1_END 
+mov x15, x5 // kernelCountUnitDouble +movi v10.4s, #0 + +TILE1_BLOCK_SRC_QUAD: + +ld1 {v0.d}[0], [x7] // E: 0 +subs x15, x15, #1 +add x7, x7, x8 +.inst 0x4e80a7ea // smmla v10.4s, v31.16b, v0.16b // sum LP axis for E0 + +bne TILE1_BLOCK_SRC_QUAD + +TILE1_PER_BLOCK_END: +sub x9, x9, #1 // blockNum-- + +scvtf v10.4s, v10.4s + +cbnz x12, TILE1_ONE_SCALE +fmul v10.4s, v10.4s, v5.4s +b TILE1_STORE + +TILE1_ONE_SCALE: +fmul v10.4s, v10.4s, v30.4s +TILE1_STORE: +st1 {v10.s}[0], [x10], x11 +b TILE1_BLOCK_NUM + +TILE1_END: +sub x3, x3, #1 // realDstCount-=1 +add x1, x1, #8 // LP * 1 * sizeof(int8_t) +add x0, x0, #4 // 1 * sizeof(float) + +End: +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 4) +ret +#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx1.S b/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx1.S index 12c11436a..119cb6c90 100644 --- a/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx1.S +++ b/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx1.S @@ -38,7 +38,10 @@ ldp x3, x9, [x3] // x3: eSize, x9: eP mov x8, x6 // x8: dataOffsetMap mov x7, x5 // x7: NNZMap -ldp x24, x6, [x4], #16 // x5: scale , x6: bias +ldr x24, [x4, #0] +ldr x6, [x4, #72] +add x4, x4, #16 +//ldp x24, x6, [x4], #16 // x5: scale , x6: bias lsr x14, x11, #2 lsl x14, x14, #2 // x14: (h / 4) * 4 ld2r {v13.4s, v14.4s}, [x4] // first two elements of x4 are pointers, 'max, min ' locate at [2], [3] diff --git a/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx4.S b/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx4.S index 5b506cd55..d99a3cfb2 100644 --- a/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx4.S +++ b/source/backend/cpu/arm/arm64/MNNPackedSparseQuantMatMulEpx4.S @@ -38,7 +38,10 @@ ldp x3, x9, [x3] // x3: eSize, x9: eP mov x8, x6 // x8: dataOffsetMap mov x7, x5 // x7: NNZMap -ldp x24, x6, [x4], #16 // x5: scale , x6: bias +ldr x24, [x4] +ldr x6, [x4, #72] +add x4, x4, #16 +//ldp x24, x6, [x4], #16 // x5: scale , x6: bias lsr x14, x11, #2 lsl x14, x14, #2 // x14: (h / 4) * 4 ld2r {v13.4s, v14.4s}, [x4] // first two elements of x4 are pointers, 'max, min ' locate at [2], [3] diff --git a/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S b/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S index 2acfe6930..a5de45f88 100644 --- a/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S +++ b/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMulRemain_BF16.S @@ -19,13 +19,6 @@ movi \d3\().4s, #0 .endm -.macro Float32ToBf16 d0, d1, d2, d3 - shrn \d0\().4h, \d0\().4s, #16 - shrn \d1\().4h, \d1\().4s, #16 - shrn \d2\().4h, \d2\().4s, #16 - shrn \d3\().4h, \d3\().4s, #16 -.endm - .macro FOURFMAX s, d0, d1, d2, d3 fmax \d0\().4s, \d0\().4s, \s\().4s fmax \d1\().4s, \d1\().4s, \s\().4s @@ -50,12 +43,15 @@ asm_function ARMV86_MNNPackedMatMulRemain_BF16 //void ARMV86_MNNPackedMatMulRemain_BF16(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias); //Auto x0: C, x1:A, x2:B, x3:eSize, x4:parameter, x5:postParameters, x6:bias -sub sp, sp, #64 +sub sp, sp, #96 str x19, [sp, #0] str x20, [sp, #8] str x21, [sp, #16] str x22, [sp, #24] +stp d9, d10, [sp, #32] +str d15, [sp, #64] ldr x11, [x4, #0] // aStride +lsr x11, x11, #1 // aStride->bf16 stride ldr x9, [x4, #8] // l ldr x10, [x4, #16] // h lsl x11, x11, #2 // aStride * 4 @@ 
-63,6 +59,7 @@ mov x22, #64 // B_stride = LP * HP = 4 * 8 * sizeof(int16_t) ldr x7, [x4, #24] // cStride ldr x19, [x4, #40] // bExtraStride +lsr x19, x19, #1 // bExtraStride->bf16 stride add x10, x10, #3 lsr x10, x10, #2 @@ -89,14 +86,12 @@ LoopE8: // e, TILE_BLOCK size is 8 LH8: cmp x8, #2 // h/4 > 2 blt LH4 - sub x14, x7, #64 // cStride - 64 + sub x14, x7, #128 // cStride - 8 * 4 * sizeof(float) LoopH8x8: mov x15, x1 // src, A mov x12, x9 // l cbz x5, NoBiasLH8 - ld1 {v0.4h, v1.4h}, [x20], #16 // 8 * sizeof(int16_t) - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 // 8 * sizeof(float) mov v2.16b, v0.16b mov v3.16b, v1.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 @@ -165,15 +160,11 @@ LoopE8: // e, TILE_BLOCK size is 8 FOURFMIN v10, v23, v24, v25, v26 FOURFMIN v10, v27, v28, v29, v30 StoreLH8: - Float32ToBf16 v15, v16, v17, v18 - Float32ToBf16 v19, v20, v21, v22 - Float32ToBf16 v23, v24, v25, v26 - Float32ToBf16 v27, v28, v29, v30 - st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0], #64 // 16 * sizeof(float) + st1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x0], #64 // 16 * sizeof(float) add x0, x0, x14 - st1 {v23.4h, v24.4h, v25.4h, v26.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v27.4h, v28.4h, v29.4h, v30.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v23.4s, v24.4s, v25.4s, v26.4s}, [x0], #64 // 16 * sizeof(float) + st1 {v27.4s, v28.4s, v29.4s, v30.4s}, [x0], #64 // 16 * sizeof(float) add x0, x0, x14 add x13, x13, x19 // weight stride sub x8, x8, #2 @@ -185,8 +176,7 @@ LoopE8: // e, TILE_BLOCK size is 8 mov x15, x1 mov x12, x9 cbz x5, NoBiasHRemain - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] mov v2.16b, v0.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 @@ -228,14 +218,12 @@ LoopE8: // e, TILE_BLOCK size is 8 FOURFMIN v10, v15, v16, v17, v18 FOURFMIN v10, v19, v20, v21, v22 StoreLH8x4: - Float32ToBf16 v15, v16, v17, v18 - Float32ToBf16 v19, v20, v21, v22 - st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x0], #64 // 16 * sizeof(int16_t) E8End: sub x3, x3, #8 cmp x3, #8 - add x0, x21, #64 // move dest address of 8 * 4 * sizeof(int16_t) + add x0, x21, #128 // move dest address of 8 * 4 * sizeof(float) add x1, x1, #64 // move A matrix address of 8 * 4 * sizeof(int16_t) bge LoopE8 @@ -255,9 +243,7 @@ E4LH8: mov x15, x1 mov x12, x9 cbz x5, NoBiasE4 - ld1 {v0.4h, v1.4h}, [x20], #16 // 8 * sizeof(int16_t) - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 // 8 * sizeof(float) mov v2.16b, v0.16b mov v3.16b, v1.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 @@ -304,10 +290,8 @@ E4LH8: FOURFMIN v10, v15, v16, v17, v18 FOURFMIN v10, v19, v20, v21, v22 StoreLH4x8: - Float32ToBf16 v15, v16, v17, v18 - Float32ToBf16 v19, v20, v21, v22 - st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], x7 // 16 * sizeof(int16_t) - st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], x7 // 16 * sizeof(int16_t) + st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0], x7 + st1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x0], x7 add x13, x13, x19 // weight stride sub x8, x8, #2 cmp x8, #2 @@ -317,8 +301,7 @@ E4LH8: 
mov x15, x1 mov x12, x9 cbz x5, NoBiasE4R - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] mov v2.16b, v0.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 @@ -347,13 +330,12 @@ E4LH8: cbz x5, StoreLH4x4 PostTreatLH4x4: FOURFMAX v9, v15, v16, v17, v18 - FOURFMIN v10, v19, v20, v21, v22 + FOURFMIN v10, v15, v16, v17, v18 StoreLH4x4: - Float32ToBf16 v15, v16, v17, v18 - st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0] // 16 * sizeof(int16_t) + st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0] E4End: sub x3, x3, #4 - add x0, x21, #32 // move dest address of 4 * 4 * sizeof(int16_t) + add x0, x21, #64 // move dest address of 4 * 4 * sizeof(float) add x1, x1, #32 // move dest address of 4 * 4 * sizeof(int16_t) E2: @@ -372,9 +354,7 @@ E2LH8: mov x15, x1 mov x12, x9 cbz x5, NoBiasE2 - ld1 {v0.4h, v1.4h}, [x20], #16 - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 mov v2.16b, v0.16b mov v3.16b, v1.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 @@ -406,9 +386,8 @@ E2LH8: FOURFMAX v9, v15, v16, v17, v18 FOURFMIN v10, v15, v16, v17, v18 StoreLH2x8: - Float32ToBf16 v15, v16, v17, v18 - st1 {v15.4h, v16.4h}, [x0], x7 // 8 * sizeof(int16_t) - st1 {v17.4h, v18.4h}, [x0], x7 // 8 * sizeof(int16_t) + st1 {v15.4s, v16.4s}, [x0], x7 // 8 * sizeof(int16_t) + st1 {v17.4s, v18.4s}, [x0], x7 // 8 * sizeof(int16_t) add x13, x13, x19 // weight stride sub x8, x8, #2 cmp x8, #2 @@ -418,8 +397,7 @@ E2LH8: mov x15, x1 mov x12, x9 cbz x5, NoBiasE2R - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] mov v2.16b, v0.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 @@ -447,12 +425,10 @@ E2LH8: fmin v15.4s, v15.4s, v10.4s fmin v16.4s, v16.4s, v10.4s StoreLH2x4: - shrn v15.4h, v15.4s, #16 - shrn v16.4h, v16.4s, #16 - st1 {v15.4h, v16.4h}, [x0] // 8 * sizeof(int16_t) + st1 {v15.4s, v16.4s}, [x0] E2End: sub x3, x3, #2 - add x0, x21, #16 // move dest address of 2 * 4 * sizeof(int16_t) + add x0, x21, #32 // move dest address of 2 * 4 * sizeof(float) add x1, x1, #16 // move dest address of 2 * 4 * sizeof(int16_t) E1: @@ -473,9 +449,7 @@ LoopE1: mov x15, x1 mov x12, x9 cbz x5, NoBiasE1 - ld1 {v0.4h, v1.4h}, [x20], #16 - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 mov v2.16b, v0.16b mov v3.16b, v1.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 @@ -508,10 +482,8 @@ LoopE1: fmin v15.4s, v15.4s, v10.4s fmin v16.4s, v16.4s, v10.4s StoreLH1x8: - shrn v15.4h, v15.4s, #16 - shrn v16.4h, v16.4s, #16 - st1 {v15.4h}, [x0], x7 - st1 {v16.4h}, [x0], x7 + st1 {v15.4s}, [x0], x7 + st1 {v16.4s}, [x0], x7 add x13, x13, x19 sub x8, x8, #2 cmp x8, #2 @@ -522,8 +494,7 @@ LoopE1: mov x15, x1 mov x12, x9 cbz x5, NoBiasE1R - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] mov v2.16b, v0.16b uzp1 v16.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 uzp2 v17.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 @@ -548,19 +519,20 @@ LoopE1: fmax v15.4s, v15.4s, v9.4s fmin v15.4s, v15.4s, v10.4s StoreLH1x4: - shrn v15.4h, v15.4s, #16 - st1 {v15.4h}, [x0] + st1 {v15.4s}, [x0] E1End: subs x3, x3, #1 - add x0, x21, #8 + add x0, x21, #16 // 4 * sizeof(float) add x1, x1, #8 bne LoopE1 End: +ldr d15, [sp, #64] +ldp d9, d10, [sp, #32] ldr x19, [sp, #0] ldr x20, [sp, #8] ldr x21, [sp, #16] ldr x22, [sp, #24] -add sp, sp, #64 +add sp, sp, #96 ret #endif diff --git 
a/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S b/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S index 7d3282969..567e34b56 100644 --- a/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S +++ b/source/backend/cpu/arm/arm64/bf16/ARMV86_MNNPackedMatMul_BF16.S @@ -19,13 +19,6 @@ movi \d3\().4s, #0 .endm -.macro Float32ToBf16 d0, d1, d2, d3 - shrn \d0\().4h, \d0\().4s, #16 - shrn \d1\().4h, \d1\().4s, #16 - shrn \d2\().4h, \d2\().4s, #16 - shrn \d3\().4h, \d3\().4s, #16 -.endm - .macro FOURFMAX s, d0, d1, d2, d3 fmax \d0\().4s, \d0\().4s, \s\().4s fmax \d1\().4s, \d1\().4s, \s\().4s @@ -51,11 +44,11 @@ asm_function ARMV86_MNNPackedMatMul_BF16 //void ARMV86_MNNPackedMatMul_BF16(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); // x0: C, x1:A, x2:B, x3:parameter, x4: postParameters, x5:bias -stp d14, d15, [sp, #-80]! +stp d14, d15, [sp, #-128]! stp d12, d13, [sp, #16] stp d10, d11, [sp, #32] stp d8, d9, [sp, #48] -stp x19, x21, [sp, #64] +stp x19, x20, [sp, #64] //ldr x8, [x3, #0] // deprecated ldr x9, [x3, #8] // l @@ -64,6 +57,7 @@ mov x11, #64 // B_stride = LP * HP = 4 * 8 * sizeof(int16_t) ldr x13, [x3, #24] // cStride ldr x7, [x3, #40] // bExtraStride +lsr x7, x7, #1 // bExtraStride -> bf16 stride add x10, x10, #3 lsr x10, x10, #2 @@ -79,14 +73,13 @@ Start: cmp x10, #2 blt LH4 LH8: - sub x14, x13, #96 // cStride - 96 + sub x14, x13, #192 // cStride - 12 * 4 * sizeof(float) LoopH: mov x15, x1 mov x12, x9 cbz x5, NoBiasH8 - ld1 {v0.4h, v1.4h}, [x5], #16 // 8 * sizeof(int16_t) - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x5], #32 // 8 * sizeof(float) + mov v2.16b, v0.16b mov v3.16b, v1.16b uzp1 v18.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 @@ -185,19 +178,14 @@ LoopH: FOURFMIN v6, v23, v24, v25, v26 FOURFMIN v6, v27, v28, v29, v30 StoreLH8: - Float32ToBf16 v7, v8, v9, v10 - Float32ToBf16 v11, v12, v13, v14 - Float32ToBf16 v15, v16, v17, v18 - Float32ToBf16 v19, v20, v21, v22 - Float32ToBf16 v23, v24, v25, v26 - Float32ToBf16 v27, v28, v29, v30 - st1 {v7.4h, v8.4h, v9.4h, v10.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v11.4h, v12.4h, v13.4h, v14.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) + + st1 {v7.4s, v8.4s, v9.4s, v10.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v11.4s, v12.4s, v13.4s, v14.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0], #64 // 16 * sizeof(int16_t) add x0, x0, x14 - st1 {v19.4h, v20.4h, v21.4h, v22.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v23.4h, v24.4h, v25.4h, v26.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v27.4h, v28.4h, v29.4h, v30.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v23.4s, v24.4s, v25.4s, v26.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v27.4s, v28.4s, v29.4s, v30.4s}, [x0], #64 // 16 * sizeof(int16_t) add x0, x0, x14 add x2, x2, x7 // weight stride sub x10, x10, #2 @@ -209,8 +197,7 @@ LoopHR: mov x15, x1 mov x12, x9 cbz x5, NoBiasH4 - ld1 {v0.4h}, [x5], #8 // 8 * sizeof(int16_t) - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x5], #16 // 4 * sizeof(float) mov v2.16b, v0.16b uzp1 v18.2d, v0.2d, v2.2d // bias_0, bias_1, bias_0, bias_1 uzp2 v19.2d, v0.2d, v2.2d // bias_2, bias_3, bias_2, bias_3 @@ -269,18 +256,16 @@ LoopHR: FOURFMIN v6, v11, v12, v13, v14 FOURFMIN v6, v15, v16, v17, v18 StoreLH4: - Float32ToBf16 v7, v8, v9, v10 - 
Float32ToBf16 v11, v12, v13, v14 - Float32ToBf16 v15, v16, v17, v18 - st1 {v7.4h, v8.4h, v9.4h, v10.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v11.4h, v12.4h, v13.4h, v14.4h}, [x0], #32 // 16 * sizeof(int16_t) - st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x0], #32 // 16 * sizeof(int16_t) + st1 {v7.4s, v8.4s, v9.4s, v10.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v11.4s, v12.4s, v13.4s, v14.4s}, [x0], #64 // 16 * sizeof(int16_t) + st1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x0], #64 // 16 * sizeof(int16_t) + End: -ldp x19, x21, [sp, #64] +ldp x19, x20, [sp, #64] ldp d8, d9, [sp, #48] ldp d10, d11, [sp, #32] ldp d12, d13, [sp, #16] -ldp d14, d15, [sp], #80 +ldp d14, d15, [sp], #128 ret #endif diff --git a/source/backend/cpu/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S b/source/backend/cpu/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S index faa7d31a1..4fc8d85eb 100644 --- a/source/backend/cpu/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S +++ b/source/backend/cpu/arm/arm64/bf16/MNNPackC4ForMatMul_A_BF16.S @@ -34,14 +34,16 @@ mov x6, #0 ldr w4, [x2, #4] // eReal ldr w11, [x2, #8] // eDest ldr w6, [x2, #12] // xOffset -// xOffset -> xOffset * 4 * sizeof(int16_t) +// xOffset -> xOffset * 4 * sizeof(float) // eReal -> eReal * 4 * sizeof(int16_t) // eDest -> eDest * sizeof(int16_t) mov x12, #2 // sizeof(int16_t). kept as a const mov x9, #8 -mul x4, x9, x4 +mov x15, #16 // sizeof(float) +mul x4, x15, x4 mul x11, x12, x11 -mul x6, x9, x6 + +mul x6, x15, x6 LoopNumber: mov x2, #0 @@ -72,18 +74,35 @@ bne Right LoopL4: mov x2, x1 .macro MAIN_TRANSPOSE - ld1 {v0.4h}, [x1], x6 // load size: 4 * sizeof(int16_t), jump one stride line as x6 - ld1 {v3.4h}, [x1], x6 - ld1 {v6.4h}, [x1], x6 - ld1 {v17.4h}, [x1], x6 - ld1 {v1.4h}, [x1], x6 - ld1 {v4.4h}, [x1], x6 - ld1 {v7.4h}, [x1], x6 - ld1 {v18.4h}, [x1], x6 - ld1 {v2.4h}, [x1], x6 - ld1 {v5.4h}, [x1], x6 - ld1 {v16.4h}, [x1], x6 - ld1 {v19.4h}, [x1], x6 + ld1 {v0.4s}, [x1], x6 // load size: 4 * sizeof(int16_t), jump one stride line as x6 + ld1 {v3.4s}, [x1], x6 + ld1 {v6.4s}, [x1], x6 + ld1 {v17.4s}, [x1], x6 + + ld1 {v1.4s}, [x1], x6 + ld1 {v4.4s}, [x1], x6 + ld1 {v7.4s}, [x1], x6 + ld1 {v18.4s}, [x1], x6 + + ld1 {v2.4s}, [x1], x6 + ld1 {v5.4s}, [x1], x6 + ld1 {v16.4s}, [x1], x6 + ld1 {v19.4s}, [x1], x6 + + shrn v0.4h, v0.4s, #16 + shrn v3.4h, v3.4s, #16 + shrn v6.4h, v6.4s, #16 + shrn v17.4h, v17.4s, #16 + + shrn v1.4h, v1.4s, #16 + shrn v4.4h, v4.4s, #16 + shrn v7.4h, v7.4s, #16 + shrn v18.4h, v18.4s, #16 + + shrn v2.4h, v2.4s, #16 + shrn v5.4h, v5.4s, #16 + shrn v16.4h, v16.4s, #16 + shrn v19.4h, v19.4s, #16 transpose_4x4 v0, v3, v6, v17, v23, v24 transpose_4x4 v1, v4, v7, v18, v25, v26 @@ -99,23 +118,6 @@ bne Right stp d18, d19, [x0, #(16 * 5)] add x0, x0, #(16 * 6) - // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) - // st1 {v1.4h}, [x0], #8 - // st1 {v2.4h}, [x0], #8 - // st1 {v3.4h}, [x0], #8 - // st1 {v4.4h}, [x0], #8 - // st1 {v5.4h}, [x0], #8 - // st1 {v6.4h}, [x0], #8 - // st1 {v7.4h}, [x0], #8 - // st1 {v16.4h}, [x0], #8 - // st1 {v17.4h}, [x0], #8 - // st1 {v18.4h}, [x0], #8 - // st1 {v19.4h}, [x0], #8 - - // st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 - // st1 {v4.4h, v5.4h, v6.4h, v7.4h}, [x0], #32 - // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], #32 - add x1, x2, x4 sub x5, x5, #4 cmp w5, #4 @@ -133,20 +135,6 @@ bne Right str d16, [x0, #(16 * 4)] add x0, x0, #(16 * 4 + 8) - // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) - // st1 {v1.4h}, [x0], #8 - // st1 {v2.4h}, [x0], #8 - // st1 {v3.4h}, [x0], #8 - // st1 {v4.4h}, [x0], #8 - // 
st1 {v5.4h}, [x0], #8 - // st1 {v6.4h}, [x0], #8 - // st1 {v7.4h}, [x0], #8 - // st1 {v16.4h}, [x0], #8 - - // st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 - // st1 {v4.4h, v5.4h, v6.4h, v7.4h}, [x0], #32 - // st1 {v16.4h}, [x0], #8 - b LoopEEnd LoopEL2: @@ -158,16 +146,6 @@ bne Right stp d4, d5, [x0, #(16 * 2)] add x0, x0, #(16 * 3) - // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) - // st1 {v1.4h}, [x0], #8 - // st1 {v2.4h}, [x0], #8 - // st1 {v3.4h}, [x0], #8 - // st1 {v4.4h}, [x0], #8 - // st1 {v5.4h}, [x0], #8 - - // st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 - // st1 {v4.4h, v5.4h}, [x0], #16 - b LoopEEnd LoopEL1: @@ -178,12 +156,6 @@ bne Right str d2, [x0, #16] add x0, x0, #(16 + 8) - // st1 {v0.4h}, [x0], #8 // store size: 4 * sizeof(int16_t) - // st1 {v1.4h}, [x0], #8 - // st1 {v2.4h}, [x0], #8 - - // st1 {v0.4h, v1.4h, v2.4h}, [x0], #24 - LoopEEnd: b End @@ -198,7 +170,8 @@ LoopE1: cmp w5, #4 blt LoopE1L3 LoopE1L4: - ld1 {v0.4h}, [x1], x4 + ld1 {v0.4s}, [x1], x4 + shrn v0.4h, v0.4s, #16 st1 {v0.h}[0], [x0], x11 st1 {v0.h}[1], [x0], x11 st1 {v0.h}[2], [x0], x11 @@ -210,7 +183,8 @@ LoopE1: LoopE1L3: cmp w5, #3 blt LoopE1L2 - ld1 {v0.4h}, [x1], x4 + ld1 {v0.4s}, [x1], x4 + shrn v0.4h, v0.4s, #16 st1 {v0.h}[0], [x0], x11 st1 {v0.h}[1], [x0], x11 st1 {v0.h}[2], [x0], x11 @@ -220,7 +194,8 @@ LoopE1: LoopE1L2: cmp w5, #2 blt LoopE1L1 - ld1 {v0.4h}, [x1], x4 + ld1 {v0.4s}, [x1], x4 + shrn v0.4h, v0.4s, #16 st1 {v0.h}[0], [x0], x11 st1 {v0.h}[1], [x0], x11 sub w5, w5, #2 @@ -228,7 +203,8 @@ LoopE1: LoopE1L1: cmp w5, #1 blt LoopE1End - ld1 {v0.h}[0], [x1], x4 + ld1 {v0.s}[0], [x1], x4 + shrn v0.4h, v0.4s, #16 st1 {v0.h}[0], [x0], x11 LoopE1End: diff --git a/source/backend/cpu/arm/arm64/bf16/MNNPackC8_BF16.S b/source/backend/cpu/arm/arm64/bf16/MNNPackC8_BF16.S index 87503e839..7157ce44b 100644 --- a/source/backend/cpu/arm/arm64/bf16/MNNPackC8_BF16.S +++ b/source/backend/cpu/arm/arm64/bf16/MNNPackC8_BF16.S @@ -23,9 +23,10 @@ lsr x4, x2, #3 lsr x5, x3, #3 mov x12, #2 // sizeof(int16_t) mov x13, #16 // 8 * sizeof(int16_t) -mul x6, x12, x2 +mov x15, #4 +mul x6, x15, x2 mul x7, x13, x2 -mov x12, #16 // 8 * sizeof(int16_t) +mov x12, #32 // 8 * sizeof(float) mul x15, x12, x2 .macro transpose_4x4 x0, x1, x2, x3, x5, x6 @@ -47,32 +48,15 @@ mov x12, x4 LoopL: mov x10, x9 -ld1 {v16.4h, v17.4h}, [x9], x6 -ld1 {v18.4h, v19.4h}, [x9], x6 -ld1 {v20.4h, v21.4h}, [x9], x6 -ld1 {v22.4h, v23.4h}, [x9], x6 - -ld1 {v24.4h, v25.4h}, [x9], x6 -ld1 {v26.4h, v27.4h}, [x9], x6 -ld1 {v28.4h, v29.4h}, [x9], x6 -ld1 {v30.4h, v31.4h}, [x9], x6 - -shll v16.4s, v16.4h, #16 -shll v17.4s, v17.4h, #16 -shll v18.4s, v18.4h, #16 -shll v19.4s, v19.4h, #16 -shll v20.4s, v20.4h, #16 -shll v21.4s, v21.4h, #16 -shll v22.4s, v22.4h, #16 -shll v23.4s, v23.4h, #16 -shll v24.4s, v24.4h, #16 -shll v25.4s, v25.4h, #16 -shll v26.4s, v26.4h, #16 -shll v27.4s, v27.4h, #16 -shll v28.4s, v28.4h, #16 -shll v29.4s, v29.4h, #16 -shll v30.4s, v30.4h, #16 -shll v31.4s, v31.4h, #16 +ld1 {v16.4s, v17.4s}, [x9], x6 +ld1 {v18.4s, v19.4s}, [x9], x6 +ld1 {v20.4s, v21.4s}, [x9], x6 +ld1 {v22.4s, v23.4s}, [x9], x6 + +ld1 {v24.4s, v25.4s}, [x9], x6 +ld1 {v26.4s, v27.4s}, [x9], x6 +ld1 {v28.4s, v29.4s}, [x9], x6 +ld1 {v30.4s, v31.4s}, [x9], x6 transpose_4x4 v16, v18, v20, v22, v0, v1 @@ -109,7 +93,7 @@ stp d19, d27, [x8], #16 stp d21, d29, [x8], #16 stp d23, d31, [x8], #16 -add x9, x10, #16 // 8 * sizeof(int16_t) +add x9, x10, #32 // 8 * sizeof(float) subs x12, x12, #1 bne LoopL diff --git 
a/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S b/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S index a65140adc..64232bd9f 100644 --- a/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S +++ b/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMulRemain_BF16.S @@ -24,11 +24,14 @@ ldr x11, [x4, #0] // aStride ldr x9, [x4, #8] // l ldr x10, [x4, #16] // h +lsr x11, x11, #1 // aStride = aStride / 2 (fp32 -> bf16) + ldr x7, [x4, #24] // cStride ldr x19, [x4, #40] // bExtraStride add x10, x10, #3 lsr x10, x10, #2 +lsr x19, x19, #1 // bExtraStride = bExtraStride / 2 cbz x5, Start ld1 {v5.4s}, [x5] @@ -121,9 +124,7 @@ LoopE8: // e, TILE_BLOCK size is 8 cbz x5, StoreLH8 AddBiasLH8: - ld1 {v0.4h, v1.4h}, [x20], #16 - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 fmla v16.4s, v0.4s, v5.s[1] fmla v17.4s, v0.4s, v5.s[1] @@ -181,33 +182,17 @@ LoopE8: // e, TILE_BLOCK size is 8 fmin v31.4s, v31.4s, v7.4s StoreLH8: - shrn v16.4h, v16.4s, #16 - shrn v17.4h, v17.4s, #16 - shrn v18.4h, v18.4s, #16 - shrn v19.4h, v19.4s, #16 - shrn v20.4h, v20.4s, #16 - shrn v21.4h, v21.4s, #16 - shrn v22.4h, v22.4s, #16 - shrn v23.4h, v23.4s, #16 - shrn v24.4h, v24.4s, #16 - shrn v25.4h, v25.4s, #16 - shrn v26.4h, v26.4s, #16 - shrn v27.4h, v27.4s, #16 - shrn v28.4h, v28.4s, #16 - shrn v29.4h, v29.4s, #16 - shrn v30.4h, v30.4s, #16 - shrn v31.4h, v31.4s, #16 - - stp d16, d17, [x0] - stp d18, d19, [x0, #(16 * 1)] - stp d24, d25, [x0, #(16 * 2)] - stp d26, d27, [x0, #(16 * 3)] + + stp q16, q17, [x0] + stp q18, q19, [x0, #(32 * 1)] + stp q24, q25, [x0, #(32 * 2)] + stp q26, q27, [x0, #(32 * 3)] add x0, x0, x7 // stp donot support post-index offset in register - stp d20, d21, [x0] - stp d22, d23, [x0, #(16 * 1)] - stp d28, d29, [x0, #(16 * 2)] - stp d30, d31, [x0, #(16 * 3)] + stp q20, q21, [x0] + stp q22, q23, [x0, #(32 * 1)] + stp q28, q29, [x0, #(32 * 2)] + stp q30, q31, [x0, #(32 * 3)] add x0, x0, x7 // stp donot support post-index offset in register // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], #32 // 4 * 4 * sizeof(int16_t) @@ -271,8 +256,7 @@ LoopE8: // e, TILE_BLOCK size is 8 cbz x5, StoreLH8x4 AddBiasLH8x4: - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] fmla v16.4s, v0.4s, v5.s[1] fmla v17.4s, v0.4s, v5.s[1] @@ -304,20 +288,12 @@ LoopE8: // e, TILE_BLOCK size is 8 fmin v23.4s, v23.4s, v7.4s StoreLH8x4: - shrn v16.4h, v16.4s, #16 - shrn v17.4h, v17.4s, #16 - shrn v18.4h, v18.4s, #16 - shrn v19.4h, v19.4s, #16 - shrn v20.4h, v20.4s, #16 - shrn v21.4h, v21.4s, #16 - shrn v22.4h, v22.4s, #16 - shrn v23.4h, v23.4s, #16 - - stp d16, d17, [x0] - stp d18, d19, [x0, #(16 * 1)] - stp d20, d21, [x0, #(16 * 2)] - stp d22, d23, [x0, #(16 * 3)] - add x0, x0, #(16 * 4) + + stp q16, q17, [x0] + stp q18, q19, [x0, #(32 * 1)] + stp q20, q21, [x0, #(32 * 2)] + stp q22, q23, [x0, #(32 * 3)] + add x0, x0, #(32 * 4) // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], #32 // st1 {v20.4h, v21.4h, v22.4h, v23.4h}, [x0], #32 @@ -326,7 +302,7 @@ LoopE8: // e, TILE_BLOCK size is 8 sub x3, x3, #8 cmp x3, #8 - add x0, x21, #64 // move dest address of 8 * 4 * sizeof(int16_t) + add x0, x21, #128 // move dest address of 8 * 4 * sizeof(float) add x1, x1, #16 // move A matrix address of 8 * sizeof(int16_t) bge LoopE8 @@ -412,9 +388,7 @@ blt E1 cbz x5, StoreLH4x8 AddBiasLH4x8: - ld1 {v0.4h, v1.4h}, [x20], #16 - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 fmla v16.4s, v0.4s, v5.s[1] fmla v17.4s, v0.4s, v5.s[1] @@ -446,21 +420,12 
@@ blt E1 fmin v23.4s, v23.4s, v7.4s StoreLH4x8: - shrn v16.4h, v16.4s, #16 - shrn v17.4h, v17.4s, #16 - shrn v18.4h, v18.4s, #16 - shrn v19.4h, v19.4s, #16 - shrn v20.4h, v20.4s, #16 - shrn v21.4h, v21.4s, #16 - shrn v22.4h, v22.4s, #16 - shrn v23.4h, v23.4s, #16 - - - stp d16, d17, [x0] - stp d18, d19, [x0, #16] + + stp q16, q17, [x0] + stp q18, q19, [x0, #32] add x0, x0, x7 - stp d20, d21, [x0] - stp d22, d23, [x0, #16] + stp q20, q21, [x0] + stp q22, q23, [x0, #32] add x0, x0, x7 // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0], x7 @@ -503,8 +468,7 @@ blt E1 cbz x5, StoreLH4x4 AddBiasLH4x4: - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] fmla v16.4s, v0.4s, v5.s[1] fmla v17.4s, v0.4s, v5.s[1] @@ -525,20 +489,15 @@ blt E1 StoreLH4x4: - shrn v16.4h, v16.4s, #16 - shrn v17.4h, v17.4s, #16 - shrn v18.4h, v18.4s, #16 - shrn v19.4h, v19.4s, #16 - - stp d16, d17, [x0] - stp d18, d19, [x0, #16] + stp q16, q17, [x0] + stp q18, q19, [x0, #32] // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0] E4End: sub x3, x3, #4 - add x0, x21, #32 // move dest address of 4 * 4 * sizeof(int16_t) + add x0, x21, #64 // move dest address of 4 * 4 * sizeof(float) add x1, x1, #8 // move dest address of 4 * sizeof(int16_t) E1: @@ -590,9 +549,7 @@ LoopE1: cbz x5, StoreLH1x8 AddBiasLH1x8: - ld1 {v0.4h, v1.4h}, [x20], #16 - shll v1.4s, v1.4h, #16 - shll v0.4s, v0.4h, #16 + ld1 {v0.4s, v1.4s}, [x20], #32 fmla v16.4s, v0.4s, v5.s[1] fmla v20.4s, v1.4s, v5.s[1] @@ -604,10 +561,8 @@ LoopE1: fmin v20.4s, v20.4s, v7.4s StoreLH1x8: - shrn v16.4h, v16.4s, #16 - shrn v20.4h, v20.4s, #16 - st1 {v16.4h}, [x0], x7 - st1 {v20.4h}, [x0], x7 + st1 {v16.4s}, [x0], x7 + st1 {v20.4s}, [x0], x7 bge E1LoopH8 @@ -640,8 +595,7 @@ LoopE1: cbz x5, StoreLH1x4 AddBiasLH1x4: - ld1 {v0.4h}, [x20] - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x20] fmla v16.4s, v0.4s, v5.s[1] @@ -650,13 +604,12 @@ LoopE1: fmin v16.4s, v16.4s, v7.4s StoreLH1x4: - shrn v16.4h, v16.4s, #16 - st1 {v16.4h}, [x0] + st1 {v16.4s}, [x0] E1End: subs x3, x3, #1 - add x0, x21, #8 + add x0, x21, #16 add x1, x1, #2 bne LoopE1 diff --git a/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMul_BF16.S b/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMul_BF16.S index 22c2c24ca..28991753c 100644 --- a/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMul_BF16.S +++ b/source/backend/cpu/arm/arm64/bf16/MNNPackedMatMul_BF16.S @@ -27,6 +27,7 @@ ldr x10, [x3, #16] // h ldr x13, [x3, #24] // cStride ldr x7, [x3, #40] // bExtraStride +lsr x7, x7, #1 // v0, v1, v2: A // v3, v4: B @@ -218,9 +219,7 @@ LoopH: cbz x4, StoreLH8 AddBiasLH8: - ld1 {v0.4h, v1.4h}, [x5], #16 // 8 * sizeof(int16_t) - shll v0.4s, v0.4h, #16 - shll v1.4s, v1.4h, #16 + ld1 {v0.4s, v1.4s}, [x5], #32 // 8 * sizeof(int16_t) fmla v8.4s, v0.4s, v5.s[1] fmla v9.4s, v0.4s, v5.s[1] @@ -305,44 +304,19 @@ LoopH: StoreLH8: - shrn v8.4h, v8.4s, #16 - shrn v9.4h, v9.4s, #16 - shrn v10.4h, v10.4s, #16 - shrn v11.4h, v11.4s, #16 - shrn v12.4h, v12.4s, #16 - shrn v13.4h, v13.4s, #16 - shrn v14.4h, v14.4s, #16 - shrn v15.4h, v15.4s, #16 - shrn v16.4h, v16.4s, #16 - shrn v17.4h, v17.4s, #16 - shrn v18.4h, v18.4s, #16 - shrn v19.4h, v19.4s, #16 - shrn v20.4h, v20.4s, #16 - shrn v21.4h, v21.4s, #16 - shrn v22.4h, v22.4s, #16 - shrn v23.4h, v23.4s, #16 - shrn v24.4h, v24.4s, #16 - shrn v25.4h, v25.4s, #16 - shrn v26.4h, v26.4s, #16 - shrn v27.4h, v27.4s, #16 - shrn v28.4h, v28.4s, #16 - shrn v29.4h, v29.4s, #16 - shrn v30.4h, v30.4s, #16 - shrn v31.4h, v31.4s, #16 - - stp d8, d9, [x0] - stp d10, d11, [x0, #(16 * 1)] // 2 * 4 * sizeof(int16_t) - 
stp d12, d13, [x0, #(16 * 2)] - stp d14, d15, [x0, #(16 * 3)] - stp d16, d17, [x0, #(16 * 4)] - stp d18, d19, [x0, #(16 * 5)] + stp q8, q9, [x0] + stp q10, q11, [x0, #(32 * 1)] // 2 * 4 * sizeof(int16_t) + stp q12, q13, [x0, #(32 * 2)] + stp q14, q15, [x0, #(32 * 3)] + stp q16, q17, [x0, #(32 * 4)] + stp q18, q19, [x0, #(32 * 5)] add x0, x0, x13 // stp donot support post-index offset in register - stp d20, d21, [x0] - stp d22, d23, [x0, #(16 * 1)] - stp d24, d25, [x0, #(16 * 2)] - stp d26, d27, [x0, #(16 * 3)] - stp d28, d29, [x0, #(16 * 4)] - stp d30, d31, [x0, #(16 * 5)] + stp q20, q21, [x0] + stp q22, q23, [x0, #(32 * 1)] + stp q24, q25, [x0, #(32 * 2)] + stp q26, q27, [x0, #(32 * 3)] + stp q28, q29, [x0, #(32 * 4)] + stp q30, q31, [x0, #(32 * 5)] add x0, x0, x13 // st1 {v8.4h, v9.4h, v10.4h, v11.4h}, [x0], #32 // 16 * sizeof(int16_t) @@ -415,8 +389,7 @@ LoopHRemain: cbz x4, StoreLH4 AddBiasLH4: - ld1 {v0.4h}, [x5], #8 - shll v0.4s, v0.4h, #16 + ld1 {v0.4s}, [x5], #16 fmla v8.4s, v0.4s, v5.s[1] fmla v9.4s, v0.4s, v5.s[1] @@ -462,29 +435,9 @@ LoopHRemain: StoreLH4: - shrn v8.4h, v8.4s, #16 - shrn v9.4h, v9.4s, #16 - shrn v10.4h, v10.4s, #16 - shrn v11.4h, v11.4s, #16 - shrn v12.4h, v12.4s, #16 - shrn v13.4h, v13.4s, #16 - shrn v14.4h, v14.4s, #16 - shrn v15.4h, v15.4s, #16 - shrn v16.4h, v16.4s, #16 - shrn v17.4h, v17.4s, #16 - shrn v18.4h, v18.4s, #16 - shrn v19.4h, v19.4s, #16 - - stp d8, d9, [x0] - stp d10, d11, [x0, #(16 * 1)] - stp d12, d13, [x0, #(16 * 2)] - stp d14, d15, [x0, #(16 * 3)] - stp d16, d17, [x0, #(16 * 4)] - stp d18, d19, [x0, #(16 * 5)] - - // st1 {v8.4h, v9.4h, v10.4h, v11.4h}, [x0], #32 - // st1 {v12.4h, v13.4h, v14.4h, v15.4h}, [x0], #32 - // st1 {v16.4h, v17.4h, v18.4h, v19.4h}, [x0] + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], #64 + st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0] sub x10, x10, #1 diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNDynamicQuantFP32.S b/source/backend/cpu/arm/arm64/low_memory/MNNDynamicQuantFP32.S index d1b02673b..f8971acaa 100644 --- a/source/backend/cpu/arm/arm64/low_memory/MNNDynamicQuantFP32.S +++ b/source/backend/cpu/arm/arm64/low_memory/MNNDynamicQuantFP32.S @@ -37,34 +37,32 @@ add \d0\().4s, \d0\().4s, \d2\().4s .endm -//void MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize) +//void MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, size_t src_depth_quad, size_t realSize, int pack) asm_function MNNDynamicQuantFP32 -// x0: src, x1:dst, x2:scale, x3: sum, x4:src_depth_quad, x5:realSize +// x0: src, x1:dst, x2:scale, x3:src_depth_quad, x4:realSize stp d14, d15, [sp, #(-16 * 4)]! 
stp d12, d13, [sp, #(16 * 1)] stp d10, d11, [sp, #(16 * 2)] stp d8, d9, [sp, #(16 * 3)] Start: -lsl x6, x5, #2 // dst_step = batch * unit * sizeof(int8_t) = batch * 4 = batch << 2 +lsl x6, x4, #2 // dst_step = batch * unit * sizeof(int8_t) = batch * 4 = batch << 2 lsl x7, x6, #2 // src_step = dst_step * 4 (sizeof(float32_t)) = dst_step << 2 TILE_8: -cmp x5, #8 +cmp x4, #8 blt TILE_4 sub x8, x7, #64 // src_step - 64 mov x9, x0 // src mov x10, x1 // dst //mov x11, x2 // scale -mov x12, x4 // src_depth_quad +mov x12, x3 // src_depth_quad // quant_scale: v8, 8(batch)*sizeof(float32_t) ld1 {v8.4s, v9.4s}, [x2], #32 // int8 sum -movi v10.4s, #0 -movi v11.4s, #0 LoopSz_8: ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x9], #64 ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x9], x8 @@ -99,36 +97,23 @@ sqxtn v17.8b, v14.8h sqxtn2 v17.16b, v15.8h st1 {v16.16b, v17.16b}, [x10], x6 -// sum -//Transpose v0, v1, v2, v3, v14, v15, v16, v17 -//Add_4x4 v0, v1, v2, v3 -addp v18.4s, v0.4s, v1.4s -addp v19.4s, v2.4s, v3.4s -addp v20.4s, v4.4s, v5.4s -addp v21.4s, v6.4s, v7.4s -addp v22.4s, v18.4s, v19.4s -addp v23.4s, v20.4s, v21.4s - -add v10.4s, v22.4s, v10.4s -add v11.4s, v23.4s, v11.4s subs x12, x12, #1 bne LoopSz_8 Tile8End: -sub x5, x5, #8 // batch -= 8 +sub x4, x4, #8 // batch -= 8 add x0, x0, #128 // src += 8 * 4 * sizeof(float32_t) add x1, x1, #32 // dst += 8 * 4 * sizeof(int8_t) -st1 {v10.4s, v11.4s}, [x3], #32 b TILE_8 TILE_4: -cmp x5, #4 +cmp x4, #4 blt TILE_1 mov x9, x0 // src mov x10, x1 // dst //mov x11, x2 // scale -mov x12, x4 // src_depth_quad +mov x12, x3 // src_depth_quad // quant_scale: v8, 4(batch)*sizeof(float32_t) ld1 {v8.4s}, [x2], #16 @@ -158,28 +143,23 @@ sqxtn v6.8b, v4.8h sqxtn2 v6.16b, v5.8h st1 {v6.16b}, [x10], x6 -// sum -Transpose v0, v1, v2, v3, v14, v15, v16, v17 -Add_4x4 v0, v1, v2, v3 -add v10.4s, v0.4s, v10.4s subs x12, x12, #1 bne LoopSz_4 Tile4End: -sub x5, x5, #4 // batch -= 4 +sub x4, x4, #4 // batch -= 4 add x0, x0, #64 // src += 4 * 4 * sizeof(float32_t) add x1, x1, #16 // dst += 4 * 4 * sizeof(int8_t) //add x2, x2, #16 // scale += 4 * sizeof(float32_t) -st1 {v10.4s}, [x3], #16 b TILE_4 TILE_1: -cmp x5, #1 +cmp x4, #1 blt End mov x9, x0 // src mov x10, x1 // dst -mov x12, x4 // src_depth_quad +mov x12, x3 // src_depth_quad // quant_scale: v8 ld1 {v8.s}[0], [x2], #4 @@ -192,26 +172,17 @@ fmul v0.4s, v0.4s, v8.s[0] // int16_t x = round(x) fcvtas v0.4s, v0.4s -dup v1.4s, v0.s[1] -dup v2.4s, v0.s[2] -dup v3.4s, v0.s[3] - // y = (int8_t)x sqxtn v7.4h, v0.4s sqxtn v7.8b, v7.8h -// sum - -Add_4x4 v0, v1, v2, v3 -add v4.4s, v0.4s, v4.4s st1 {v7.s}[0], [x10], x6 subs x12, x12, #1 bne LoopSz_1 -st1 {v4.s}[0], [x3], #4 Tile1End: -subs x5, x5, #1 // batch -= 1 +subs x4, x4, #1 // batch -= 1 add x0, x0, #16 // src += 1 * 4 * sizeof(float32_t) add x1, x1, #4 // dst += 1 * 4 * sizeof(int8_t) //add x2, x2, #4 // scale += 1 * sizeof(float32_t) @@ -224,4 +195,4 @@ ldp d12, d13, [sp, #(16 * 1)] ldp d14, d15, [sp], #(16 * 4) ret -#endif \ No newline at end of file +#endif diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNDynamicUpdateConvBiasScale.S b/source/backend/cpu/arm/arm64/low_memory/MNNDynamicUpdateConvBiasScale.S new file mode 100644 index 000000000..b0fa8194d --- /dev/null +++ b/source/backend/cpu/arm/arm64/low_memory/MNNDynamicUpdateConvBiasScale.S @@ -0,0 +1,229 @@ +// +// MNNDynamicUpdateConvBiasScale.S +// MNN +// +// Created by MNN on 2019/01/22. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro Round z0, z1, z2, z3 + fcvtzs \z0\().4s, \z0\().4s + fcvtzs \z1\().4s, \z1\().4s + fcvtzs \z2\().4s, \z2\().4s + fcvtzs \z3\().4s, \z3\().4s +.endm + +.macro MUL_CONSTANT s0, s1, s2, s3, z0 + fmul \s0\().4s, \s0\().4s, \z0\().4s + fmul \s1\().4s, \s1\().4s, \z0\().4s + fmul \s2\().4s, \s2\().4s, \z0\().4s + fmul \s3\().4s, \s3\().4s, \z0\().4s +.endm + +.macro DIV4 s0, s1, s2, s3, z0, z1, z2, z3 + fdiv \s0\().4s, \s0\().4s, \z0\().4s + fdiv \s1\().4s, \s1\().4s, \z1\().4s + fdiv \s2\().4s, \s2\().4s, \z2\().4s + fdiv \s3\().4s, \s3\().4s, \z3\().4s +.endm + +.macro SUB4 s0, s1, s2, s3, z0, z1, z2, z3 + fsub \s0\().4s, \s0\().4s, \z0\().4s + fsub \s1\().4s, \s1\().4s, \z1\().4s + fsub \s2\().4s, \s2\().4s, \z2\().4s + fsub \s3\().4s, \s3\().4s, \z3\().4s +.endm + +.macro Float32ToHalf s0, s1, s2, s3, d0, d1 + fcvtn \d0\().4h, \s0\().4s + fcvtn2 \d0\().8h, \s1\().4s + fcvtn \d1\().4h, \s2\().4s + fcvtn2 \d1\().8h, \s3\().4s +.endm + +/* +Note: Only used in dynamic quant,so do not need compare min max! + */ +asm_function MNNDynamicUpdateConvBiasScale +//MNNDynamicUpdateConvBiasScale(biasFloat.data(), scaleFloat.data(), biasfp32, weightDequantScale, +//inputScale, weightKernelSum, inputZero, UP_DIV(output->channel(), 4), alphaSize) +//x0:biasFloat, x1:scaleFloat, x2:biasfp32, x3:weightDequantScale, x4:inputScale, x5:weightKernelSum, x6:inputZero, x7:ocQuad +//Load from sp: x9: scaleSize + +ldr x9, [sp, #0] +stp d14, d15, [sp, #-64]! +stp d12, d13, [sp, #16] +stp d10, d11, [sp, #32] +stp d8, d9, [sp, #48] + +ld1r {v31.4s}, [x4] // input dequant scale +ld1r {v30.4s}, [x6] // input dequant zero:fp32 zero + +lsr x9, x9, #2 +// fuse scale + +SCALE_L24: +cmp x9, #24 +blt SCALE_L16 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x3], #64 +ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x3], #64 +ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x3], #64 +ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x3], #64 +ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x3], #64 +ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x3], #64 +MUL_CONSTANT v0, v1, v2, v3, v31 // w_scale * x_scale +MUL_CONSTANT v4, v5, v6, v7, v31 +MUL_CONSTANT v8, v9, v10, v11, v31 +MUL_CONSTANT v12, v13, v14, v15, v31 +MUL_CONSTANT v16, v17, v18, v19, v31 +MUL_CONSTANT v20, v21, v22, v23, v31 +sub x9, x9, #24 +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x1], #64 +st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x1], #64 +st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x1], #64 +st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x1], #64 +st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x1], #64 +st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x1], #64 +b SCALE_L24 + +SCALE_L16: +cmp x9, #16 +blt SCALE_L8 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x3], #64 +ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x3], #64 +ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x3], #64 +ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x3], #64 +MUL_CONSTANT v0, v1, v2, v3, v31 // w_scale * x_scale +MUL_CONSTANT v4, v5, v6, v7, v31 +MUL_CONSTANT v8, v9, v10, v11, v31 +MUL_CONSTANT v12, v13, v14, v15, v31 +sub x9, x9, #16 +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x1], #64 +st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x1], #64 +st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x1], #64 +st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x1], #64 +b SCALE_L16 + +SCALE_L8: +cmp x9, #8 +blt SCALE_L4 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x3], #64 +ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x3], #64 +MUL_CONSTANT v0, v1, v2, v3, v31 // w_scale * x_scale +MUL_CONSTANT v4, v5, v6, v7, v31 +sub x9, x9, #8 +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, 
[x1], #64 +st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x1], #64 +b SCALE_L8 + +SCALE_L4: +cmp x9, #4 +blt SCALE_L1 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x3], #64 +MUL_CONSTANT v0, v1, v2, v3, v31 // w_scale * x_scale +sub x9, x9, #4 +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x1], #64 +b SCALE_L4 + +SCALE_L1: +cmp x9, #1 +blt BIAS_L8 + +ld1 {v0.4s}, [x3], #16 +fmul v0.4s, v0.4s, v31.4s +sub x9, x9, #1 +st1 {v0.4s}, [x1], #16 +b SCALE_L1 + +// Bias: +BIAS_L16: +cmp x7, #16 +blt BIAS_L8 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 // oldbias +ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], #64 +ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x2], #64 +ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x2], #64 +ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x5], #64 // weightKernelSum +ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x5], #64 +ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x5], #64 + +sub x7, x7, #16 + +MUL_CONSTANT v16, v17, v18, v19, v30 // w_sum * x_zero +MUL_CONSTANT v20, v21, v22, v23, v30 // w_sum * x_zero +MUL_CONSTANT v24, v25, v26, v27, v30 // w_sum * x_zero + +SUB4 v0, v1, v2, v3, v16, v17, v18, v19 +SUB4 v4, v5, v6, v7, v20, v21, v22, v23 +ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x5], #64 +SUB4 v8, v9, v10, v11, v24, v25, v26, v27 +MUL_CONSTANT v16, v17, v18, v19, v30 // w_sum * x_zero +SUB4 v12, v13, v14, v15, v16, v17, v18, v19 + +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64 // bias float +st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], #64 +st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0], #64 +st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], #64 +b BIAS_L16 + +BIAS_L8: +cmp x7, #8 +blt BIAS_L4 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 // oldbias +ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], #64 +ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x5], #64 // weightKernelSum +ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x5], #64 +sub x7, x7, #8 + +MUL_CONSTANT v16, v17, v18, v19, v30 // w_sum * x_zero +MUL_CONSTANT v20, v21, v22, v23, v30 // w_sum * x_zero +SUB4 v0, v1, v2, v3, v16, v17, v18, v19 +SUB4 v4, v5, v6, v7, v20, v21, v22, v23 +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64 // bias float +st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], #64 +b BIAS_L8 + +BIAS_L4: +cmp x7, #4 +blt BIAS_L1 + +ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 // oldbias +ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x5], #64 // weightKernelSum +sub x7, x7, #4 + +MUL_CONSTANT v8, v9, v10, v11, v30 // w_sum * x_zero +SUB4 v0, v1, v2, v3, v8, v9, v10, v11 +st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64 +b BIAS_L4 + +BIAS_L1: +cmp x7, #1 +blt End +ld1 {v0.4s}, [x2], #16 // oldbias +ld1 {v4.4s}, [x5], #16 // weightKernelSum +sub x7, x7, #1 +fmul v4.4s, v4.4s, v30.4s // w_sum * x_zero +fsub v0.4s, v0.4s, v4.4s // oldbias - w_sum * x_zero +st1 {v0.4s}, [x0], #16 +b BIAS_L1 + +End: +ldp d8, d9, [sp, #48] +ldp d10, d11, [sp, #32] +ldp d12, d13, [sp, #16] +ldp d14, d15, [sp], #64 +ret +#endif diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32.S deleted file mode 100644 index 83548cfd9..000000000 --- a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32.S +++ /dev/null @@ -1,308 +0,0 @@ -// -// MNNGemmHybridInt4_sdot.S -// MNN -// -// Created by MNN on 2023/11/09. 
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s - fmul \d0\().4s, \d0\().4s, \s\().s[0] - fmul \d1\().4s, \d1\().4s, \s\().s[1] - fmul \d2\().4s, \d2\().4s, \s\().s[2] - fmul \d3\().4s, \d3\().4s, \s\().s[3] -.endm - -.macro Dequant c0, a0, z0, b0, s0, idx - fmul \c0\().4s, \c0\().4s, \a0\().4s - fmla \c0\().4s, \z0\().4s, \s0\().s[\idx] - fadd \c0\().4s, \c0\().4s, \b0\().4s -.endm - -asm_function MNNGemmHybridInt4FP32 - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt4FP32(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! -stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #3 // x13 = src_depth_quad * UNIT * UNIT_SRC / 2(int4) = src_depth_quad * 8 = src_depth_quad << 3 - -TILE_4: - cmp x6, #4 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - // batch=0,oc=0-3 - movi v10.4s, #0 //ic=0-3 - movi v11.4s, #0 - movi v12.4s, #0 - movi v13.4s, #0 - // batch=1,oc=0-3 - movi v16.4s, #0 - movi v17.4s, #0 - movi v18.4s, #0 - movi v19.4s, #0 - // batch=2,oc=0-3 - movi v20.4s, #0 - movi v21.4s, #0 - movi v22.4s, #0 - movi v23.4s, #0 - // batch=3,oc=0-3 - movi v24.4s, #0 - movi v25.4s, #0 - movi v26.4s, #0 - movi v27.4s, #0 - // mask - movi v14.16b, #15 - // offset - movi v15.16b, #8 -LoopSz_TILE_4: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.8b}, [x25], #8 // weight - ld1 {v4.16b}, [x24], x15 // src - // int4->int8 - ushr v8.16b, v0.16b, #4 - and v9.16b, v0.16b, v14.16b - zip1 v0.16b, v8.16b, v9.16b - - Unit_TILE_4: - sxtl v5.8h, v4.8b // src batch=0,1 - sxtl2 v6.8h, v4.16b // batch=2,3 - sxtl v1.8h, v0.8b // weight oc=0,1 - sxtl2 v2.8h, v0.16b // oc=2,3 - dup v28.2d, v1.d[0] // oc=0,0 - dup v29.2d, v1.d[1] // oc=1,1 - dup v30.2d, v2.d[0] // oc=2,2 - dup v31.2d, v2.d[1] // oc=3,3 - // batch=0 - smlal v10.4s, v5.4h, v28.4h - smlal v11.4s, v5.4h, v29.4h - smlal v12.4s, v5.4h, v30.4h - smlal v13.4s, v5.4h, v31.4h - // batch=1 - smlal2 v16.4s, v5.8h, v28.8h - smlal2 v17.4s, v5.8h, v29.8h - smlal2 v18.4s, v5.8h, v30.8h - smlal2 v19.4s, v5.8h, v31.8h - // batch=2 - smlal v20.4s, v6.4h, v28.4h - smlal v21.4s, v6.4h, v29.4h - smlal v22.4s, v6.4h, 
v30.4h - smlal v23.4s, v6.4h, v31.4h - // batch=3 - smlal2 v24.4s, v6.8h, v28.8h - smlal2 v25.4s, v6.8h, v29.8h - smlal2 v26.4s, v6.8h, v30.8h - smlal2 v27.4s, v6.8h, v31.8h - // .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - // .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - // .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - // .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - // add 4 ic - addp v10.4s, v10.4s, v11.4s - addp v12.4s, v12.4s, v13.4s - addp v16.4s, v16.4s, v17.4s - addp v18.4s, v18.4s, v19.4s - addp v20.4s, v20.4s, v21.4s - addp v22.4s, v22.4s, v23.4s - addp v24.4s, v24.4s, v25.4s - addp v26.4s, v26.4s, v27.4s - - addp v10.4s, v10.4s, v12.4s // batch=0,oc=0-3 - addp v11.4s, v16.4s, v18.4s - addp v12.4s, v20.4s, v22.4s - addp v13.4s, v24.4s, v26.4s - - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v10, v11, v12, v13 - // Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.4s}, [x23] // scales, 4 batch,so 4 scale - - MulScale v10, v11, v12, v13, v5 - -Tile4Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v10, v0, v1, v2, v3, 0 - Dequant v11, v0, v1, v2, v3, 1 - Dequant v12, v0, v1, v2, v3, 2 - Dequant v13, v0, v1, v2, v3, 3 - st1 {v10.4s, v11.4s, v12.4s, v13.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 4 * sizeof(float32_t) - add x1, x1, #16 // src += 4 * 4 * sizeof(int8_t) - add x11, x11, #16 // sum += 4 * sizeof(float32_t) - add x12, x12, #16 // scale += 4 * sizeof(float32_t) - b TILE_4 - -TILE_1: - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4, sizeof(float32_t)/4=sizeof(int8_t) - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - // batch=0,oc=0-3 - movi v10.4s, #0 //ic=0-3 - movi v11.4s, #0 - movi v12.4s, #0 - movi v13.4s, #0 - // mask - movi v14.16b, #15 - // offset - movi v15.16b, #8 -LoopSz_TILE_1: - // src : 1(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 1 x 4 x [1] : v16 - ld1 {v0.8b}, [x25], #8 // weight pack*pack*0.5 - ld1 {v4.s}[0], [x24], x15 // src - // int4->int8 - ushr v8.16b, v0.16b, #4 - and v9.16b, v0.16b, v14.16b - zip1 v0.16b, v8.16b, v9.16b - - Unit_TILE_1: - sxtl v5.8h, v4.8b // src batch=0 - sxtl v1.8h, v0.8b // weight oc=0,1 - sxtl2 v2.8h, v0.16b // oc=2,3 - dup v28.2d, v1.d[0] // oc=0,0 - dup v29.2d, v1.d[1] // oc=1,1 - dup v30.2d, v2.d[0] // oc=2,2 - dup v31.2d, v2.d[1] // oc=3,3 - // batch=0 - smlal v10.4s, v5.4h, v28.4h - smlal v11.4s, v5.4h, v29.4h - smlal v12.4s, v5.4h, v30.4h - smlal v13.4s, v5.4h, v31.4h - - //.inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - // add 4 ic - addp v10.4s, v10.4s, v11.4s - addp v12.4s, v12.4s, v13.4s - addp v16.4s, v10.4s, v12.4s - add x7, x7, x13 - sub x27, x27, #1 - scvtf v16.4s, v16.4s - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - fmul v16.4s, v16.4s, v4.s[0] -Tile1Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 
{v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fmla v2.4s, v0.4s, v16.4s - fmla v2.4s, v1.4s, v3.s[0] - st1 {v2.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - subs x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 4 * sizeof(float32_t) - add x1, x1, #4 // src += 1 * 4 * sizeof(int8_t) - add x11, x11, #4 // sum += 1 * sizeof(float32_t) - add x12, x12, #4 // scale += 1 * sizeof(float32_t) - bne TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_sdot.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_sdot.S deleted file mode 100644 index 11bf247a5..000000000 --- a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_sdot.S +++ /dev/null @@ -1,413 +0,0 @@ -// -// MNNGemmHybridInt4_sdot.S -// MNN -// -// Created by MNN on 2023/11/09. -// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s - fmul \d0\().4s, \d0\().4s, \s\().s[0] - fmul \d1\().4s, \d1\().4s, \s\().s[1] - fmul \d2\().4s, \d2\().4s, \s\().s[2] - fmul \d3\().4s, \d3\().4s, \s\().s[3] -.endm - -.macro Dequant c0, a0, z0, b0, s0, idx - fmul \c0\().4s, \c0\().4s, \a0\().4s - fmla \c0\().4s, \z0\().4s, \s0\().s[\idx] - fadd \c0\().4s, \c0\().4s, \b0\().4s -.endm - -asm_function MNNGemmHybridInt4FP32_sdot - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt4FP32_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! 
-stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #3 // x13 = src_depth_quad * UNIT * UNIT_SRC / 2(int8) = src_depth_quad * 8 = src_depth_quad << 3 - -TILE_12: - cmp x6, #12 - blt TILE_8 - sub x14, x4, #128 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_12: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - movi v16.4s, #0 - movi v17.4s, #0 - movi v18.4s, #0 - movi v19.4s, #0 - movi v20.4s, #0 - movi v21.4s, #0 - movi v22.4s, #0 - movi v23.4s, #0 - movi v24.4s, #0 - movi v25.4s, #0 - movi v26.4s, #0 - movi v27.4s, #0 - - // mask - movi v14.16b, #15 -LoopSz_TILE_12: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.8b}, [x25], #8 // weight - ld1 {v4.16b, v5.16b, v6.16b}, [x24], x15 // src - // int4->int8 - ushr v8.16b, v0.16b, #4 - and v9.16b, v0.16b, v14.16b - zip1 v0.16b, v8.16b, v9.16b - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - .inst 0x4f85e014 // sdot v20.4s, v0.16b, v5.4b[0] // batch4 - .inst 0x4fa5e015 // sdot v21.4s, v0.16b, v5.4b[1] // batch5 - .inst 0x4f85e816 // sdot v22.4s, v0.16b, v5.4b[2] // batch6 - .inst 0x4fa5e817 // sdot v23.4s, v0.16b, v5.4b[3] // batch7 - .inst 0x4f86e018 // sdot v24.4s, v0.16b, v6.4b[0] // batch8 - .inst 0x4fa6e019 // sdot v25.4s, v0.16b, v6.4b[1] // batch9 - .inst 0x4f86e81a // sdot v26.4s, v0.16b, v6.4b[2] // batch10 - .inst 0x4fa6e81b // sdot v27.4s, v0.16b, v6.4b[3] // batch11 - subs x26, x26, #1 - bne LoopSz_TILE_12 - -LoopSzEnd_TILE_12: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - Int32ToFloat v24, v25, v26, v27 - // using float scale dequant for precison - ld1 {v5.4s, v6.4s, v7.4s}, [x23] // scales, 12 batch,so 12 scale - - MulScale v16, v17, v18, v19, v5 - MulScale v20, v21, v22, v23, v6 - MulScale v24, v25, v26, v27, v7 - -Tile12Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s, v4.4s, v5.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v16, v0, v1, v2, v3, 0 - Dequant v17, v0, v1, v2, v3, 1 - Dequant v18, v0, v1, v2, v3, 2 - Dequant v19, v0, v1, v2, v3, 3 - Dequant v20, v0, v1, v2, v4, 0 - Dequant v21, v0, v1, v2, v4, 1 - Dequant v22, v0, v1, v2, v4, 2 - Dequant v23, v0, v1, v2, v4, 3 - Dequant v24, v0, v1, v2, v5, 0 - Dequant v25, v0, v1, v2, v5, 1 - Dequant v26, v0, v1, v2, v5, 2 - Dequant v27, v0, v1, v2, v5, 3 - st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x28], #64 - st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x28], #64 - st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_12 -Tile12End: - sub x6, x6, #12 // bach -= 12 - add x0, x0, #192 // dst += 12 * 4 * sizeof(float32_t) - add x1, x1, #48 // src 
+= 12 * 4 * sizeof(int8_t) - add x11, x11, #48 // sum += 12 * sizeof(float32_t) - add x12, x12, #48 // scale += 12 * sizeof(float32_t) - b TILE_12 - -TILE_8: - cmp x6, #8 - blt TILE_4 - sub x14, x4, #64 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_8: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - movi v16.4s, #0 - movi v17.4s, #0 - movi v18.4s, #0 - movi v19.4s, #0 - movi v20.4s, #0 - movi v21.4s, #0 - movi v22.4s, #0 - movi v23.4s, #0 - - // mask - movi v14.16b, #15 -LoopSz_TILE_8: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.8b}, [x25], #8 // weight - ld1 {v4.16b, v5.16b}, [x24], x15 // src - // int4->int8 - ushr v8.16b, v0.16b, #4 - and v9.16b, v0.16b, v14.16b - zip1 v0.16b, v8.16b, v9.16b - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - .inst 0x4f85e014 // sdot v20.4s, v0.16b, v5.4b[0] // batch4 - .inst 0x4fa5e015 // sdot v21.4s, v0.16b, v5.4b[1] // batch5 - .inst 0x4f85e816 // sdot v22.4s, v0.16b, v5.4b[2] // batch6 - .inst 0x4fa5e817 // sdot v23.4s, v0.16b, v5.4b[3] // batch7 - subs x26, x26, #1 - bne LoopSz_TILE_8 - -LoopSzEnd_TILE_8: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.4s, v6.4s}, [x23] // scales, 8 batch,so 8 scale - - MulScale v16, v17, v18, v19, v5 - MulScale v20, v21, v22, v23, v6 - -Tile8Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s, v4.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v16, v0, v1, v2, v3, 0 - Dequant v17, v0, v1, v2, v3, 1 - Dequant v18, v0, v1, v2, v3, 2 - Dequant v19, v0, v1, v2, v3, 3 - Dequant v20, v0, v1, v2, v4, 0 - Dequant v21, v0, v1, v2, v4, 1 - Dequant v22, v0, v1, v2, v4, 2 - Dequant v23, v0, v1, v2, v4, 3 - st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x28], #64 - st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_8 -Tile8End: - sub x6, x6, #8 // bach -= 4 - add x0, x0, #128 // dst += 8 * 4 * sizeof(float32_t) - add x1, x1, #32 // src += 8 * 4 * sizeof(int8_t) - add x11, x11, #32 // sum += 8 * sizeof(float32_t) - add x12, x12, #32 // scale += 8 * sizeof(float32_t) - b TILE_8 - -TILE_4: - cmp x6, #4 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - // mask - movi v14.16b, #15 -LoopSz_TILE_4: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.8b}, [x25], #8 // weight - ld1 {v4.16b}, [x24], x15 // src - // int4->int8 - ushr v8.16b, v0.16b, #4 - 
and v9.16b, v0.16b, v14.16b - zip1 v0.16b, v8.16b, v9.16b - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - // Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.4s}, [x23] // scales, 4 batch,so 4 scale - - MulScale v16, v17, v18, v19, v5 - -Tile4Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v16, v0, v1, v2, v3, 0 - Dequant v17, v0, v1, v2, v3, 1 - Dequant v18, v0, v1, v2, v3, 2 - Dequant v19, v0, v1, v2, v3, 3 - st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 4 * sizeof(float32_t) - add x1, x1, #16 // src += 4 * 4 * sizeof(int8_t) - add x11, x11, #16 // sum += 4 * sizeof(float32_t) - add x12, x12, #16 // scale += 4 * sizeof(float32_t) - b TILE_4 - -TILE_1: - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4, sizeof(float32_t)/4=sizeof(int8_t) - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - // mask - movi v14.16b, #15 -LoopSz_TILE_1: - // src : 1(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 1 x 4 x [1] : v16 - ld1 {v0.8b}, [x25], #8 // weight pack*pack*0.5 - ld1 {v4.s}[0], [x24], x15 // src - // int4->int8 - ushr v8.16b, v0.16b, #4 - and v9.16b, v0.16b, v14.16b - zip1 v0.16b, v8.16b, v9.16b - - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - scvtf v16.4s, v16.4s - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - fmul v16.4s, v16.4s, v4.s[0] -Tile1Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fmla v2.4s, v0.4s, v16.4s - fmla v2.4s, v1.4s, v3.s[0] - st1 {v2.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - subs x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 4 * sizeof(float32_t) - add x1, x1, #4 // src += 1 * 4 * sizeof(int8_t) - add x11, x11, #4 // sum += 1 * sizeof(float32_t) - add x12, x12, #4 // scale += 1 * sizeof(float32_t) - bne TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_smmla.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_smmla.S deleted file mode 100644 index aa0b5a383..000000000 --- a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt4FP32_smmla.S +++ 
/dev/null @@ -1,476 +0,0 @@ -// -// MNNGemmHybridInt4FP32_smmla.S -// MNN -// -// Created by MNN on 2023/11/09. -// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s, idx0, idx1 - fmul \d0\().4s, \d0\().4s, \s\().s[\idx0] - fmul \d1\().4s, \d1\().4s, \s\().s[\idx0] - fmul \d2\().4s, \d2\().4s, \s\().s[\idx1] - fmul \d3\().4s, \d3\().4s, \s\().s[\idx1] -.endm - -.macro Dequant c0, a0, z0, b0, s0, idx - fmul \c0\().4s, \c0\().4s, \a0\().4s - fmla \c0\().4s, \z0\().4s, \s0\().s[\idx] - fadd \c0\().4s, \c0\().4s, \b0\().4s -.endm - -asm_function MNNGemmHybridInt4FP32_smmla - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt4FP32_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! -stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #5 // x13 = src_depth_quad * UNIT * UNIT_SRC / 2(int4) = src_depth_quad * 32 = src_depth_quad << 5 - -TILE_8: - cmp x6, #8 - blt TILE_4 - sub x14, x4, #192 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_8: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - dup v24.4s, wzr - dup v25.4s, wzr - dup v26.4s, wzr - dup v27.4s, wzr - dup v28.4s, wzr - dup v29.4s, wzr - dup v30.4s, wzr - dup v31.4s, wzr - - // mask - movi v10.16b, #15 -LoopSz_TILE_8: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - //ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - ld1 {v12.16b, v13.16b, v14.16b, v15.16b}, [x24], x15 // src - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v10.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v10.16b - - .inst 0x4e80a590 // smmla v16.4s, v12.16b, v0.16b - .inst 0x4e81a591 // smmla v17.4s, v12.16b, v1.16b - .inst 0x4e82a592 // smmla v18.4s, v12.16b, v2.16b - .inst 0x4e83a593 // smmla v19.4s, v12.16b, v3.16b - .inst 0x4e80a5b4 // smmla v20.4s, v13.16b, v0.16b - .inst 0x4e81a5b5 // smmla v21.4s, v13.16b, v1.16b - .inst 0x4e82a5b6 // smmla v22.4s, v13.16b, v2.16b - .inst 0x4e83a5b7 // smmla v23.4s, v13.16b, v3.16b - .inst 0x4e80a5d8 // smmla v24.4s, v14.16b, v0.16b - 
.inst 0x4e81a5d9 // smmla v25.4s, v14.16b, v1.16b - .inst 0x4e82a5da // smmla v26.4s, v14.16b, v2.16b - .inst 0x4e83a5db // smmla v27.4s, v14.16b, v3.16b - .inst 0x4e80a5fc // smmla v28.4s, v15.16b, v0.16b - .inst 0x4e81a5fd // smmla v29.4s, v15.16b, v1.16b - .inst 0x4e82a5fe // smmla v30.4s, v15.16b, v2.16b - .inst 0x4e83a5ff // smmla v31.4s, v15.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_8 - -LoopSzEnd_TILE_8: - add x7, x7, x13 - sub x27, x27, #1 - - trn1 v0.2d, v16.2d, v17.2d // batch:0 oc:0-3 - trn1 v1.2d, v18.2d, v19.2d // batch:0 oc:4-7 - trn2 v2.2d, v16.2d, v17.2d // batch:1 oc:0-3 - trn2 v3.2d, v18.2d, v19.2d // batch:1 oc:4-7 - trn1 v4.2d, v20.2d, v21.2d // batch:2 oc:0-3 - trn1 v5.2d, v22.2d, v23.2d // batch:2 oc:4-7 - trn2 v6.2d, v20.2d, v21.2d // batch:3 oc:0-3 - trn2 v7.2d, v22.2d, v23.2d // batch:3 oc:4-7 - - trn1 v8.2d, v24.2d, v25.2d // batch:0 oc:0-3 - trn1 v9.2d, v26.2d, v27.2d // batch:0 oc:4-7 - trn2 v10.2d, v24.2d, v25.2d // batch:1 oc:0-3 - trn2 v11.2d, v26.2d, v27.2d // batch:1 oc:4-7 - trn1 v12.2d, v28.2d, v29.2d // batch:2 oc:0-3 - trn1 v13.2d, v30.2d, v31.2d // batch:2 oc:4-7 - trn2 v14.2d, v28.2d, v29.2d // batch:3 oc:0-3 - trn2 v15.2d, v30.2d, v31.2d // batch:3 oc:4-7 - - Int32ToFloat v0, v1, v2, v3 - Int32ToFloat v4, v5, v6, v7 - Int32ToFloat v8, v9, v10, v11 - Int32ToFloat v12, v13, v14, v15 - // using float scale dequant for precison - ld1 {v16.4s, v17.4s}, [x23] // scales - MulScale v0, v1, v2, v3, v16, 0, 1 - MulScale v4, v5, v6, v7, v16, 2, 3 - MulScale v8, v9, v10, v11, v17, 0, 1 - MulScale v12, v13, v14, v15, v17, 2, 3 -Tile8Dequant: - ld1 {v18.4s, v19.4s}, [x19], #32 // alpha - ld1 {v20.4s, v21.4s}, [x20], #32 // zero - ld1 {v22.4s, v23.4s}, [x21], #32 // bias - ld1 {v24.4s, v25.4s}, [x22] // sums - // alpha * cusum + (zero * sums) + bias - Dequant v0, v18, v20, v22, v24, 0 // Batch0 - Dequant v1, v19, v21, v23, v24, 0 - Dequant v2, v18, v20, v22, v24, 1 // Batch1 - Dequant v3, v19, v21, v23, v24, 1 - Dequant v4, v18, v20, v22, v24, 2 // Batch2 - Dequant v5, v19, v21, v23, v24, 2 - Dequant v6, v18, v20, v22, v24, 3 // Batch3 - Dequant v7, v19, v21, v23, v24, 3 - Dequant v8, v18, v20, v22, v25, 0 // Batch4 - Dequant v9, v19, v21, v23, v25, 0 - Dequant v10, v18, v20, v22, v25, 1 // Batch5 - Dequant v11, v19, v21, v23, v25, 1 - Dequant v12, v18, v20, v22, v25, 2 // Batch6 - Dequant v13, v19, v21, v23, v25, 2 - Dequant v14, v18, v20, v22, v25, 3 // Batch7 - Dequant v15, v19, v21, v23, v25, 3 - st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x28], #64 - st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x28], #64 - st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x28], #64 - st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_8 -Tile8End: - sub x6, x6, #8 // bach -= 8 - add x0, x0, #256 // dst += 8 * 8 * sizeof(float32_t) - add x1, x1, #64 // src += 8 * 8 * sizeof(int8_t) - add x11, x11, #32 // sum += 8 * sizeof(float32_t) - add x12, x12, #32 // scale += 8 * sizeof(float32_t) - b TILE_8 - -TILE_4: - cmp x6, #4 - blt TILE_2 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - sub x14, x14, #64 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup 
v22.4s, wzr - dup v23.4s, wzr - // mask - movi v10.16b, #15 -LoopSz_TILE_4: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - //ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v10.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v10.16b - ld1 {v4.16b, v5.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - .inst 0x4e80a4b4 // smmla v20.4s, v5.16b, v0.16b - .inst 0x4e81a4b5 // smmla v21.4s, v5.16b, v1.16b - .inst 0x4e82a4b6 // smmla v22.4s, v5.16b, v2.16b - .inst 0x4e83a4b7 // smmla v23.4s, v5.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - - trn1 v24.2d, v16.2d, v17.2d // batch:0 oc:0-3 - trn1 v25.2d, v18.2d, v19.2d // batch:0 oc:4-7 - trn2 v26.2d, v16.2d, v17.2d // batch:1 oc:0-3 - trn2 v27.2d, v18.2d, v19.2d // batch:1 oc:4-7 - trn1 v28.2d, v20.2d, v21.2d // batch:2 oc:0-3 - trn1 v29.2d, v22.2d, v23.2d // batch:2 oc:4-7 - trn2 v30.2d, v20.2d, v21.2d // batch:3 oc:0-3 - trn2 v31.2d, v22.2d, v23.2d // batch:3 oc:4-7 - Int32ToFloat v24, v25, v26, v27 - Int32ToFloat v28, v29, v30, v31 - // using float scale dequant for precison - ld1 {v5.4s}, [x23] // scales - MulScale v24, v25, v26, v27, v5, 0, 1 - MulScale v28, v29, v30, v31, v5, 2, 3 -Tile4Dequant: - ld1 {v0.4s, v1.4s}, [x19], #32 // alpha - ld1 {v2.4s, v3.4s}, [x20], #32 // zero - ld1 {v8.4s, v9.4s}, [x21], #32 // bias - ld1 {v6.4s}, [x22] // sums - // alpha * cusum + (zero * sums) + bias - Dequant v24, v0, v2, v8, v6, 0 // Batch0 - Dequant v25, v1, v3, v9, v6, 0 - Dequant v26, v0, v2, v8, v6, 1 // Batch1 - Dequant v27, v1, v3, v9, v6, 1 - Dequant v28, v0, v2, v8, v6, 2 // Batch2 - Dequant v29, v1, v3, v9, v6, 2 - Dequant v30, v0, v2, v8, v6, 3 // Batch3 - Dequant v31, v1, v3, v9, v6, 3 - st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x28], #64 - st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #128 // dst += 4 * 8 * sizeof(float32_t) - add x1, x1, #32 // src += 4 * 8 * sizeof(int8_t) - add x11, x11, #16 // sum += 4 * sizeof(float32_t) - add x12, x12, #16 // scale += 4 * sizeof(float32_t) - b TILE_4 - -TILE_2: - cmp x6, #2 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_2: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - // mask - movi v14.16b, #15 -LoopSz_TILE_2: - // src : 1 x [2 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [4] : v16-19 - //ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v14.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v14.16b - ld1 {v4.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, 
v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_2 - -LoopSzEnd_TILE_2: - add x7, x7, x13 - sub x27, x27, #1 - trn1 v20.2d, v16.2d, v17.2d - trn1 v21.2d, v18.2d, v19.2d - trn2 v22.2d, v16.2d, v17.2d - trn2 v23.2d, v18.2d, v19.2d - Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.d}[0], [x23] // scales - fmul v20.4s, v20.4s, v5.s[0] - fmul v21.4s, v21.4s, v5.s[0] - fmul v22.4s, v22.4s, v5.s[1] - fmul v23.4s, v23.4s, v5.s[1] -Tile2Dequant: - ld1 {v0.4s, v1.4s}, [x19], #32 // alpha - ld1 {v2.4s, v3.4s}, [x20], #32 // zero - ld1 {v8.4s, v9.4s}, [x21], #32 // bias - ld1 {v10.d}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - Dequant v20, v0, v2, v8, v10, 0 - Dequant v21, v1, v3, v9, v10, 0 - Dequant v22, v0, v2, v8, v10, 1 - Dequant v23, v1, v3, v9, v10, 1 - st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_2 -Tile2End: - sub x6, x6, #2 // batch -= 2 - add x0, x0, #64 // dst += 2 * 8 * sizeof(float32_t) - add x1, x1, #16 // dst += 2 * 8 * sizeof(int8_t) - add x11, x11, #8 // sum += 2 * sizeof(float32_t) - add x12, x12, #8 // scale += 2 * sizeof(float32_t) - b TILE_2 - -TILE_1: - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4, sizeof(float32_t)/4=sizeof(int8_t) - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - // mask - movi v14.16b, #15 - -LoopSz_TILE_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [2] : v16-v19 - //ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v8.16b, v9.16b}, [x25], #32 // weight - // int4 to int8: v0, v1, v2, v3 - ushr v0.16b, v8.16b, #4 - and v1.16b, v8.16b, v14.16b - ushr v2.16b, v9.16b, #4 - and v3.16b, v9.16b, v14.16b - ld1 {v4.8b}, [x24], x15 // src - .inst 0x4e84a410 // smmla v16.4s, v0.16b, v4.16b - .inst 0x4e84a431 // smmla v17.4s, v1.16b, v4.16b - .inst 0x4e84a452 // smmla v18.4s, v2.16b, v4.16b - .inst 0x4e84a473 // smmla v19.4s, v3.16b, v4.16b - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - uzp1 v20.4s, v16.4s, v17.4s - uzp1 v21.4s, v18.4s, v19.4s - scvtf v20.4s, v20.4s - scvtf v21.4s, v21.4s - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - fmul v20.4s, v20.4s, v4.s[0] - fmul v21.4s, v21.4s, v4.s[0] -Tile1Dequant: - ld1 {v0.4s, v1.4s}, [x19], #32 // alpha - ld1 {v2.4s, v3.4s}, [x20], #32 // zero - ld1 {v12.4s, v13.4s}, [x21], #32 // bias - ld1 {v6.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fmla v12.4s, v20.4s, v0.4s - fmla v13.4s, v21.4s, v1.4s - fmla v12.4s, v2.4s, v6.s[0] - fmla v13.4s, v3.4s, v6.s[0] - st1 {v12.4s, v13.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #32 // dst += 1 * 8 * sizeof(float32_t) - add x1, x1, #8 // dst += 1 * 8 * sizeof(int8_t) - add x11, x11, #4 // sum += 1 * sizeof(float32_t) - add x12, x12, #4 // scale += 1 * sizeof(float32_t) - b TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, 
[sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32.S deleted file mode 100644 index 418638fce..000000000 --- a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32.S +++ /dev/null @@ -1,293 +0,0 @@ -// -// MNNGemmHybridInt4_sdot.S -// MNN -// -// Created by MNN on 2023/11/09. -// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s - fmul \d0\().4s, \d0\().4s, \s\().s[0] - fmul \d1\().4s, \d1\().4s, \s\().s[1] - fmul \d2\().4s, \d2\().4s, \s\().s[2] - fmul \d3\().4s, \d3\().4s, \s\().s[3] -.endm - -.macro Dequant c0, a0, z0, b0, s0, idx - fmul \c0\().4s, \c0\().4s, \a0\().4s - fmla \c0\().4s, \z0\().4s, \s0\().s[\idx] - fadd \c0\().4s, \c0\().4s, \b0\().4s -.endm - -asm_function MNNGemmHybridInt8FP32 - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt8FP32(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! 
-stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #4 // x13 = src_depth_quad * UNIT * UNIT_SRC / 1(int8) = src_depth_quad * 16 = src_depth_quad << 4 - -TILE_4: - cmp x6, #4 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - // batch=0,oc=0-3 - movi v10.4s, #0 //ic=0-3 - movi v11.4s, #0 - movi v12.4s, #0 - movi v13.4s, #0 - // batch=1,oc=0-3 - movi v16.4s, #0 - movi v17.4s, #0 - movi v18.4s, #0 - movi v19.4s, #0 - // batch=2,oc=0-3 - movi v20.4s, #0 - movi v21.4s, #0 - movi v22.4s, #0 - movi v23.4s, #0 - // batch=3,oc=0-3 - movi v24.4s, #0 - movi v25.4s, #0 - movi v26.4s, #0 - movi v27.4s, #0 - -LoopSz_TILE_4: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - ld1 {v0.16b}, [x25], #16 // weight - ld1 {v4.16b}, [x24], x15 // src - - Unit_TILE_4: - sxtl v5.8h, v4.8b // src batch=0,1 - sxtl2 v6.8h, v4.16b // batch=2,3 - sxtl v1.8h, v0.8b // weight oc=0,1 - sxtl2 v2.8h, v0.16b // oc=2,3 - dup v28.2d, v1.d[0] // oc=0,0 - dup v29.2d, v1.d[1] // oc=1,1 - dup v30.2d, v2.d[0] // oc=2,2 - dup v31.2d, v2.d[1] // oc=3,3 - // batch=0 - smlal v10.4s, v5.4h, v28.4h - smlal v11.4s, v5.4h, v29.4h - smlal v12.4s, v5.4h, v30.4h - smlal v13.4s, v5.4h, v31.4h - // batch=1 - smlal2 v16.4s, v5.8h, v28.8h - smlal2 v17.4s, v5.8h, v29.8h - smlal2 v18.4s, v5.8h, v30.8h - smlal2 v19.4s, v5.8h, v31.8h - // batch=2 - smlal v20.4s, v6.4h, v28.4h - smlal v21.4s, v6.4h, v29.4h - smlal v22.4s, v6.4h, v30.4h - smlal v23.4s, v6.4h, v31.4h - // batch=3 - smlal2 v24.4s, v6.8h, v28.8h - smlal2 v25.4s, v6.8h, v29.8h - smlal2 v26.4s, v6.8h, v30.8h - smlal2 v27.4s, v6.8h, v31.8h - // .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - // .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - // .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - // .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - // add 4 ic - addp v10.4s, v10.4s, v11.4s - addp v12.4s, v12.4s, v13.4s - addp v16.4s, v16.4s, v17.4s - addp v18.4s, v18.4s, v19.4s - addp v20.4s, v20.4s, v21.4s - addp v22.4s, v22.4s, v23.4s - addp v24.4s, v24.4s, v25.4s - addp v26.4s, v26.4s, v27.4s - - addp v10.4s, v10.4s, v12.4s // batch=0,oc=0-3 - addp v11.4s, v16.4s, v18.4s - addp v12.4s, v20.4s, v22.4s - addp v13.4s, v24.4s, v26.4s - - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v10, v11, v12, v13 - // Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.4s}, [x23] // scales, 4 batch,so 4 scale - - MulScale v10, v11, v12, v13, v5 - -Tile4Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v10, v0, v1, v2, v3, 0 - Dequant v11, v0, v1, v2, v3, 1 - Dequant v12, v0, v1, v2, v3, 2 - Dequant v13, v0, v1, v2, v3, 
3 - st1 {v10.4s, v11.4s, v12.4s, v13.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 4 * sizeof(float32_t) - add x1, x1, #16 // src += 4 * 4 * sizeof(int8_t) - add x11, x11, #16 // sum += 4 * sizeof(float32_t) - add x12, x12, #16 // scale += 4 * sizeof(float32_t) - b TILE_4 - -TILE_1: - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4, sizeof(float32_t)/4=sizeof(int8_t) - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - // batch=0,oc=0-3 - movi v10.4s, #0 //ic=0-3 - movi v11.4s, #0 - movi v12.4s, #0 - movi v13.4s, #0 - -LoopSz_TILE_1: - // src : 1(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 1 x 4 x [1] : v16 - ld1 {v0.16b}, [x25], #16 // weight pack*pack - ld1 {v4.s}[0], [x24], x15 // src - - Unit_TILE_1: - sxtl v5.8h, v4.8b // src batch=0 - sxtl v1.8h, v0.8b // weight oc=0,1 - sxtl2 v2.8h, v0.16b // oc=2,3 - dup v28.2d, v1.d[0] // oc=0,0 - dup v29.2d, v1.d[1] // oc=1,1 - dup v30.2d, v2.d[0] // oc=2,2 - dup v31.2d, v2.d[1] // oc=3,3 - // batch=0 - smlal v10.4s, v5.4h, v28.4h - smlal v11.4s, v5.4h, v29.4h - smlal v12.4s, v5.4h, v30.4h - smlal v13.4s, v5.4h, v31.4h - - //.inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - // add 4 ic - addp v10.4s, v10.4s, v11.4s - addp v12.4s, v12.4s, v13.4s - addp v16.4s, v10.4s, v12.4s - add x7, x7, x13 - sub x27, x27, #1 - scvtf v16.4s, v16.4s - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - fmul v16.4s, v16.4s, v4.s[0] -Tile1Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fmla v2.4s, v0.4s, v16.4s - fmla v2.4s, v1.4s, v3.s[0] - st1 {v2.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - subs x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 4 * sizeof(float32_t) - add x1, x1, #4 // src += 1 * 4 * sizeof(int8_t) - add x11, x11, #4 // sum += 1 * sizeof(float32_t) - add x12, x12, #4 // scale += 1 * sizeof(float32_t) - bne TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_sdot.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_sdot.S deleted file mode 100644 index dd14f71d5..000000000 --- a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_sdot.S +++ /dev/null @@ -1,396 +0,0 @@ -// -// MNNGemmHybridInt8_smmla.S -// MNN -// -// Created by MNN on 2023/11/09. 
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s - fmul \d0\().4s, \d0\().4s, \s\().s[0] - fmul \d1\().4s, \d1\().4s, \s\().s[1] - fmul \d2\().4s, \d2\().4s, \s\().s[2] - fmul \d3\().4s, \d3\().4s, \s\().s[3] -.endm - -.macro Dequant c0, a0, z0, b0, s0, idx - fmul \c0\().4s, \c0\().4s, \a0\().4s - fmla \c0\().4s, \z0\().4s, \s0\().s[\idx] - fadd \c0\().4s, \c0\().4s, \b0\().4s -.endm - -asm_function MNNGemmHybridInt8FP32_sdot - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt8FP32_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! -stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #4 // x13 = src_depth_quad * UNIT * UNIT_SRC / 1(int8) = src_depth_quad * 16 = src_depth_quad << 4 - -TILE_12: - cmp x6, #12 - blt TILE_8 - sub x14, x4, #128 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_12: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - movi v16.4s, #0 - movi v17.4s, #0 - movi v18.4s, #0 - movi v19.4s, #0 - movi v20.4s, #0 - movi v21.4s, #0 - movi v22.4s, #0 - movi v23.4s, #0 - movi v24.4s, #0 - movi v25.4s, #0 - movi v26.4s, #0 - movi v27.4s, #0 - -LoopSz_TILE_12: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.16b}, [x25], #16 // weight - ld1 {v4.16b, v5.16b, v6.16b}, [x24], x15 // src - - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - .inst 0x4f85e014 // sdot v20.4s, v0.16b, v5.4b[0] // batch4 - .inst 0x4fa5e015 // sdot v21.4s, v0.16b, v5.4b[1] // batch5 - .inst 0x4f85e816 // sdot v22.4s, v0.16b, v5.4b[2] // batch6 - .inst 0x4fa5e817 // sdot v23.4s, v0.16b, v5.4b[3] // batch7 - .inst 0x4f86e018 // sdot v24.4s, v0.16b, v6.4b[0] // batch8 - .inst 0x4fa6e019 // sdot v25.4s, v0.16b, v6.4b[1] // batch9 - .inst 0x4f86e81a // sdot v26.4s, v0.16b, v6.4b[2] // batch10 - .inst 0x4fa6e81b // sdot v27.4s, v0.16b, v6.4b[3] // batch11 - subs x26, x26, #1 - bne LoopSz_TILE_12 - -LoopSzEnd_TILE_12: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - Int32ToFloat v24, 
v25, v26, v27 - // using float scale dequant for precison - ld1 {v5.4s, v6.4s, v7.4s}, [x23] // scales, 12 batch,so 12 scale - - MulScale v16, v17, v18, v19, v5 - MulScale v20, v21, v22, v23, v6 - MulScale v24, v25, v26, v27, v7 - -Tile12Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s, v4.4s, v5.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v16, v0, v1, v2, v3, 0 - Dequant v17, v0, v1, v2, v3, 1 - Dequant v18, v0, v1, v2, v3, 2 - Dequant v19, v0, v1, v2, v3, 3 - Dequant v20, v0, v1, v2, v4, 0 - Dequant v21, v0, v1, v2, v4, 1 - Dequant v22, v0, v1, v2, v4, 2 - Dequant v23, v0, v1, v2, v4, 3 - Dequant v24, v0, v1, v2, v5, 0 - Dequant v25, v0, v1, v2, v5, 1 - Dequant v26, v0, v1, v2, v5, 2 - Dequant v27, v0, v1, v2, v5, 3 - st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x28], #64 - st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x28], #64 - st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_12 -Tile12End: - sub x6, x6, #12 // bach -= 12 - add x0, x0, #192 // dst += 12 * 4 * sizeof(float32_t) - add x1, x1, #48 // src += 12 * 4 * sizeof(int8_t) - add x11, x11, #48 // sum += 12 * sizeof(float32_t) - add x12, x12, #48 // scale += 12 * sizeof(float32_t) - b TILE_12 - -TILE_8: - cmp x6, #8 - blt TILE_4 - sub x14, x4, #64 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_8: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - movi v16.4s, #0 - movi v17.4s, #0 - movi v18.4s, #0 - movi v19.4s, #0 - movi v20.4s, #0 - movi v21.4s, #0 - movi v22.4s, #0 - movi v23.4s, #0 - - // mask - movi v14.16b, #15 - // offset - movi v15.16b, #8 -LoopSz_TILE_8: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.16b}, [x25], #16 // weight - ld1 {v4.16b, v5.16b}, [x24], x15 // src - - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - .inst 0x4f85e014 // sdot v20.4s, v0.16b, v5.4b[0] // batch4 - .inst 0x4fa5e015 // sdot v21.4s, v0.16b, v5.4b[1] // batch5 - .inst 0x4f85e816 // sdot v22.4s, v0.16b, v5.4b[2] // batch6 - .inst 0x4fa5e817 // sdot v23.4s, v0.16b, v5.4b[3] // batch7 - subs x26, x26, #1 - bne LoopSz_TILE_8 - -LoopSzEnd_TILE_8: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.4s, v6.4s}, [x23] // scales, 8 batch,so 8 scale - - MulScale v16, v17, v18, v19, v5 - MulScale v20, v21, v22, v23, v6 - -Tile8Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s, v4.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v16, v0, v1, v2, v3, 0 - Dequant v17, v0, v1, v2, v3, 1 - Dequant v18, v0, v1, v2, v3, 2 - Dequant v19, v0, v1, v2, v3, 3 - Dequant v20, v0, v1, v2, v4, 0 - Dequant v21, v0, v1, v2, v4, 1 - Dequant v22, v0, v1, v2, v4, 2 - Dequant v23, v0, v1, v2, v4, 3 - st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x28], #64 - st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x28], x14 - cmp 
x27, #1 - bge LoopDz_TILE_8 -Tile8End: - sub x6, x6, #8 // bach -= 4 - add x0, x0, #128 // dst += 8 * 4 * sizeof(float32_t) - add x1, x1, #32 // src += 8 * 4 * sizeof(int8_t) - add x11, x11, #32 // sum += 8 * sizeof(float32_t) - add x12, x12, #32 // scale += 8 * sizeof(float32_t) - b TILE_8 - -TILE_4: - cmp x6, #4 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - -LoopSz_TILE_4: - // src : 4(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 4 x 4 x [1] : v16-v19 - ld1 {v0.16b}, [x25], #16 // weight - ld1 {v4.16b}, [x24], x15 // src - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] // batch0 - .inst 0x4fa4e011 // sdot v17.4s, v0.16b, v4.4b[1] // batch1 - .inst 0x4f84e812 // sdot v18.4s, v0.16b, v4.4b[2] // batch2 - .inst 0x4fa4e813 // sdot v19.4s, v0.16b, v4.4b[3] // batch3 - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - Int32ToFloat v16, v17, v18, v19 - // Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.4s}, [x23] // scales, 4 batch,so 4 scale - - MulScale v16, v17, v18, v19, v5 - -Tile4Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.4s}, [x22] // sums - // alpha * sum + (zero * sums) + bias - Dequant v16, v0, v1, v2, v3, 0 - Dequant v17, v0, v1, v2, v3, 1 - Dequant v18, v0, v1, v2, v3, 2 - Dequant v19, v0, v1, v2, v3, 3 - st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #64 // dst += 4 * 4 * sizeof(float32_t) - add x1, x1, #16 // src += 4 * 4 * sizeof(int8_t) - add x11, x11, #16 // sum += 4 * sizeof(float32_t) - add x12, x12, #16 // scale += 4 * sizeof(float32_t) - b TILE_4 - -TILE_1: - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4, sizeof(float32_t)/4=sizeof(int8_t) - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - -LoopSz_TILE_1: - // src : 1(batch) x [1 x 4] : v4 - // weight : 4(oc) x [1 x 4] : v0 - // dst : 1 x 4 x [1] : v16 - ld1 {v0.16b}, [x25], #16 // weight - ld1 {v4.s}[0], [x24], x15 // src - .inst 0x4f84e010 // sdot v16.4s, v0.16b, v4.4b[0] - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - scvtf v16.4s, v16.4s - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - fmul v16.4s, v16.4s, v4.s[0] -Tile1Dequant: - ld1 {v0.4s}, [x19], #16 // alpha - ld1 {v1.4s}, [x20], #16 // zero - ld1 {v2.4s}, [x21], #16 // bias - ld1 {v3.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fmla v2.4s, v0.4s, v16.4s - fmla v2.4s, v1.4s, v3.s[0] - st1 {v2.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #16 // dst += 1 * 4 * 
sizeof(float32_t) - add x1, x1, #4 // src += 1 * 4 * sizeof(int8_t) - add x11, x11, #4 // sum += 1 * sizeof(float32_t) - add x12, x12, #4 // scale += 1 * sizeof(float32_t) - b TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_smmla.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_smmla.S deleted file mode 100644 index a4c915853..000000000 --- a/source/backend/cpu/arm/arm64/low_memory/MNNGemmHybridInt8FP32_smmla.S +++ /dev/null @@ -1,445 +0,0 @@ -// -// MNNGemmHybridInt8FP32_smmla.S -// MNN -// -// Created by MNN on 2023/11/09. -// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifdef __aarch64__ - -#include "MNNAsmGlobal.h" - -.text -.align 5 - -.macro Int32ToFloat z0, z1, z2, z3 - scvtf \z0\().4s, \z0\().4s - scvtf \z1\().4s, \z1\().4s - scvtf \z2\().4s, \z2\().4s - scvtf \z3\().4s, \z3\().4s -.endm - -.macro MulScale d0, d1, d2, d3, s, idx0, idx1 - fmul \d0\().4s, \d0\().4s, \s\().s[\idx0] - fmul \d1\().4s, \d1\().4s, \s\().s[\idx0] - fmul \d2\().4s, \d2\().4s, \s\().s[\idx1] - fmul \d3\().4s, \d3\().4s, \s\().s[\idx1] -.endm - -.macro Dequant c0, a0, z0, b0, s0, idx - fmul \c0\().4s, \c0\().4s, \a0\().4s - fmla \c0\().4s, \z0\().4s, \s0\().s[\idx] - fadd \c0\().4s, \c0\().4s, \b0\().4s -.endm - -asm_function MNNGemmHybridInt8FP32_smmla - -//struct QuanPostTreatParameters { -// const float* scale; -// const int32_t* bias; -// int32_t maxValue; -// int32_t minValue; -// int32_t useInt8; -//}; - -//void MNNGemmHybridInt8FP32_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, float** param); - - -// Auto: x0: C*, x1: A*, x2:B*, x3: src_depth_quad, x4: dst_step, x5: dst_depth_quad, x6: realSize, x7: param -// load from param: x7: alpha*, x8: zero*, x9: bias*, x10: sums*, x11: scales* -stp d14, d15, [sp, #(-16 * 9)]! 
-stp d12, d13, [sp, #(16 * 1)] -stp d10, d11, [sp, #(16 * 2)] -stp d8, d9, [sp, #(16 * 3)] -stp x21, x22, [sp, #(16 * 4)] -stp x19, x20, [sp, #(16 * 5)] -stp x23, x24, [sp, #(16 * 6)] -stp x25, x26, [sp, #(16 * 7)] -stp x27, x28, [sp, #(16 * 8)] - -ldr x8, [x7, #0] -ldr x9, [x7, #8] -ldr x10, [x7, #16] -ldr x11, [x7, #24] -ldr x12, [x7, #32] - -Start: -lsl x13, x3, #6 // x13 = src_depth_quad * UNIT * UNIT_SRC / 1(int8) = src_depth_quad * 64 = src_depth_quad << 6 - -TILE_8: - cmp x6, #8 - blt TILE_4 - sub x14, x4, #192 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_8: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr - dup v24.4s, wzr - dup v25.4s, wzr - dup v26.4s, wzr - dup v27.4s, wzr - dup v28.4s, wzr - dup v29.4s, wzr - dup v30.4s, wzr - dup v31.4s, wzr - -LoopSz_TILE_8: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v12.16b, v13.16b, v14.16b, v15.16b}, [x24], x15 // src - - .inst 0x4e80a590 // smmla v16.4s, v12.16b, v0.16b - .inst 0x4e81a591 // smmla v17.4s, v12.16b, v1.16b - .inst 0x4e82a592 // smmla v18.4s, v12.16b, v2.16b - .inst 0x4e83a593 // smmla v19.4s, v12.16b, v3.16b - .inst 0x4e80a5b4 // smmla v20.4s, v13.16b, v0.16b - .inst 0x4e81a5b5 // smmla v21.4s, v13.16b, v1.16b - .inst 0x4e82a5b6 // smmla v22.4s, v13.16b, v2.16b - .inst 0x4e83a5b7 // smmla v23.4s, v13.16b, v3.16b - .inst 0x4e80a5d8 // smmla v24.4s, v14.16b, v0.16b - .inst 0x4e81a5d9 // smmla v25.4s, v14.16b, v1.16b - .inst 0x4e82a5da // smmla v26.4s, v14.16b, v2.16b - .inst 0x4e83a5db // smmla v27.4s, v14.16b, v3.16b - .inst 0x4e80a5fc // smmla v28.4s, v15.16b, v0.16b - .inst 0x4e81a5fd // smmla v29.4s, v15.16b, v1.16b - .inst 0x4e82a5fe // smmla v30.4s, v15.16b, v2.16b - .inst 0x4e83a5ff // smmla v31.4s, v15.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_8 - -LoopSzEnd_TILE_8: - add x7, x7, x13 - sub x27, x27, #1 - - trn1 v0.2d, v16.2d, v17.2d // batch:0 oc:0-3 - trn1 v1.2d, v18.2d, v19.2d // batch:0 oc:4-7 - trn2 v2.2d, v16.2d, v17.2d // batch:1 oc:0-3 - trn2 v3.2d, v18.2d, v19.2d // batch:1 oc:4-7 - trn1 v4.2d, v20.2d, v21.2d // batch:2 oc:0-3 - trn1 v5.2d, v22.2d, v23.2d // batch:2 oc:4-7 - trn2 v6.2d, v20.2d, v21.2d // batch:3 oc:0-3 - trn2 v7.2d, v22.2d, v23.2d // batch:3 oc:4-7 - - trn1 v8.2d, v24.2d, v25.2d // batch:0 oc:0-3 - trn1 v9.2d, v26.2d, v27.2d // batch:0 oc:4-7 - trn2 v10.2d, v24.2d, v25.2d // batch:1 oc:0-3 - trn2 v11.2d, v26.2d, v27.2d // batch:1 oc:4-7 - trn1 v12.2d, v28.2d, v29.2d // batch:2 oc:0-3 - trn1 v13.2d, v30.2d, v31.2d // batch:2 oc:4-7 - trn2 v14.2d, v28.2d, v29.2d // batch:3 oc:0-3 - trn2 v15.2d, v30.2d, v31.2d // batch:3 oc:4-7 - - Int32ToFloat v0, v1, v2, v3 - Int32ToFloat v4, v5, v6, v7 - Int32ToFloat v8, v9, v10, v11 - Int32ToFloat v12, v13, v14, v15 - // using float scale dequant for precison - ld1 {v16.4s, v17.4s}, [x23] // scales - MulScale v0, v1, v2, v3, v16, 0, 1 - MulScale v4, v5, v6, v7, v16, 2, 3 - MulScale v8, v9, v10, v11, v17, 0, 1 - MulScale v12, v13, v14, v15, v17, 2, 3 -Tile8Dequant: - ld1 {v18.4s, v19.4s}, [x19], 
#32 // alpha - ld1 {v20.4s, v21.4s}, [x20], #32 // zero - ld1 {v22.4s, v23.4s}, [x21], #32 // bias - ld1 {v24.4s, v25.4s}, [x22] // sums - // alpha * cusum + (zero * sums) + bias - Dequant v0, v18, v20, v22, v24, 0 // Batch0 - Dequant v1, v19, v21, v23, v24, 0 - Dequant v2, v18, v20, v22, v24, 1 // Batch1 - Dequant v3, v19, v21, v23, v24, 1 - Dequant v4, v18, v20, v22, v24, 2 // Batch2 - Dequant v5, v19, v21, v23, v24, 2 - Dequant v6, v18, v20, v22, v24, 3 // Batch3 - Dequant v7, v19, v21, v23, v24, 3 - Dequant v8, v18, v20, v22, v25, 0 // Batch4 - Dequant v9, v19, v21, v23, v25, 0 - Dequant v10, v18, v20, v22, v25, 1 // Batch5 - Dequant v11, v19, v21, v23, v25, 1 - Dequant v12, v18, v20, v22, v25, 2 // Batch6 - Dequant v13, v19, v21, v23, v25, 2 - Dequant v14, v18, v20, v22, v25, 3 // Batch7 - Dequant v15, v19, v21, v23, v25, 3 - st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x28], #64 - st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x28], #64 - st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x28], #64 - st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_8 -Tile8End: - sub x6, x6, #8 // bach -= 8 - add x0, x0, #256 // dst += 8 * 8 * sizeof(float32_t) - add x1, x1, #64 // src += 8 * 8 * sizeof(int8_t) - add x11, x11, #32 // sum += 8 * sizeof(float32_t) - add x12, x12, #32 // scale += 8 * sizeof(float32_t) - b TILE_8 - -TILE_4: - cmp x6, #4 - blt TILE_2 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - sub x14, x14, #64 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_4: - // dequant info for batch - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - dup v20.4s, wzr - dup v21.4s, wzr - dup v22.4s, wzr - dup v23.4s, wzr -LoopSz_TILE_4: - // src : 2 x [2 x 8] : v4-5 - // weight : 4 x [2 x 8] : v0-3 - // dst : 2 x 4 x [4] : v16-23 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.16b, v5.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - .inst 0x4e80a4b4 // smmla v20.4s, v5.16b, v0.16b - .inst 0x4e81a4b5 // smmla v21.4s, v5.16b, v1.16b - .inst 0x4e82a4b6 // smmla v22.4s, v5.16b, v2.16b - .inst 0x4e83a4b7 // smmla v23.4s, v5.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_4 - -LoopSzEnd_TILE_4: - add x7, x7, x13 - sub x27, x27, #1 - - trn1 v24.2d, v16.2d, v17.2d // batch:0 oc:0-3 - trn1 v25.2d, v18.2d, v19.2d // batch:0 oc:4-7 - trn2 v26.2d, v16.2d, v17.2d // batch:1 oc:0-3 - trn2 v27.2d, v18.2d, v19.2d // batch:1 oc:4-7 - trn1 v28.2d, v20.2d, v21.2d // batch:2 oc:0-3 - trn1 v29.2d, v22.2d, v23.2d // batch:2 oc:4-7 - trn2 v30.2d, v20.2d, v21.2d // batch:3 oc:0-3 - trn2 v31.2d, v22.2d, v23.2d // batch:3 oc:4-7 - Int32ToFloat v24, v25, v26, v27 - Int32ToFloat v28, v29, v30, v31 - // using float scale dequant for precison - ld1 {v5.4s}, [x23] // scales - MulScale v24, v25, v26, v27, v5, 0, 1 - MulScale v28, v29, v30, v31, v5, 2, 3 -Tile4Dequant: - ld1 {v0.4s, v1.4s}, [x19], #32 // alpha - ld1 {v2.4s, v3.4s}, [x20], #32 // zero - ld1 {v8.4s, v9.4s}, [x21], #32 // bias - ld1 {v6.4s}, [x22] // sums - // alpha * cusum + (zero * sums) + bias - Dequant v24, v0, v2, v8, v6, 0 // Batch0 - Dequant v25, v1, v3, v9, 
v6, 0 - Dequant v26, v0, v2, v8, v6, 1 // Batch1 - Dequant v27, v1, v3, v9, v6, 1 - Dequant v28, v0, v2, v8, v6, 2 // Batch2 - Dequant v29, v1, v3, v9, v6, 2 - Dequant v30, v0, v2, v8, v6, 3 // Batch3 - Dequant v31, v1, v3, v9, v6, 3 - st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x28], #64 - st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_4 -Tile4End: - sub x6, x6, #4 // bach -= 4 - add x0, x0, #128 // dst += 4 * 8 * sizeof(float32_t) - add x1, x1, #32 // src += 4 * 8 * sizeof(int8_t) - add x11, x11, #16 // sum += 4 * sizeof(float32_t) - add x12, x12, #16 // scale += 4 * sizeof(float32_t) - b TILE_4 - -TILE_2: - cmp x6, #2 - blt TILE_1 - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4 - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_2: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr -LoopSz_TILE_2: - // src : 1 x [2 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [4] : v16-19 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.16b}, [x24], x15 // src - .inst 0x4e80a490 // smmla v16.4s, v4.16b, v0.16b - .inst 0x4e81a491 // smmla v17.4s, v4.16b, v1.16b - .inst 0x4e82a492 // smmla v18.4s, v4.16b, v2.16b - .inst 0x4e83a493 // smmla v19.4s, v4.16b, v3.16b - subs x26, x26, #1 - bne LoopSz_TILE_2 - -LoopSzEnd_TILE_2: - add x7, x7, x13 - sub x27, x27, #1 - trn1 v20.2d, v16.2d, v17.2d - trn1 v21.2d, v18.2d, v19.2d - trn2 v22.2d, v16.2d, v17.2d - trn2 v23.2d, v18.2d, v19.2d - Int32ToFloat v20, v21, v22, v23 - // using float scale dequant for precison - ld1 {v5.d}[0], [x23] // scales - fmul v20.4s, v20.4s, v5.s[0] - fmul v21.4s, v21.4s, v5.s[0] - fmul v22.4s, v22.4s, v5.s[1] - fmul v23.4s, v23.4s, v5.s[1] -Tile2Dequant: - ld1 {v0.4s, v1.4s}, [x19], #32 // alpha - ld1 {v2.4s, v3.4s}, [x20], #32 // zero - ld1 {v8.4s, v9.4s}, [x21], #32 // bias - ld1 {v10.d}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - Dequant v20, v0, v2, v8, v10, 0 - Dequant v21, v1, v3, v9, v10, 0 - Dequant v22, v0, v2, v8, v10, 1 - Dequant v23, v1, v3, v9, v10, 1 - st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_2 -Tile2End: - sub x6, x6, #2 // batch -= 2 - add x0, x0, #64 // dst += 2 * 8 * sizeof(float32_t) - add x1, x1, #16 // dst += 2 * 8 * sizeof(int8_t) - add x11, x11, #8 // sum += 2 * sizeof(float32_t) - add x12, x12, #8 // scale += 2 * sizeof(float32_t) - b TILE_2 - -TILE_1: - - cmp x6, #1 - blt End - mov x14, x4 // dst_step - lsr x15, x4, #2 // src_step = dst_step / 4, sizeof(float32_t)/4=sizeof(int8_t) - mov x27, x5 // dst_depth_quad - mov x28, x0 // dst - mov x7, x2 // weight - // dequant info - mov x19, x8 // alpha - mov x20, x9 // zero - mov x21, x10 // bias -LoopDz_TILE_1: - mov x22, x11 // sums - mov x23, x12 // scales - mov x24, x1 // src - mov x25, x7 // weight - mov x26, x3 // src_depth_quad - // init - dup v16.4s, wzr - dup v17.4s, wzr - dup v18.4s, wzr - dup v19.4s, wzr - -LoopSz_TILE_1: - // src : 1 x [1 x 8] : v4 - // weight : 4 x [2 x 8] : v0-3 - // dst : 1 x 4 x [2] : v16-v19 - ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x25], #64 // weight - ld1 {v4.8b}, [x24], x15 // src - .inst 0x4e84a410 // smmla v16.4s, v0.16b, v4.16b - .inst 0x4e84a431 // smmla v17.4s, v1.16b, v4.16b - .inst 0x4e84a452 // smmla v18.4s, 
v2.16b, v4.16b - .inst 0x4e84a473 // smmla v19.4s, v3.16b, v4.16b - - subs x26, x26, #1 - bne LoopSz_TILE_1 - -LoopSzEnd_TILE_1: - add x7, x7, x13 - sub x27, x27, #1 - uzp1 v20.4s, v16.4s, v17.4s - uzp1 v21.4s, v18.4s, v19.4s - scvtf v20.4s, v20.4s - scvtf v21.4s, v21.4s - // using float scale dequant for precison - ld1 {v4.s}[0], [x23] // scales - fmul v20.4s, v20.4s, v4.s[0] - fmul v21.4s, v21.4s, v4.s[0] -Tile1Dequant: - ld1 {v0.4s, v1.4s}, [x19], #32 // alpha - ld1 {v2.4s, v3.4s}, [x20], #32 // zero - ld1 {v10.4s, v11.4s}, [x21], #32 // bias - ld1 {v8.s}[0], [x22] // sums - // alpha * sum + (zero * sumx) + bias - fmla v10.4s, v20.4s, v0.4s - fmla v11.4s, v21.4s, v1.4s - fmla v10.4s, v2.4s, v8.s[0] - fmla v11.4s, v3.4s, v8.s[0] - st1 {v10.4s, v11.4s}, [x28], x14 - cmp x27, #1 - bge LoopDz_TILE_1 -Tile1End: - sub x6, x6, #1 // batch -= 1 - add x0, x0, #32 // dst += 1 * 8 * sizeof(float32_t) - add x1, x1, #8 // src += 1 * 8 * sizeof(int8_t) - add x11, x11, #4 // sum += 1 * sizeof(float32_t) - add x12, x12, #4 // scale += 1 * sizeof(float32_t) - b TILE_1 - -End: -ldp x27, x28, [sp, #(16 * 8)] -ldp x25, x26, [sp, #(16 * 7)] -ldp x23, x24, [sp, #(16 * 6)] -ldp x19, x20, [sp, #(16 * 5)] -ldp x21, x22, [sp, #(16 * 4)] -ldp d8, d9, [sp, #(16 * 3)] -ldp d10, d11, [sp, #(16 * 2)] -ldp d12, d13, [sp, #(16 * 1)] -ldp d14, d15, [sp], #(16 * 9) -ret - -#endif \ No newline at end of file diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S new file mode 100644 index 000000000..fa8258b66 --- /dev/null +++ b/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_16x4_w4_Unit.S @@ -0,0 +1,830 @@ +// +// MNNGemmInt8AddBiasScale_16x4_w4_Unit.S +// MNN +// +// Created by MNN on 2019/06/11. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifdef __aarch64__ + +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32_4 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm +.macro ReLU_FP32_3 s0, s1, s2, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s +.endm +.macro ReLU_FP32_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s +.endm +.macro ReLU_FP32_1 s0, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s +.endm +.macro MUL_SCALE4 s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MUL_SCALE3 s, d0, d1, d2 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s +.endm +.macro MUL_SCALE2 s, d0, d1 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s +.endm +.macro MUL_SCALE1 s, d0 + fmul \d0\().4s, \d0\().4s, \s\().4s +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm + +asm_function MNNGemmInt8AddBiasScale_16x4_w4_Unit + +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum; +}; +*/ +//void MNNGemmInt8AddBiasScale_16x4_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, +// size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realSize) { + +//Auto: x0: dst*, x1: src*, x2:weight*, x3: src_depth_quad, x4: dst_step, +// x5: dst_depth_quad, x6: post, x7: realSize + +//Load from post: +// x7: scale, x10: bias, w11: maxValue, w6: minValue, w13: UseInt8, x14: srcKernelSum, x12: weightQuantBias +mov x8, x7 +mov x15, x6 +ldr x7, [x15, #0] +ldr x10, [x15, #8] +ldr w11, [x15, #16] +ldr w6, [x15, #20] +ldr w13, [x15, #24] +ldr x14, [x15, #40] // srcKernelSum +ldr x12, [x15, #48] // weightQuantBias + +stp d14, d15, [sp, #(-16 * 8)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x19, x20, [sp, #(16 * 4)] +stp x21, x22, [sp, #(16 * 5)] +stp x23, x24, [sp, #(16 * 6)] + +ldr x19, [x15, #56] // fp32 min max +ldr x21, [x15, #64] // blockNum +ldr x23, [x15, #80] // extraScale +mul x21, x21, x3 // blockNum * src_depth_quad_perblock +lsl x21, x21, #5 // src_depth_quad* SRC_UNIT * UNIT * sizeof(int4_t) +add x20, x19, #4 + +Start: +cmp x8, #3 +beq L3Dz + +cmp x8, #2 +beq L2Dz + +cmp x8, #1 +beq L1Dz + +//cmp w13, #1 +//bne L4LoopDz +//sub x4, x4, #8 // post->scale != nullptr && post->useInt8 == 1. +L4LoopDz: + mov x8, x1 + mov x22, x2 + ld1 {v0.16b, v1.16b}, [x2], #32 // weight + ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #64 // src + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + smull v12.8h, v0.8b, v5.8b + smull v13.8h, v1.8b, v5.8b + smull v14.8h, v2.8b, v5.8b + smull v15.8h, v3.8b, v5.8b + + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + smlal2 v12.8h, v0.16b, v5.16b + smlal2 v13.8h, v1.16b, v5.16b + smlal2 v14.8h, v2.16b, v5.16b + smlal2 v15.8h, v3.16b, v5.16b + + L4Initialize: + saddlp v16.4s, v8.8h + saddlp v17.4s, v9.8h + saddlp v18.4s, v10.8h + saddlp v19.4s, v11.8h + saddlp v20.4s, v12.8h + saddlp v21.4s, v13.8h + saddlp v22.4s, v14.8h + saddlp v23.4s, v15.8h + + smull v8.8h, v0.8b, v6.8b + smull v9.8h, v1.8b, v6.8b + smull v10.8h, v2.8b, v6.8b + smull v11.8h, v3.8b, v6.8b + smull v12.8h, v0.8b, v7.8b + smull v13.8h, v1.8b, v7.8b + smull v14.8h, v2.8b, v7.8b + smull v15.8h, v3.8b, v7.8b + subs x9, x3, #1 + smlal2 v8.8h, v0.16b, v6.16b + smlal2 v9.8h, v1.16b, v6.16b + smlal2 v10.8h, v2.16b, v6.16b + smlal2 v11.8h, v3.16b, v6.16b + smlal2 v12.8h, v0.16b, v7.16b + smlal2 v13.8h, v1.16b, v7.16b + smlal2 v14.8h, v2.16b, v7.16b + smlal2 v15.8h, v3.16b, v7.16b + + saddlp v24.4s, v8.8h + saddlp v25.4s, v9.8h + saddlp v26.4s, v10.8h + saddlp v27.4s, v11.8h + saddlp v28.4s, v12.8h + saddlp v29.4s, v13.8h + saddlp v30.4s, v14.8h + saddlp v31.4s, v15.8h + L4InitializeEnd: + beq ComputeSum + + L4LoopSz: + ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x1], #64 + ld1 {v0.16b, v1.16b}, [x2], #32 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + smull v12.8h, v0.8b, v5.8b + smull v13.8h, v1.8b, v5.8b + smull v14.8h, v2.8b, v5.8b + smull v15.8h, v3.8b, v5.8b + + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + smlal2 v12.8h, v0.16b, v5.16b + smlal2 v13.8h, v1.16b, v5.16b + smlal2 v14.8h, v2.16b, v5.16b + smlal2 v15.8h, v3.16b, v5.16b + + sadalp v16.4s, v8.8h + sadalp v17.4s, v9.8h + sadalp v18.4s, v10.8h + sadalp v19.4s, v11.8h + sadalp v20.4s, v12.8h + sadalp v21.4s, v13.8h + sadalp v22.4s, v14.8h + sadalp v23.4s, v15.8h + + smull v8.8h, v0.8b, v6.8b + smull 
v9.8h, v1.8b, v6.8b + smull v10.8h, v2.8b, v6.8b + smull v11.8h, v3.8b, v6.8b + smull v12.8h, v0.8b, v7.8b + smull v13.8h, v1.8b, v7.8b + smull v14.8h, v2.8b, v7.8b + smull v15.8h, v3.8b, v7.8b + + subs x9, x9, #1 + + smlal2 v8.8h, v0.16b, v6.16b + smlal2 v9.8h, v1.16b, v6.16b + smlal2 v10.8h, v2.16b, v6.16b + smlal2 v11.8h, v3.16b, v6.16b + smlal2 v12.8h, v0.16b, v7.16b + smlal2 v13.8h, v1.16b, v7.16b + smlal2 v14.8h, v2.16b, v7.16b + smlal2 v15.8h, v3.16b, v7.16b + + sadalp v24.4s, v8.8h + sadalp v25.4s, v9.8h + sadalp v26.4s, v10.8h + sadalp v27.4s, v11.8h + sadalp v28.4s, v12.8h + sadalp v29.4s, v13.8h + sadalp v30.4s, v14.8h + sadalp v31.4s, v15.8h + + bne L4LoopSz + + ComputeSum: + + addp v4.4s, v16.4s, v17.4s + addp v5.4s, v18.4s, v19.4s + addp v6.4s, v20.4s, v21.4s + addp v7.4s, v22.4s, v23.4s + addp v8.4s, v24.4s, v25.4s + addp v9.4s, v26.4s, v27.4s + addp v10.4s, v28.4s, v29.4s + addp v11.4s, v30.4s, v31.4s + + addp v12.4s, v4.4s, v5.4s + addp v13.4s, v6.4s, v7.4s + addp v14.4s, v8.4s, v9.4s + addp v15.4s, v10.4s, v11.4s + + L4Quan: + ld1 {v1.4s}, [x7], #16 // scalefuse + ld1 {v20.4s}, [x14] // srcKernelSum + ld1 {v21.4s}, [x12], #16 // weightQuanZero + + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + scvtf v6.4s, v14.4s + scvtf v7.4s, v15.4s + + cbz x23, TILE4_MUL_OHE_SCALE + ld1 {v2.4s}, [x23] + MUL_EXTRA_SCALE v2, v4, v5, v6, v7 + + TILE4_MUL_OHE_SCALE: + MUL_SCALE4 v1, v4, v5, v6, v7 + + MLA_WEIGHTZERO v4, v20, v21, 0 + MLA_WEIGHTZERO v5, v20, v21, 1 + MLA_WEIGHTZERO v6, v20, v21, 2 + MLA_WEIGHTZERO v7, v20, v21, 3 + + L4_Add_BIAS: + cbz x10, L4_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + fadd v6.4s, v6.4s, v0.4s + fadd v7.4s, v7.4s, v0.4s + b L4_POST + + L4_ADD_DSTV: + ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0] + fadd v4.4s, v4.4s, v8.4s + fadd v5.4s, v5.4s, v9.4s + fadd v6.4s, v6.4s, v10.4s + fadd v7.4s, v7.4s, v11.4s + + L4_POST: + cbz x19, L4_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_4 v4, v5, v6, v7, v26, v27 + + L4_STORE: + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], x4 + +L4LoopCheck: + subs x5, x5, #1 + mov x1, x8 + add x2, x22, x21 + bne L4LoopDz + +b End + +L3Dz: +cmp w13, #1 +bne L3LoopDz +sub x4, x4, #8 +L3LoopDz: + mov x8, x1 + mov x22, x2 + ld1 {v0.16b, v1.16b}, [x2], #32 + ld1 {v4.16b, v5.16b, v6.16b}, [x1], #48 + add x1, x1, #16 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + smull v12.8h, v0.8b, v5.8b + smull v13.8h, v1.8b, v5.8b + smull v14.8h, v2.8b, v5.8b + smull v15.8h, v3.8b, v5.8b + + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + smlal2 v12.8h, v0.16b, v5.16b + smlal2 v13.8h, v1.16b, v5.16b + smlal2 v14.8h, v2.16b, v5.16b + smlal2 v15.8h, v3.16b, v5.16b + + L3Initialize: + saddlp v16.4s, v8.8h + saddlp v17.4s, v9.8h + saddlp v18.4s, v10.8h + saddlp v19.4s, v11.8h + saddlp v20.4s, v12.8h + saddlp v21.4s, v13.8h + saddlp v22.4s, v14.8h + saddlp v23.4s, v15.8h + + smull v8.8h, v0.8b, v6.8b + smull v9.8h, v1.8b, v6.8b + smull v10.8h, v2.8b, v6.8b + smull v11.8h, v3.8b, v6.8b + + subs x9, x3, #1 + + smlal2 v8.8h, v0.16b, v6.16b + smlal2 v9.8h, v1.16b, v6.16b 
+ smlal2 v10.8h, v2.16b, v6.16b + smlal2 v11.8h, v3.16b, v6.16b + + saddlp v24.4s, v8.8h + saddlp v25.4s, v9.8h + saddlp v26.4s, v10.8h + saddlp v27.4s, v11.8h + L3InitializeEnd: + beq L3ComputeSum + + L3LoopSz: + ld1 {v4.16b, v5.16b, v6.16b}, [x1], #48 + ld1 {v0.16b, v1.16b}, [x2], #32 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + smull v12.8h, v0.8b, v5.8b + smull v13.8h, v1.8b, v5.8b + smull v14.8h, v2.8b, v5.8b + smull v15.8h, v3.8b, v5.8b + + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + smlal2 v12.8h, v0.16b, v5.16b + smlal2 v13.8h, v1.16b, v5.16b + smlal2 v14.8h, v2.16b, v5.16b + smlal2 v15.8h, v3.16b, v5.16b + + sadalp v16.4s, v8.8h + sadalp v17.4s, v9.8h + sadalp v18.4s, v10.8h + sadalp v19.4s, v11.8h + sadalp v20.4s, v12.8h + sadalp v21.4s, v13.8h + sadalp v22.4s, v14.8h + sadalp v23.4s, v15.8h + + smull v8.8h, v0.8b, v6.8b + smull v9.8h, v1.8b, v6.8b + smull v10.8h, v2.8b, v6.8b + smull v11.8h, v3.8b, v6.8b + + subs x9, x9, #1 + add x1, x1, #16 + + smlal2 v8.8h, v0.16b, v6.16b + smlal2 v9.8h, v1.16b, v6.16b + smlal2 v10.8h, v2.16b, v6.16b + smlal2 v11.8h, v3.16b, v6.16b + + sadalp v24.4s, v8.8h + sadalp v25.4s, v9.8h + sadalp v26.4s, v10.8h + sadalp v27.4s, v11.8h + + bne L3LoopSz + + L3ComputeSum: + addp v4.4s, v16.4s, v17.4s + addp v5.4s, v18.4s, v19.4s + addp v6.4s, v20.4s, v21.4s + addp v7.4s, v22.4s, v23.4s + addp v8.4s, v24.4s, v25.4s + addp v9.4s, v26.4s, v27.4s + + addp v12.4s, v4.4s, v5.4s + addp v13.4s, v6.4s, v7.4s + addp v14.4s, v8.4s, v9.4s + + L3Quan: + ld1 {v1.4s}, [x7], #16 + ld1 {v20.d}[0], [x14], #8 // srcKernelSum + ld1 {v20.s}[2], [x14] + ld1 {v21.4s}, [x12], #16 // weightQuanZero + + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + scvtf v6.4s, v14.4s + MUL_SCALE3 v1, v4, v5, v6 + + cbz x23, TILE3_MUL_OHE_SCALE + ld1 {v2.d}[0], [x23], #8 + ld1 {v2.s}[2], [x23] + fmul v4.4s, v4.4s, v2.s[0] + fmul v5.4s, v5.4s, v2.s[1] + fmul v6.4s, v6.4s, v2.s[2] + sub x23, x23, #8 + + TILE3_MUL_OHE_SCALE: + sub x14, x14, #8 + MLA_WEIGHTZERO v4, v20, v21, 0 + MLA_WEIGHTZERO v5, v20, v21, 1 + MLA_WEIGHTZERO v6, v20, v21, 2 + + L3_ADD_BIAS: + cbz x10, L3_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + fadd v6.4s, v6.4s, v0.4s + b L3_POST + + L3_ADD_DSTV: + ld1 {v0.4s, v1.4s, v2.4s}, [x0] + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v1.4s + fadd v6.4s, v6.4s, v2.4s + + L3_POST: + cbz x19, L3_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_3 v4, v5, v6, v26, v27 + L3_STORE: + st1 {v4.4s, v5.4s, v6.4s}, [x0], x4 + +L3LoopCheck: + subs x5, x5, #1 + mov x1, x8 + add x2, x22, x21 + bne L3LoopDz + +b End + +L2Dz: +L2LoopDz: + mov x8, x1 + mov x22, x2 + ld1 {v0.16b, v1.16b}, [x2], #32 + ld1 {v4.16b, v5.16b}, [x1], #32 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull 
v11.8h, v3.8b, v4.8b + smull v12.8h, v0.8b, v5.8b + smull v13.8h, v1.8b, v5.8b + smull v14.8h, v2.8b, v5.8b + smull v15.8h, v3.8b, v5.8b + add x1, x1, #32 + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + smlal2 v12.8h, v0.16b, v5.16b + smlal2 v13.8h, v1.16b, v5.16b + smlal2 v14.8h, v2.16b, v5.16b + smlal2 v15.8h, v3.16b, v5.16b + + L2Initialize: + saddlp v16.4s, v8.8h + saddlp v17.4s, v9.8h + saddlp v18.4s, v10.8h + saddlp v19.4s, v11.8h + saddlp v20.4s, v12.8h + saddlp v21.4s, v13.8h + saddlp v22.4s, v14.8h + saddlp v23.4s, v15.8h + subs x9, x3, #1 + L2InitializeEnd: + beq L2ComputeSum + + L2LoopSz: + ld1 {v4.16b, v5.16b}, [x1], #32 + ld1 {v0.16b, v1.16b}, [x2], #32 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + smull v12.8h, v0.8b, v5.8b + smull v13.8h, v1.8b, v5.8b + smull v14.8h, v2.8b, v5.8b + smull v15.8h, v3.8b, v5.8b + + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + add x1, x1, #32 + subs x9, x9, #1 + smlal2 v12.8h, v0.16b, v5.16b + smlal2 v13.8h, v1.16b, v5.16b + smlal2 v14.8h, v2.16b, v5.16b + smlal2 v15.8h, v3.16b, v5.16b + + sadalp v16.4s, v8.8h + sadalp v17.4s, v9.8h + sadalp v18.4s, v10.8h + sadalp v19.4s, v11.8h + sadalp v20.4s, v12.8h + sadalp v21.4s, v13.8h + sadalp v22.4s, v14.8h + sadalp v23.4s, v15.8h + + bne L2LoopSz + + L2ComputeSum: + + addp v4.4s, v16.4s, v17.4s + addp v5.4s, v18.4s, v19.4s + addp v6.4s, v20.4s, v21.4s + addp v7.4s, v22.4s, v23.4s + + addp v12.4s, v4.4s, v5.4s + addp v13.4s, v6.4s, v7.4s + + L2Quan: + ld1 {v1.4s}, [x7], #16 + ld1 {v20.d}[0], [x14] // srcKernelSum + ld1 {v21.4s}, [x12], #16 // weightQuanZero + + scvtf v4.4s, v12.4s + scvtf v5.4s, v13.4s + MUL_SCALE2 v1, v4, v5 + + cbz x23, TILE2_MUL_OHE_SCALE + ld1 {v2.d}[0], [x23] + fmul v4.4s, v4.4s, v2.s[0] + fmul v5.4s, v5.4s, v2.s[1] + + TILE2_MUL_OHE_SCALE: + MLA_WEIGHTZERO v4, v20, v21, 0 + MLA_WEIGHTZERO v5, v20, v21, 1 + + L2_ADD_BIAS: + cbz x10, L2_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v0.4s + b L2_POST + + L2_ADD_DSTV: + ld1 {v0.4s, v1.4s}, [x0] + fadd v4.4s, v4.4s, v0.4s + fadd v5.4s, v5.4s, v1.4s + + L2_POST: + cbz x19, L2_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_2 v4, v5, v26, v27 + + L2_STORE: + st1 {v4.4s, v5.4s}, [x0], x4 + +L2LoopCheck: + subs x5, x5, #1 + mov x1, x8 + add x2, x22, x21 + bne L2LoopDz + +b End + +L1Dz: +L1LoopDz: + mov x8, x1 + mov x22, x2 + ld1 {v0.16b, v1.16b}, [x2], #32 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + dup v16.4s, wzr + dup v17.4s, wzr + ld1 {v4.16b}, [x1], #16 + add x1, x1, #48 + + smull v8.8h, v0.8b, v4.8b + dup v18.4s, wzr + smull v9.8h, v1.8b, v4.8b + dup v19.4s, wzr + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + subs x9, x3, #1 + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b 
+ smlal2 v11.8h, v3.16b, v4.16b + beq L1LoopSzEnd + + L1LoopSz: + sadalp v16.4s, v8.8h + ld1 {v4.16b}, [x1], #16 + sadalp v17.4s, v9.8h + sadalp v18.4s, v10.8h + sadalp v19.4s, v11.8h + sadalp v20.4s, v12.8h + sadalp v21.4s, v13.8h + sadalp v22.4s, v14.8h + sadalp v23.4s, v15.8h + + ld1 {v0.16b, v1.16b}, [x2], #32 + add x1, x1, #48 + // int4->int8 + movi v8.16b, #15 + ushr v10.16b, v0.16b, #4 + and v11.16b, v0.16b, v8.16b + ushr v12.16b, v1.16b, #4 + and v13.16b, v1.16b, v8.16b + zip1 v0.16b, v10.16b, v11.16b + zip2 v1.16b, v10.16b, v11.16b + zip1 v2.16b, v12.16b, v13.16b + zip2 v3.16b, v12.16b, v13.16b + + smull v8.8h, v0.8b, v4.8b + smull v9.8h, v1.8b, v4.8b + smull v10.8h, v2.8b, v4.8b + smull v11.8h, v3.8b, v4.8b + + smlal2 v8.8h, v0.16b, v4.16b + smlal2 v9.8h, v1.16b, v4.16b + smlal2 v10.8h, v2.16b, v4.16b + smlal2 v11.8h, v3.16b, v4.16b + + subs x9, x9, #1 + bne L1LoopSz + + L1LoopSzEnd: + sadalp v16.4s, v8.8h + sadalp v17.4s, v9.8h + sadalp v18.4s, v10.8h + sadalp v19.4s, v11.8h + + //ld1 {v0.4s}, [x10], #16 + addp v4.4s, v16.4s, v17.4s + addp v5.4s, v18.4s, v19.4s + + addp v12.4s, v4.4s, v5.4s + + L1Quan: + ld1 {v1.4s}, [x7], #16 + ld1 {v20.s}[0], [x14] // srcKernelSum + ld1 {v21.4s}, [x12], #16 // weightQuanZero + + scvtf v4.4s, v12.4s + MUL_SCALE1 v1, v4 + + cbz x23, TILE1_MUL_OHE_SCALE + ld1 {v2.s}[0], [x23] + fmul v4.4s, v4.4s, v2.s[0] + + TILE1_MUL_OHE_SCALE: + MLA_WEIGHTZERO v4, v20, v21, 0 + + L1_ADD_BIAS: + cbz x10, L1_ADD_DSTV + ld1 {v0.4s}, [x10], #16 + fadd v4.4s, v4.4s, v0.4s + b L1_POST + + L1_ADD_DSTV: + ld1 {v0.4s}, [x0] + fadd v4.4s, v4.4s, v0.4s + + L1_POST: + cbz x19, L1_STORE + ld1r {v26.4s}, [x19] // f32 min + ld1r {v27.4s}, [x20] // f32 max + ReLU_FP32_1 v4, v26, v27 + + L1_STORE: + st1 {v4.4s}, [x0], x4 + +L1LoopCheck: + subs x5, x5, #1 + mov x1, x8 + add x2, x22, x21 + bne L1LoopDz + +End: +ldp x23, x24, [sp, #(16 * 6)] +ldp x21, x22, [sp, #(16 * 5)] +ldp x19, x20, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 8) +ret + +#endif diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit.S new file mode 100644 index 000000000..fa9bc1f43 --- /dev/null +++ b/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV82_w4_Unit.S @@ -0,0 +1,999 @@ +// +// MNNGemmInt8AddBiasScale_ARMV82_w4_Unit.S +// MNN +// +// Created by MNN on 2019/12/17. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#if defined(__aarch64__) +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm + +.macro ADD_FLOAT d0, d1, d2, d3, s0, s1, s2, s3 + fadd \d0\().4s, \d0\().4s, \s0\().4s + fadd \d1\().4s, \d1\().4s, \s1\().4s + fadd \d2\().4s, \d2\().4s, \s2\().4s + fadd \d3\().4s, \d3\().4s, \s3\().4s +.endm + +.macro SET_BIAS d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm +.macro Int32ToFloat z0, z1, z2, z3 + scvtf \z0\().4s, \z0\().4s + scvtf \z1\().4s, \z1\().4s + scvtf \z2\().4s, \z2\().4s + scvtf \z3\().4s, \z3\().4s +.endm +.macro MUL_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm +.macro FloatToInt32 z0, z1, z2, z3 + fcvtas \z0\().4s, \z0\().4s + fcvtas \z1\().4s, \z1\().4s + fcvtas \z2\().4s, \z2\().4s + fcvtas \z3\().4s, \z3\().4s +.endm +.macro Int32ToInt16 s0, s1, s2, s3, d0, d1 + sqxtn \d0\().4h, \s0\().4s + sqxtn2 \d0\().8h, \s1\().4s + sqxtn \d1\().4h, \s2\().4s + sqxtn2 \d1\().8h, \s3\().4s +.endm +.macro Int16ToInt8_ONE s0, s1, d0 + sqxtn \d0\().8b, \s0\().8h + sqxtn2 \d0\().16b, \s1\().8h +.endm +.macro Int16ToInt8 s0, s1, s2, s3, d0, d1 + Int16ToInt8_ONE \s0, \s1, \d0 + Int16ToInt8_ONE \s2, \s3, \d1 +.endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm + +asm_function MNNGemmInt8AddBiasScale_ARMV82_w4_Unit +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. + float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum; + const int32_t* bias; + +}; +*/ + +//void MNNGemmInt8AddBiasScale_ARMV82_w4_Unit(int8_t* dst, const int8_t* src, +// const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, +// const QuanPostTreatParameters* parameters, size_t realDstCount); + +//Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step +//x5:dst_depth_quad, x6: parameters, x7: realDstCount + +//Load from x6: x8: scale, x9: bias, x25: xKernelSum, x26: weightQuantBias, x23: fp32minmax +ldr x8, [x6, #0] +ldr x9, [x6, #8] + +stp d14, d15, [sp, #(-16 * 10)]! 
+stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +stp x27, x28, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x23, x24, [sp, #(16 * 8)] + +ldr x27, [x6, #64] // blockNum +mul x27, x27, x3 // blockNum * src_depth_quad_perblock +lsl x15, x27, #3 // x15 = src_depth_quad * UNIT * SRC_UNIT * sizeof(int4_t) + +ldr x25, [x6, #40] // xKernelSum +ldr x26, [x6, #48] // weightQuantBias +ldr x24, [x6, #80] // extraScale + +mov x21, #16 // sizeof(float) * UNIT +ldr x23, [x6, #56] // fp32minmax +Start: +mov x22, #48 // src_steps + +TILE_12: + cmp x7, #12 + blt TILE_8 + cmp x5, #2 + blt L4LoopDz_TILE_12 +L8LoopDz_TILE_12: + //ld1 {v0.4s, v1.4s}, [x9], #32 // bias + mov x11, x1 + mov x13, x3 + mov x20, x0 // tag dst address + mov x27, x2 + movi v7.16b, #15 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + SET_BIAS v24, v25, v26, v27 + SET_BIAS v28, v29, v30, v31 + + L8LoopSz_TILE_12: + ld1 {v3.d}[0], [x2], x15 // weight + ld1 {v4.d}[0], [x2], #8 + ld1 {v0.16b, v1.16b, v2.16b}, [x11], #48 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + + .inst 0x4f82e070 // sdot v16.4s, v3.16b, v2.4b[0] + .inst 0x4fa2e071 // sdot v17.4s, v3.16b, v2.4b[1] + .inst 0x4f82e872 // sdot v18.4s, v3.16b, v2.4b[2] + .inst 0x4fa2e873 // sdot v19.4s, v3.16b, v2.4b[3] + + .inst 0x4f80e094 // sdot v20.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e095 // sdot v21.4s, v4.16b, v0.4b[1] + .inst 0x4f80e896 // sdot v22.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e897 // sdot v23.4s, v4.16b, v0.4b[3] + sub x2, x2, x15 + .inst 0x4f81e098 // sdot v24.4s, v4.16b, v1.4b[0] + .inst 0x4fa1e099 // sdot v25.4s, v4.16b, v1.4b[1] + .inst 0x4f81e89a // sdot v26.4s, v4.16b, v1.4b[2] + .inst 0x4fa1e89b // sdot v27.4s, v4.16b, v1.4b[3] + subs x13, x13, #1 + .inst 0x4f82e09c // sdot v28.4s, v4.16b, v2.4b[0] + .inst 0x4fa2e09d // sdot v29.4s, v4.16b, v2.4b[1] + .inst 0x4f82e89e // sdot v30.4s, v4.16b, v2.4b[2] + .inst 0x4fa2e89f // sdot v31.4s, v4.16b, v2.4b[3] + bne L8LoopSz_TILE_12 + + L8LoopSzEnd_TILE_12: + // add x2, x2, x15 + add x2, x27, x15, LSL #1 + sub x5, x5, #2 + + L8Tile12Quan: + ld1 {v0.4s, v1.4s}, [x8], #32 // scale + ld1 {v2.4s, v3.4s, v4.4s}, [x25] // x kernel sum + ld1 {v5.4s, v6.4s}, [x26], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + Int32ToFloat v20, v21, v22, v23 + Int32ToFloat v24, v25, v26, v27 + Int32ToFloat v28, v29, v30, v31 + + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v0, v16, v17, v18, v19 + MUL_SCALE v1, v20, v21, v22, v23 + MUL_SCALE v1, v24, v25, v26, v27 + MUL_SCALE v1, v28, v29, v30, v31 + + cbz x24, TILE12_L8_MLA + ld1 {v0.4s, v1.4s}, [x24], #32 + ld1 {v7.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v7, v16, 
v17, v18, v19 + MUL_EXTRA_SCALE v0, v20, v21, v22, v23 + MUL_EXTRA_SCALE v1, v24, v25, v26, v27 + MUL_EXTRA_SCALE v7, v28, v29, v30, v31 + sub x24, x24, #32 + + TILE12_L8_MLA: + MLA_WEIGHTZERO v8, v2, v5, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v5, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v5, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v5, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v5, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v5, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v5, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v5, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v4, v5, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v4, v5, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v4, v5, 2 // tile:10, oc:0-3 + MLA_WEIGHTZERO v19, v4, v5, 3 // tile:11, oc:0-3 + + MLA_WEIGHTZERO v20, v2, v6, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v21, v2, v6, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v22, v2, v6, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v23, v2, v6, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v24, v3, v6, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v25, v3, v6, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v26, v3, v6, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v27, v3, v6, 3 // tile:7, oc:4-7 + MLA_WEIGHTZERO v28, v4, v6, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v29, v4, v6, 1 // tile:9, oc:4-7 + MLA_WEIGHTZERO v30, v4, v6, 2 // tile:10, oc:4-7 + MLA_WEIGHTZERO v31, v4, v6, 3 // tile:11, oc:4-7 + + sub x4, x4, #128 + + cbz x9, TILE12_ADD_DSTV + TILE12_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x9], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + ADD_BIAS_FLOAT v24, v25, v26, v27, v1 + ADD_BIAS_FLOAT v28, v29, v30, v31, v1 + b TILE12_POST + + TILE12_ADD_DSTV: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x20], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x20], #64 + ADD_FLOAT v8, v9, v10, v11, v0, v1, v2, v3 + ADD_FLOAT v12, v13, v14, v15, v4, v5, v6, v7 + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x20], x4 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x20], #64 + ADD_FLOAT v16, v17, v18, v19, v0, v1, v2, v3 + ADD_FLOAT v20, v21, v22, v23, v4, v5, v6, v7 + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x20], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x20] + ADD_FLOAT v24, v25, v26, v27, v0, v1, v2, v3 + ADD_FLOAT v28, v29, v30, v31, v4, v5, v6, v7 + + TILE12_POST: + cbz x23, TILE12_STORE + ld1r {v0.4s}, [x23], #4 // f32 min + ld1r {v1.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v0, v1 + ReLU_FP32 v12, v13, v14, v15, v0, v1 + ReLU_FP32 v16, v17, v18, v19, v0, v1 + ReLU_FP32 v20, v21, v22, v23, v0, v1 + ReLU_FP32 v24, v25, v26, v27, v0, v1 + ReLU_FP32 v28, v29, v30, v31, v0, v1 + sub x23, x23, #4 + + TILE12_STORE: + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], #64 + st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0], x4 + st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x0], x4 + add x4, x4, #128 + + L8Tile12LoopCheck: + cmp x5, #1 + bgt L8LoopDz_TILE_12 + blt End + +L4LoopDz_TILE_12: + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + movi v7.16b, #15 + + L4LoopSz_TILE_12: + ld1 {v3.d}[0], [x2], #8 // weight + ld1 {v0.16b, v1.16b, v2.16b}, [x1], #48 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, 
v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + subs x3, x3, #1 + .inst 0x4f82e070 // sdot v16.4s, v3.16b, v2.4b[0] + .inst 0x4fa2e071 // sdot v17.4s, v3.16b, v2.4b[1] + .inst 0x4f82e872 // sdot v18.4s, v3.16b, v2.4b[2] + .inst 0x4fa2e873 // sdot v19.4s, v3.16b, v2.4b[3] + bne L4LoopSz_TILE_12 + + L4LoopSzEnd_TILE_12: + + L4Tile12Quan: + ld1 {v0.4s}, [x8] // scale + ld1 {v2.4s, v3.4s, v4.4s}, [x25]// x kernel sum + ld1 {v5.4s}, [x26], #16 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v0, v16, v17, v18, v19 + + cbz x24, TILE12_L4_MLA + ld1 {v0.4s, v1.4s}, [x24], #32 + ld1 {v7.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v7, v16, v17, v18, v19 + sub x24, x24, #32 + + TILE12_L4_MLA: + MLA_WEIGHTZERO v8, v2, v5, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v5, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v5, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v5, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v5, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v5, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v5, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v5, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v4, v5, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v17, v4, v5, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v4, v5, 2 // tile:10, oc:0-3 + MLA_WEIGHTZERO v19, v4, v5, 3 // tile:11, oc:0-3 + + sub x4, x4, #128 + + TILE12_L4_ADD_BIAS: + cbz x9, TILE12_L4_ADD_DSTV + ld1 {v0.4s}, [x9] // bias + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v0 + b TILE12_L4_POST + + TILE12_L4_ADD_DSTV: + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x0] + sub x0, x0, #128 + ADD_FLOAT v8, v9, v10, v11, v20, v21, v22, v23 + ADD_FLOAT v12, v13, v14, v15, v24, v25, v26, v27 + ADD_FLOAT v16, v17, v18, v19, v28, v29, v30, v31 + + TILE12_L4_POST: + cbz x23, TILE12_L4_STORE + ld1r {v6.4s}, [x23], #4 // f32 min + ld1r {v7.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v6, v7 + ReLU_FP32 v12, v13, v14, v15, v6, v7 + ReLU_FP32 v16, v17, v18, v19, v6, v7 + sub x23, x23, #4 + TILE12_L4_STORE: + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x0], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x0], #64 + st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0], x4 + add x4, x4, #128 + b End + +TILE_8: + cmp x7, #8 + blt TILE_4 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x26 // weightQuantBias + cmp x5, #2 + blt L4LoopDz_TILE_8 +L8LoopDz_TILE_8: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 + mov x13, x3 + mov x27, x12 + movi v7.16b, #15 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + SET_BIAS v16, v17, v18, v19 + SET_BIAS v20, v21, v22, v23 + + L8LoopSz_TILE_8: + ld1 {v3.d}[0], [x12], x15 // weight + ld1 {v4.d}[0], [x12], #8 + ld1 {v0.16b, v1.16b}, [x11], x22 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // 
sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + sub x12, x12, x15 + .inst 0x4f80e090 // sdot v16.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e091 // sdot v17.4s, v4.16b, v0.4b[1] + .inst 0x4f80e892 // sdot v18.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e893 // sdot v19.4s, v4.16b, v0.4b[3] + subs x13, x13, #1 + .inst 0x4f81e094 // sdot v20.4s, v4.16b, v1.4b[0] + .inst 0x4fa1e095 // sdot v21.4s, v4.16b, v1.4b[1] + .inst 0x4f81e896 // sdot v22.4s, v4.16b, v1.4b[2] + .inst 0x4fa1e897 // sdot v23.4s, v4.16b, v1.4b[3] + bne L8LoopSz_TILE_8 + + L8LoopSzEnd_TILE_8: + //add x12, x12, x15 + add x12, x27, x15, LSL #1 + sub x14, x14, #2 + + L8Tile8Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s, v3.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + Int32ToFloat v20, v21, v22, v23 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + MUL_SCALE v1, v16, v17, v18, v19 + MUL_SCALE v1, v20, v21, v22, v23 + + cbz x24, TILE8_L8_MLA + ld1 {v0.4s, v1.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + MUL_EXTRA_SCALE v0, v16, v17, v18, v19 + MUL_EXTRA_SCALE v1, v20, v21, v22, v23 + + TILE8_L8_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v24, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v24, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v24, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v24, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v17, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v18, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v19, v2, v25, 3 // tile:3, oc:4-7 + MLA_WEIGHTZERO v20, v3, v25, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v21, v3, v25, 1 // tile:5, oc:4-7 + MLA_WEIGHTZERO v22, v3, v25, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v23, v3, v25, 3 // tile:7, oc:4-7 + + sub x4, x4, #64 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v0.4s, v1.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v0 + ADD_BIAS_FLOAT v12, v13, v14, v15, v0 + ADD_BIAS_FLOAT v16, v17, v18, v19, v1 + ADD_BIAS_FLOAT v20, v21, v22, v23, v1 + b TILE8_POST + + TILE8_ADD_DSTV: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x10], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x10], x4 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x10], #64 + ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x10] + ADD_FLOAT v8, v9, v10, v11, v0, v1, v2, v3 + ADD_FLOAT v12, v13, v14, v15, v4, v5, v6, v7 + ADD_FLOAT v16, v17, v18, v19, v24, v25, v26, v27 + ADD_FLOAT v20, v21, v22, v23, v28, v29, v30, v31 + sub x10, x10, #128 + sub x10, x10, x4 + + TILE8_POST: + cbz x23, TILE8_STORE + ld1r {v0.4s}, [x23], #4 // f32 min + ld1r {v1.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v0, v1 + ReLU_FP32 v12, v13, v14, v15, v0, v1 + ReLU_FP32 v16, v17, v18, v19, v0, v1 + ReLU_FP32 v20, v21, v22, v23, v0, v1 + sub x23, x23, #4 + + TILE8_STORE: + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], 
x4 + st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10], #64 + st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x10], x4 + add x4, x4, #64 + + L8Tile8LoopCheck: + cmp x14, #1 + bgt L8LoopDz_TILE_8 + cbz x14, Tile8End + +L4LoopDz_TILE_8: + //ld1 {v0.4s}, [x20], #16 // bias + mov x11, x1 + mov x13, x3 + movi v7.16b, #15 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + + L4LoopSz_TILE_8: + ld1 {v3.d}[0], [x12], #8 // weight + ld1 {v0.16b, v1.16b}, [x11], x22 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + subs x13, x13, #1 + .inst 0x4f81e06c // sdot v12.4s, v3.16b, v1.4b[0] + .inst 0x4fa1e06d // sdot v13.4s, v3.16b, v1.4b[1] + .inst 0x4f81e86e // sdot v14.4s, v3.16b, v1.4b[2] + .inst 0x4fa1e86f // sdot v15.4s, v3.16b, v1.4b[3] + bne L4LoopSz_TILE_8 + + L4LoopSzEnd_TILE_8: + + L4Tile8Quan: + ld1 {v0.4s}, [x19], #16 // scale + ld1 {v2.4s, v3.4s}, [x25] // x kernel sum + ld1 {v24.4s}, [x6], #16 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v0, v12, v13, v14, v15 + + cbz x24, TILE8_L4_MLA + ld1 {v0.4s, v1.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v1, v12, v13, v14, v15 + + TILE8_L4_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v3, v24, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v13, v3, v24, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v3, v24, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v15, v3, v24, 3 // tile:7, oc:0-3 + + sub x4, x4, #64 + + cbz x9, TILE8_L4_ADD_DSTV + TILE8_L4_ADD_BIAS: + ld1 {v4.4s}, [x20], #16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v4 + ADD_BIAS_FLOAT v12, v13, v14, v15, v4 + b TILE8_L4_POST + + TILE8_L4_ADD_DSTV: + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x10], #64 + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10] + sub x10, x10, #64 + ADD_FLOAT v8, v9, v10, v11, v4, v5, v6, v7 + ADD_FLOAT v12, v13, v14, v15, v16, v17, v18, v19 + + TILE8_L4_POST: + cbz x23, TILE8_L4_STORE + ld1r {v0.4s}, [x23], #4 // f32 min + ld1r {v1.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v0, v1 + ReLU_FP32 v12, v13, v14, v15, v0, v1 + sub x23, x23, #4 + + TILE8_L4_STORE: + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 + add x4, x4, #64 + +Tile8End: +cbz x24, Tile8_End_Offset +add x24, x24, #32 + +Tile8_End_Offset: + sub x7, x7, #8 + add x0, x0, x21, LSL #3 + add x1, x1, #32 + add x25, x25, #32 + +TILE_4: + cmp x7, #4 + blt TILE_1 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 + mov x20, x9 + mov x6, x26 // weightQuantBias + cmp x5, #2 + blt L4LoopDz_TILE_4 +L8LoopDz_TILE_4: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 + mov x13, x3 + mov x27, x12 + movi v7.16b, #15 + + SET_BIAS v8, v9, v10, v11 + SET_BIAS v12, v13, v14, v15 + + L8LoopSz_TILE_4: + ld1 {v3.d}[0], [x12], x15 // weight + ld1 {v0.16b}, [x11], x22 // src + ld1 {v4.d}[0], [x12], #8 // weight + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, 
v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + subs x13, x13, #1 + sub x12, x12, x15 + .inst 0x4f80e08c // sdot v12.4s, v4.16b, v0.4b[0] + .inst 0x4fa0e08d // sdot v13.4s, v4.16b, v0.4b[1] + .inst 0x4f80e88e // sdot v14.4s, v4.16b, v0.4b[2] + .inst 0x4fa0e88f // sdot v15.4s, v4.16b, v0.4b[3] + bne L8LoopSz_TILE_4 + + L8LoopSzEnd_TILE_4: + //add x12, x12, x15 + add x12, x27, x15, LSL #1 + sub x14, x14, #2 + + L8Tile4Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.4s}, [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + MUL_SCALE v0, v8, v9, v10, v11 + MUL_SCALE v1, v12, v13, v14, v15 + + cbz x24, TILE4_L8_MLA + ld1 {v0.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + MUL_EXTRA_SCALE v0, v12, v13, v14, v15 + + TILE4_L8_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v2, v25, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v13, v2, v25, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v14, v2, v25, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v15, v2, v25, 3 // tile:3, oc:4-7 + + cbz x9, TILE4_ADD_DSTV + TILE4_ADD_BIAS: + ld1 {v4.4s, v5.4s}, [x20], #32 + ADD_BIAS_FLOAT v8, v9, v10, v11, v4 + ADD_BIAS_FLOAT v12, v13, v14, v15, v5 + b TILE4_POST + + TILE4_ADD_DSTV: + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x10], x4 + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x10] + sub x10, x10, x4 + ADD_FLOAT v8, v9, v10, v11, v4, v5, v6, v7 + ADD_FLOAT v12, v13, v14, v15, v16, v17, v18, v19 + + TILE4_POST: + cbz x23, TILE4_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v26, v27 + ReLU_FP32 v12, v13, v14, v15, v26, v27 + sub x23, x23, #4 + + TILE4_STORE: + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], x4 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10], x4 + + L8Tile4LoopCheck: + cmp x14, #1 + bgt L8LoopDz_TILE_4 + cbz x14, Tile4End + +L4LoopDz_TILE_4: + //ld1 {v0.4s}, [x20], #16 // bias + mov x11, x1 + mov x13, x3 + movi v7.16b, #15 + SET_BIAS v8, v9, v10, v11 + + L4LoopSz_TILE_4: + ld1 {v3.d}[0], [x12], #8 // weight + ld1 {v0.16b}, [x11], x22 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + subs x13, x13, #1 + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + .inst 0x4fa0e069 // sdot v9.4s, v3.16b, v0.4b[1] + .inst 0x4f80e86a // sdot v10.4s, v3.16b, v0.4b[2] + .inst 0x4fa0e86b // sdot v11.4s, v3.16b, v0.4b[3] + bne L4LoopSz_TILE_4 + + L4LoopSzEnd_TILE_4: + + L4Tile4Quan: + ld1 {v0.4s}, [x19], #16 // scale + ld1 {v2.4s}, [x25] // x kernel sum + ld1 {v24.4s}, [x6], #16 // weight quan zeropoint + Int32ToFloat v8, v9, v10, v11 + MUL_SCALE v0, v8, v9, v10, v11 + + cbz x24, TILE4_L4_MLA + ld1 {v0.4s}, [x24] + MUL_EXTRA_SCALE v0, v8, v9, v10, v11 + + TILE4_L4_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v24, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v2, v24, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v11, v2, v24, 3 // tile:3, oc:0-3 + + cbz x9, TILE4_L4_ADD_DSTV + TILE4_L4_ADD_BIAS: + ld1 {v3.4s}, [x20], #16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v3 + b TILE4_L4_POST + + TILE4_L4_ADD_DSTV: + ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x10] + ADD_FLOAT v8, v9, v10, v11, v12, v13, v14, v15 + + TILE4_L4_POST: + cbz x23, 
TILE4_L4_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + ReLU_FP32 v8, v9, v10, v11, v26, v27 + sub x23, x23, #4 + + TILE4_L4_STORE: + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x10], x4 + +Tile4End: +cbz x24, Tile4_End_Offset +add x24, x24, #16 + +Tile4_End_Offset: + sub x7, x7, #4 + add x0, x0, x21, LSL #2 + add x1, x1, #16 + add x25, x25, #16 + +TILE_1: + cbz x7, End + movi v7.16b, #15 + mov x10, x0 + mov x12, x2 + mov x14, x5 + mov x19, x8 + mov x20, x9 + mov x6, x26 // weightQuantBias + cmp x5, #2 + blt L4LoopDz_TILE_1 +L8LoopDz_TILE_1: + //ld1 {v0.4s, v1.4s}, [x20], #32 // bias + mov x11, x1 + mov x13, x3 + mov x27, x12 + + movi v8.16b, #0 + movi v9.16b, #0 + L8LoopSz_TILE_1: + ld1 {v3.d}[0], [x12], x15 // weight + ld1 {v0.s}[0], [x11], x22 // src + ld1 {v4.d}[0], [x12], #8 // weight + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + subs x13, x13, #1 + // int4->int8 + ushr v5.16b, v4.16b, #4 + and v6.16b, v4.16b, v7.16b + zip1 v4.16b, v5.16b, v6.16b + sub x12, x12, x15 + + .inst 0x4f80e089 // sdot v9.4s, v4.16b, v0.4b[0] + bne L8LoopSz_TILE_1 + + L8LoopSzEnd_TILE_1: + add x12, x27, x15, LSL #1 + sub x14, x14, #2 + + L8Tile1Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v2.s}[0], [x25] // x kernel sum + ld1 {v24.4s, v25.4s}, [x6], #32 // weight quan zeropoint + scvtf v8.4s, v8.4s + scvtf v9.4s, v9.4s + fmul v8.4s, v8.4s, v0.4s + fmul v9.4s, v9.4s, v1.4s + + cbz x24, TILE1_L8_MLA + ld1 {v0.s}[0], [x24] + fmul v8.4s, v8.4s, v0.s[0] + fmul v9.4s, v9.4s, v0.s[0] + + TILE1_L8_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v9, v2, v25, 0 // tile:0, oc:4-7 + + cbz x9, TILE1_ADD_DSTV + TILE1_ADD_BIAS: + ld1 {v10.4s, v11.4s}, [x20], #32 + fadd v8.4s, v8.4s, v10.4s + fadd v9.4s, v9.4s, v11.4s + b TILE1_POST + + TILE1_ADD_DSTV: + ld1 {v10.4s}, [x10], x4 + ld1 {v11.4s}, [x10] + sub x10, x10, x4 + fadd v8.4s, v8.4s, v10.4s + fadd v9.4s, v9.4s, v11.4s + + TILE1_POST: + cbz x23, TILE1_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + sub x23, x23, #4 + fmin v8.4s, v8.4s, v27.4s + fmin v9.4s, v9.4s, v27.4s + fmax v8.4s, v8.4s, v26.4s + fmax v9.4s, v9.4s, v26.4s + + TILE1_STORE: + st1 {v8.4s}, [x10], x4 + st1 {v9.4s}, [x10], x4 + + L8Tile1LoopCheck: + cmp x14, #1 + bgt L8LoopDz_TILE_1 + cbz x14, Tile1End + +L4LoopDz_TILE_1: + //ld1 {v0.4s}, [x20], #16 // bias + mov x11, x1 + mov x13, x3 + movi v8.16b, #0 + L4LoopSz_TILE_1: + ld1 {v3.d}[0], [x12], #8 // weight + ld1 {v0.s}[0], [x11], x22 // src + // int4->int8 + ushr v5.16b, v3.16b, #4 + and v6.16b, v3.16b, v7.16b + zip1 v3.16b, v5.16b, v6.16b + subs x13, x13, #1 + + .inst 0x4f80e068 // sdot v8.4s, v3.16b, v0.4b[0] + bne L4LoopSz_TILE_1 + + L4LoopSzEnd_TILE_1: + + L4Tile1Quan: + ld1 {v0.4s}, [x19], #16 // scale + ld1 {v2.s}[0], [x25] // x kernel sum + ld1 {v24.4s}, [x6], #16 // weight quan zeropoint + scvtf v8.4s, v8.4s + fmul v8.4s, v8.4s, v0.4s + + cbz x24, TILE1_L4_MLA + ld1 {v0.s}[0], [x24] + fmul v8.4s, v8.4s, v0.s[0] + + TILE1_L4_MLA: + MLA_WEIGHTZERO v8, v2, v24, 0 // tile:0, oc:0-3 + + cbz x9, TILE1_L4_ADD_DSTV + TILE1_L4_ADD_BIAS: + ld1 {v4.4s}, [x20], #16 + fadd v8.4s, v8.4s, v4.4s + b TILE1_L4_POST + + TILE1_L4_ADD_DSTV: + ld1 {v4.4s}, [x10] + fadd v8.4s, v8.4s, v4.4s + + TILE1_L4_POST: + cbz x23, TILE1_L4_STORE + ld1r {v26.4s}, [x23], #4 // f32 min + ld1r {v27.4s}, [x23] // f32 max + sub x23, x23, #4 + fmax v8.4s, v8.4s, v26.4s + fmin v8.4s, v8.4s, 
v27.4s + TILE1_L4_STORE: + st1 {v8.4s}, [x10], x4 + +Tile1End: +cbz x24, Tile1_End_Offset +add x24, x24, #4 + +Tile1_End_Offset: + sub x7, x7, #1 + add x0, x0, x21 + add x1, x1, #4 + add x25, x25, #4 + b TILE_1 + +End: +ldp x23, x24, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] +ldp x27, x28, [sp, #(16 * 6)] +ldp x19, x20, [sp, #(16 * 5)] +ldp x21, x22, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 10) +ret + +#endif // __aarch64__ diff --git a/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit.S b/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit.S new file mode 100644 index 000000000..b4cc330c2 --- /dev/null +++ b/source/backend/cpu/arm/arm64/low_memory/MNNGemmInt8AddBiasScale_ARMV86_w4_Unit.S @@ -0,0 +1,1205 @@ +// +// MNNGemmInt8AddBiasScale_ARMV86_w4_Unit.S +// MNN +// +// Created by MNN on 2022/09/26. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#if defined(__aarch64__) +#include "MNNAsmGlobal.h" + +.text +.align 5 + +.macro SET_0_5 d0, d1, d2, d3, d4 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 + movi \d4\().16b, #0 +.endm +.macro SET_0_4 d0, d1, d2, d3 + movi \d0\().16b, #0 + movi \d1\().16b, #0 + movi \d2\().16b, #0 + movi \d3\().16b, #0 +.endm +.macro ADD_BIAS_FLOAT d0, d1, d2, d3, z0 + fadd \d0\().4s, \d0\().4s, \z0\().4s + fadd \d1\().4s, \d1\().4s, \z0\().4s + fadd \d2\().4s, \d2\().4s, \z0\().4s + fadd \d3\().4s, \d3\().4s, \z0\().4s +.endm +.macro ADD_FLOAT d0, d1, d2, d3, s0, s1, s2, s3 + fadd \d0\().4s, \d0\().4s, \s0\().4s + fadd \d1\().4s, \d1\().4s, \s1\().4s + fadd \d2\().4s, \d2\().4s, \s2\().4s + fadd \d3\().4s, \d3\().4s, \s3\().4s +.endm +.macro Int32ToFloat z0, z1, z2, z3 + scvtf \z0\().4s, \z0\().4s + scvtf \z1\().4s, \z1\().4s + scvtf \z2\().4s, \z2\().4s + scvtf \z3\().4s, \z3\().4s +.endm +.macro MUL_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().4s + fmul \d1\().4s, \d1\().4s, \s\().4s + fmul \d2\().4s, \d2\().4s, \s\().4s + fmul \d3\().4s, \d3\().4s, \s\().4s +.endm +.macro MUL_EXTRA_SCALE s, d0, d1, d2, d3 + fmul \d0\().4s, \d0\().4s, \s\().s[0] + fmul \d1\().4s, \d1\().4s, \s\().s[1] + fmul \d2\().4s, \d2\().4s, \s\().s[2] + fmul \d3\().4s, \d3\().4s, \s\().s[3] +.endm +.macro MLA_WEIGHTZERO d0, s0, s1, idx // idx for xKernelSum + fmla \d0\().4s, \s1\().4s, \s0\().s[\idx] +.endm +.macro ReLU_FP32 s0, s1, s2, s3, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmin \s2\().4s, \s2\().4s, \z1\().4s + fmin \s3\().4s, \s3\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s + fmax \s2\().4s, \s2\().4s, \z0\().4s + fmax \s3\().4s, \s3\().4s, \z0\().4s +.endm +.macro ReLU_FP32_2 s0, s1, z0, z1 // z0:min z1:max + fmin \s0\().4s, \s0\().4s, \z1\().4s + fmin \s1\().4s, \s1\().4s, \z1\().4s + fmax \s0\().4s, \s0\().4s, \z0\().4s + fmax \s1\().4s, \s1\().4s, \z0\().4s +.endm + +asm_function MNNGemmInt8AddBiasScale_ARMV86_w4_Unit +/* +struct QuanPostTreatParameters { + const float* scale; + const float* biasFloat; + int32_t maxValue; + int32_t minValue; + int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
+ float roundValuePos = 0.5f; + float roundValueNeg = -0.5f; + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum; + float* extraScale; + +}; +*/ +//void MNNGemmInt8AddBiasScale_ARMV86_w4_Unit(int8_t* dst, const int8_t* src, +// const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, +// const QuanPostTreatParameters* parameters, size_t realDstCount); + +//Auto: x0:dst, x1:src, x2:weight, x3:src_depth_quad, x4:dst_step +//x5:dst_depth_quad, x6: parameters, x7: realDstCount + +//Load from x6: x8: scale, x9: bias, x27: srcKernelSum, x28: weightQuanBias, +ldr x8, [x6, #0] +ldr x9, [x6, #8] + +stp d14, d15, [sp, #(-16 * 10)]! +stp d12, d13, [sp, #(16 * 1)] +stp d10, d11, [sp, #(16 * 2)] +stp d8, d9, [sp, #(16 * 3)] +stp x21, x22, [sp, #(16 * 4)] +stp x19, x20, [sp, #(16 * 5)] +stp x23, x24, [sp, #(16 * 6)] +stp x25, x26, [sp, #(16 * 7)] +stp x27, x28, [sp, #(16 * 8)] +ldr x27, [x6, #40] // srcKernelSum +ldr x28, [x6, #48] // weightQuanBias + +ldr x22, [x6, #64] // blockNum +mul x22, x22, x3 // UP_DIV(ic*ky*kx, SRC_UNIT) = blockNum * src_depth_quad_per_block +lsl x15, x22, #5 // x15 = src_depth_quad * UNIT * UNIT_SRC = src_depth_quad * 64 * (sizeof(int4)) = src_depth_quad << 4 + +mov x21, #16 // sizeof(float) * pack +ldr x14, [x6, #56] // float32 maxmin ptr +ldr x23, [x6, #80] // extra scale + +Start: +mov x22, #80 // GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT = 10 * 8 = 80 + +TILE_10: + cmp x7, #10 + blt TILE_8 + sub x4, x4, #128 // For float32 output, x4-128 +cmp x5, #2 +blt LoopDz4_TILE_10 + +LoopDz8_TILE_10: + mov x11, x1 // src + mov x12, x2 // weight + mov x13, x3 // src_depth_quad + + SET_0_5 v12, v16, v20, v24, v28 // oc:0,1,0,1 + SET_0_5 v13, v17, v21, v25, v29 // oc:2,3,2,3 + SET_0_5 v14, v18, v22, v26, v30 // oc:4,5,4,5 + SET_0_5 v15, v19, v23, v27, v31 // oc:6,7,6,7 + +LoopSz_TILE_10: + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + movi v2.16b, #15 + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], #64 // src: E0-E9 + ld1 {v7.16b}, [x11], #16 + // int4->int8 + + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v2.16b // oc:4-5 + and v11.16b, v1.16b, v2.16b // oc:6-7 + + subs x13, x13, #1 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, 
v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + + .inst 0x4e88a4fc // smmla v28.4s, v7.16b, v8.16b // tile8-oc0, tile8-oc1, tile9-oc0, tile9-oc1 + .inst 0x4e89a4fd // smmla v29.4s, v7.16b, v9.16b // tile8-oc2, tile8-oc3, tile9-oc2, tile9-oc3 + .inst 0x4e8aa4fe // smmla v30.4s, v7.16b, v10.16b // tile8-oc4, tile8-oc5, tile9-oc4, tile9-oc5 + .inst 0x4e8ba4ff // smmla v31.4s, v7.16b, v11.16b // tile8-oc6, tile8-oc7, tile9-oc6, tile9-oc7 + bne LoopSz_TILE_10 +LoopSzEnd_TILE_10: + add x2, x2, x15 // weight += dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT * 0.5); + sub x5, x5, #2 // dz-2 + // transpose + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + uzp1 v4.2d, v20.2d, v21.2d + uzp2 v5.2d, v20.2d, v21.2d + uzp1 v6.2d, v24.2d, v25.2d + uzp2 v7.2d, v24.2d, v25.2d + uzp1 v8.2d, v28.2d, v29.2d + uzp2 v9.2d, v28.2d, v29.2d + + uzp1 v10.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v11.2d, v14.2d, v15.2d // E1: oc:4-7 + uzp1 v12.2d, v18.2d, v19.2d + uzp2 v13.2d, v18.2d, v19.2d + uzp1 v14.2d, v22.2d, v23.2d + uzp2 v15.2d, v22.2d, v23.2d + uzp1 v16.2d, v26.2d, v27.2d + uzp2 v17.2d, v26.2d, v27.2d + uzp1 v18.2d, v30.2d, v31.2d + uzp2 v19.2d, v30.2d, v31.2d + + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + Int32ToFloat v16, v17, v18, v19 + +Tile10Quan: + ld1 {v20.4s, v21.4s}, [x8], #32 // scale + ld1 {v22.4s, v23.4s}, [x27], #32 // x kernel sum + ld1 {v24.d}[0], [x27] + ld1 {v25.4s, v26.4s}, [x28], #32 // weight quan zeropoint + sub x27, x27, #32 + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + MUL_SCALE v21, v10, v11, v12, v13 + MUL_SCALE v21, v14, v15, v16, v17 + fmul v8.4s, v8.4s, v20.4s + fmul v9.4s, v9.4s, v20.4s + fmul v18.4s, v18.4s, v21.4s + fmul v19.4s, v19.4s, v21.4s + + cbz x23, TILE10_MLA + ld1 {v27.4s, v28.4s}, [x23], #32 + ld1 {v29.d}[0], [x23] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v28, v4, v5, v6, v7 + MUL_EXTRA_SCALE v27, v10, v11, v12, v13 + MUL_EXTRA_SCALE v28, v14, v15, v16, v17 + fmul v8.4s, v8.4s, v29.s[0] + fmul v9.4s, v9.4s, v29.s[1] + fmul v18.4s, v18.4s, v29.s[0] + fmul v19.4s, v19.4s, v29.s[1] + sub x23, x23, #32 + + TILE10_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v10, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v11, v22, v26, 1 // tile:1, oc:4-7 + + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v12, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v13, v22, v26, 3 // tile:3, oc:4-7 + + MLA_WEIGHTZERO v4, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v5, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v14, v23, v26, 0 // tile:4, oc:4-7 + MLA_WEIGHTZERO v15, v23, v26, 1 // tile:5, oc:4-7 + + MLA_WEIGHTZERO v6, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v7, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v16, v23, v26, 2 // tile:6, oc:4-7 + MLA_WEIGHTZERO v17, v23, v26, 3 // tile:7, oc:4-7 + + MLA_WEIGHTZERO v8, v24, v25, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v9, v24, v25, 1 // tile:9, oc:0-3 + MLA_WEIGHTZERO v18, v24, v26, 0 // tile:8, oc:4-7 + MLA_WEIGHTZERO v19, v24, v26, 1 // tile:9, oc:4-7 + + 
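+    // When biasFloat (x9) is null this is an accumulation pass (e.g. a later block when
+    // blockNum > 1), so the existing dst values are added back instead of the bias.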
TILE10_ADD_BIAS: + cbz x9, TILE10_ADD_DSTV + ld1 {v20.4s, v21.4s}, [x9], #32 // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v20 + ADD_BIAS_FLOAT v4, v5, v6, v7, v20 + ADD_BIAS_FLOAT v10, v11, v12, v13, v21 + ADD_BIAS_FLOAT v14, v15, v16, v17, v21 + fadd v8.4s, v8.4s, v20.4s + fadd v9.4s, v9.4s, v20.4s + fadd v18.4s, v18.4s, v21.4s + fadd v19.4s, v19.4s, v21.4s + b TILE10_POST + + TILE10_ADD_DSTV: + // first batch10 + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s}, [x0], x4 + ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + fadd v8.4s, v8.4s, v28.4s + fadd v9.4s, v9.4s, v29.4s + + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s}, [x0] + ADD_FLOAT v10, v11, v12, v13, v20, v21, v22, v23 + ADD_FLOAT v14, v15, v16, v17, v24, v25, v26, v27 + fadd v18.4s, v18.4s, v28.4s + fadd v19.4s, v19.4s, v29.4s + + sub x0, x0, #256 + sub x0, x0, x4 + + TILE10_POST: + cbz x14, TILE10_STORE + ld1r {v30.4s}, [x14], #4 // f32 min + ld1r {v31.4s}, [x14] // f32 max + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + ReLU_FP32 v8, v9, v10, v11, v30, v31 + ReLU_FP32 v12, v13, v14, v15, v30, v31 + ReLU_FP32 v16, v17, v18, v19, v30, v31 + sub x14, x14, #4 + + TILE10_STORE: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], #64 + st1 {v8.4s, v9.4s}, [x0], x4 + st1 {v10.4s, v11.4s, v12.4s, v13.4s}, [x0], #64 + st1 {v14.4s, v15.4s, v16.4s, v17.4s}, [x0], #64 + st1 {v18.4s, v19.4s}, [x0], x4 + +Tile10LoopCheck: + cmp x5, #2 + bge LoopDz8_TILE_10 + cbz x5, End + +LoopDz4_TILE_10: + mov x11, x1 // src + mov x12, x2 // weight + mov x13, x3 // src_depth_quad + + SET_0_5 v12, v13, v16, v17, v20 + SET_0_5 v21, v24, v25, v28, v29 + +LoopSz4_TILE_10: + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], #64 // src: E0-E9 + ld1 {v7.16b}, [x11], #16 + subs x13, x13, #1 + + // int4->int8 + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + + .inst 0x4e88a4fc // smmla v28.4s, v7.16b, v8.16b // tile8-oc0, tile8-oc1, tile9-oc0, tile9-oc1 + .inst 0x4e89a4fd // smmla v29.4s, v7.16b, v9.16b // tile8-oc2, tile8-oc3, tile9-oc2, tile9-oc3 + bne LoopSz4_TILE_10 +LoopSz4End_TILE_10: + // transpose + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + uzp1 v4.2d, v20.2d, v21.2d + uzp2 v5.2d, v20.2d, v21.2d + uzp1 v6.2d, v24.2d, v25.2d + uzp2 v7.2d, v24.2d, v25.2d + uzp1 v8.2d, v28.2d, v29.2d + uzp2 v9.2d, v28.2d, v29.2d + + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + 
scvtf v8.4s, v8.4s + scvtf v9.4s, v9.4s + +Tile10Quan_L4: + ld1 {v20.4s}, [x8] // scale + ld1 {v22.4s, v23.4s}, [x27], #32 // x kernel sum + ld1 {v24.d}[0], [x27] + ld1 {v25.4s}, [x28] // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + fmul v8.4s, v8.4s, v20.4s + fmul v9.4s, v9.4s, v20.4s + + cbz x23, TILE10_MLA_L4 + ld1 {v27.4s, v28.4s}, [x23], #32 + ld1 {v29.d}[0], [x23] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v28, v4, v5, v6, v7 + fmul v8.4s, v8.4s, v29.s[0] + fmul v9.4s, v9.4s, v29.s[1] + + TILE10_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v4, v23, v25, 0 // tile:4, oc:0-3 + MLA_WEIGHTZERO v5, v23, v25, 1 // tile:5, oc:0-3 + MLA_WEIGHTZERO v6, v23, v25, 2 // tile:6, oc:0-3 + MLA_WEIGHTZERO v7, v23, v25, 3 // tile:7, oc:0-3 + MLA_WEIGHTZERO v8, v24, v25, 0 // tile:8, oc:0-3 + MLA_WEIGHTZERO v9, v24, v25, 1 // tile:9, oc:0-3 + //sub x4, x4, #128 + + TILE10_ADD_BIAS_L4: + cbz x9, TILE10_ADD_DSTV_L4 + ld1 {v20.4s}, [x9] // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v20 + ADD_BIAS_FLOAT v4, v5, v6, v7, v20 + fadd v8.4s, v8.4s, v20.4s + fadd v9.4s, v9.4s, v20.4s + b TILE10_POST_L4 + + TILE10_ADD_DSTV_L4: + // first batch10 + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x0], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x0], #64 + ld1 {v28.4s, v29.4s}, [x0] + ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + fadd v8.4s, v8.4s, v28.4s + fadd v9.4s, v9.4s, v29.4s + + sub x0, x0, #128 + + TILE10_POST_L4: + cbz x14, TILE10_STORE_L4 + ld1r {v30.4s}, [x14], #4 // f32 min + ld1r {v31.4s}, [x14] // f32 max + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + fmax v8.4s, v8.4s, v30.4s + fmax v9.4s, v9.4s, v30.4s + fmin v8.4s, v8.4s, v31.4s + fmin v9.4s, v9.4s, v31.4s + sub x14, x14, #4 + + TILE10_STORE_L4: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x0], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x0], #64 + st1 {v8.4s, v9.4s}, [x0], x4 + b End + +TILE_8: + // post parameters initilize + cbz x14, TILE_Remain + ld1r {v30.4s}, [x14], #4 // f32 min + ld1r {v31.4s}, [x14] // f32 max + + TILE_Remain: + movi v28.16b, #15 + cmp x7, #8 + blt TILE_4 + sub x4, x4, #64 // For float32 output + + TILE8_START: + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias + +cmp x5, #2 +blt LoopDz4_TILE_8 +LoopDz_TILE_8: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v16, v20, v24 + SET_0_4 v13, v17, v21, v25 + SET_0_4 v14, v18, v22, v26 + SET_0_4 v15, v19, v23, v27 +LoopSz_TILE_8: + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], x22 // src: E0-E7 + + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v28.16b // oc:4-5 + and v11.16b, v1.16b, v28.16b // oc:6-7 + + subs x13, x13, #1 + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa46e // smmla v14.4s, v3.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba46f // smmla v15.4s, v3.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a490 // smmla v16.4s, 
v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa492 // smmla v18.4s, v4.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba493 // smmla v19.4s, v4.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + .inst 0x4e8aa4b6 // smmla v22.4s, v5.16b, v10.16b // tile4-oc4, tile4-oc5, tile5-oc4, tile5-oc5 + .inst 0x4e8ba4b7 // smmla v23.4s, v5.16b, v11.16b // tile4-oc6, tile4-oc7, tile5-oc6, tile5-oc7 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + .inst 0x4e8aa4da // smmla v26.4s, v6.16b, v10.16b // tile6-oc4, tile6-oc5, tile7-oc4, tile7-oc5 + .inst 0x4e8ba4db // smmla v27.4s, v6.16b, v11.16b // tile6-oc6, tile6-oc7, tile7-oc6, tile7-oc7 + bne LoopSz_TILE_8 + +LoopSzEnd_TILE_8: + add x25, x25, x15 + sub x24, x24, #2 // dz-2 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v8.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v9.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v2.2d, v16.2d, v17.2d // E2: oc:0-3 + uzp2 v3.2d, v16.2d, v17.2d // E3: oc:0-3 + uzp1 v10.2d, v18.2d, v19.2d // E2: oc:4-7 + uzp2 v11.2d, v18.2d, v19.2d // E3: oc:4-7 + + uzp1 v4.2d, v20.2d, v21.2d // E4: oc:0-3 + uzp2 v5.2d, v20.2d, v21.2d // E5: oc:0-3 + uzp1 v12.2d, v22.2d, v23.2d // E4: oc:4-7 + uzp2 v13.2d, v22.2d, v23.2d // E5: oc:4-7 + + uzp1 v6.2d, v24.2d, v25.2d // E6: oc:0-3 + uzp2 v7.2d, v24.2d, v25.2d // E7: oc:0-3 + uzp1 v14.2d, v26.2d, v27.2d // E6: oc:4-7 + uzp2 v15.2d, v26.2d, v27.2d // E7: oc:4-7 + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + Int32ToFloat v8, v9, v10, v11 + Int32ToFloat v12, v13, v14, v15 + +Tile8Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s, v23.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + MUL_SCALE v21, v8, v9, v10, v11 + MUL_SCALE v21, v12, v13, v14, v15 + + cbz x23, TILE8_MLA + ld1 {v18.4s, v19.4s}, [x23] + MUL_EXTRA_SCALE v18, v0, v1, v2, v3 + MUL_EXTRA_SCALE v19, v4, v5, v6, v7 + MUL_EXTRA_SCALE v18, v8, v9, v10, v11 + MUL_EXTRA_SCALE v19, v12, v13, v14, v15 + + TILE8_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 + MLA_WEIGHTZERO v1, v22, v25, 1 + MLA_WEIGHTZERO v2, v22, v25, 2 + MLA_WEIGHTZERO v3, v22, v25, 3 + MLA_WEIGHTZERO v4, v23, v25, 0 + MLA_WEIGHTZERO v5, v23, v25, 1 + MLA_WEIGHTZERO v6, v23, v25, 2 + MLA_WEIGHTZERO v7, v23, v25, 3 + + MLA_WEIGHTZERO v8, v22, v26, 0 + MLA_WEIGHTZERO v9, v22, v26, 1 + MLA_WEIGHTZERO v10, v22, v26, 2 + MLA_WEIGHTZERO v11, v22, v26, 3 + MLA_WEIGHTZERO v12, v23, v26, 0 + MLA_WEIGHTZERO v13, v23, v26, 1 + MLA_WEIGHTZERO v14, v23, v26, 2 + MLA_WEIGHTZERO v15, v23, v26, 3 + + cbz x9, TILE8_ADD_DSTV + TILE8_ADD_BIAS: + ld1 {v16.4s, v17.4s}, [x20], #32 + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v16 + ADD_BIAS_FLOAT v8, v9, v10, v11, v17 + ADD_BIAS_FLOAT v12, v13, v14, v15, v17 + b TILE8_POST + + TILE8_ADD_DSTV: + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x26], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x26], x4 + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x26], #64 + ADD_FLOAT v0, v1, 
v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x26] + ADD_FLOAT v8, v9, v10, v11, v16, v17, v18, v19 + ADD_FLOAT v12, v13, v14, v15, v20, v21, v22, v23 + sub x26, x26, x4 + sub x26, x26, #128 + + TILE8_POST: + cbz x14, TILE8_STORE + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + ReLU_FP32 v8, v9, v10, v11, v30, v31 + ReLU_FP32 v12, v13, v14, v15, v30, v31 + + TILE8_STORE: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x26], x4 + st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [x26], #64 + st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [x26], x4 + b Tile8LoopCheck + +Tile8LoopCheck: + cmp x24, #2 + bge LoopDz_TILE_8 + cbz x24, Tile8Check + +LoopDz4_TILE_8: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v13, v16, v17 + SET_0_4 v20, v21, v24, v25 +LoopSz4_TILE_8: + ld1 {v3.16b, v4.16b, v5.16b, v6.16b}, [x11], x22 // src: E0-E7 + subs x13, x13, #1 + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + + .inst 0x4e88a46c // smmla v12.4s, v3.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a46d // smmla v13.4s, v3.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + + .inst 0x4e88a490 // smmla v16.4s, v4.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a491 // smmla v17.4s, v4.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + + .inst 0x4e88a4b4 // smmla v20.4s, v5.16b, v8.16b // tile4-oc0, tile4-oc1, tile5-oc0, tile5-oc1 + .inst 0x4e89a4b5 // smmla v21.4s, v5.16b, v9.16b // tile4-oc2, tile4-oc3, tile5-oc2, tile5-oc3 + + .inst 0x4e88a4d8 // smmla v24.4s, v6.16b, v8.16b // tile6-oc0, tile6-oc1, tile7-oc0, tile7-oc1 + .inst 0x4e89a4d9 // smmla v25.4s, v6.16b, v9.16b // tile6-oc2, tile6-oc3, tile7-oc2, tile7-oc3 + bne LoopSz4_TILE_8 + +LoopSz4End_TILE_8: + add x25, x25, x15 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d // E2: oc:0-3 + uzp2 v3.2d, v16.2d, v17.2d // E3: oc:0-3 + uzp1 v4.2d, v20.2d, v21.2d // E4: oc:0-3 + uzp2 v5.2d, v20.2d, v21.2d // E5: oc:0-3 + uzp1 v6.2d, v24.2d, v25.2d // E6: oc:0-3 + uzp2 v7.2d, v24.2d, v25.2d // E7: oc:0-3 + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + +Tile8Quan_L4: + ld1 {v20.4s}, [x19] // scale + ld1 {v22.4s, v23.4s}, [x27] // x kernel sum + ld1 {v25.4s}, [x6] // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v20, v4, v5, v6, v7 + + cbz x23, TILE8_MLA_L4 + ld1 {v18.4s, v19.4s}, [x23] + MUL_EXTRA_SCALE v18, v0, v1, v2, v3 + MUL_EXTRA_SCALE v19, v4, v5, v6, v7 + + TILE8_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 + MLA_WEIGHTZERO v1, v22, v25, 1 + MLA_WEIGHTZERO v2, v22, v25, 2 + MLA_WEIGHTZERO v3, v22, v25, 3 + MLA_WEIGHTZERO v4, v23, v25, 0 + MLA_WEIGHTZERO v5, v23, v25, 1 + MLA_WEIGHTZERO v6, v23, v25, 2 + MLA_WEIGHTZERO v7, v23, v25, 3 + + cbz x9, TILE8_ADD_DSTV_L4 + TILE8_ADD_BIAS_L4: + ld1 {v16.4s}, [x20] + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v16 + b TILE8_POST_L4 + + TILE8_ADD_DSTV_L4: + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x26], #64 + ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x26] + ADD_FLOAT v0, v1, v2, v3, v20, v21, v22, v23 + ADD_FLOAT v4, v5, v6, v7, v24, v25, v26, v27 + sub x26, x26, #64 + + TILE8_POST_L4: + cbz x14, TILE8_STORE_L4 + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + + TILE8_STORE_L4: + 
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], #64 + st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x26], x4 + b Tile8Check + +Tile8Check: +cbz x23, Tile8End +add x23, x23, #32 + +Tile8End: + sub x7, x7, #8 + add x0, x0, x21, LSL #3 + add x1, x1, #64 + add x27, x27, #32 + add x4, x4, #64 // Revert x4 for following tile. + +TILE_4: + cmp x7, #4 + blt TILE_2 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias + +cmp x5, #2 +blt LoopDz4_TILE_4 +LoopDz_TILE_4: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v13, v14, v15 + SET_0_4 v16, v17, v18, v19 + +LoopSz_TILE_4: + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v28.16b // oc:4-5 + and v11.16b, v1.16b, v28.16b // oc:6-7 + + ld1 {v4.16b, v5.16b}, [x11], x22 // src + subs x13, x13, #1 + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + + .inst 0x4e88a4b0 // smmla v16.4s, v5.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a4b1 // smmla v17.4s, v5.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + .inst 0x4e8aa4b2 // smmla v18.4s, v5.16b, v10.16b // tile2-oc4, tile2-oc5, tile3-oc4, tile3-oc5 + .inst 0x4e8ba4b3 // smmla v19.4s, v5.16b, v11.16b // tile2-oc6, tile2-oc7, tile3-oc6, tile3-oc7 + bne LoopSz_TILE_4 +LoopSzEnd_TILE_4: + add x25, x25, x15 + sub x24, x24, #2 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v4.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v5.2d, v14.2d, v15.2d // E1: oc:4-7 + + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + uzp1 v6.2d, v18.2d, v19.2d + uzp2 v7.2d, v18.2d, v19.2d + Int32ToFloat v0, v1, v2, v3 + Int32ToFloat v4, v5, v6, v7 + +Tile4Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.4s}, [x27] // x kernel sum + ld1 {v25.4s, v26.4s}, [x6], #32 // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + MUL_SCALE v21, v4, v5, v6, v7 + + cbz x23, TILE4_MLA + ld1 {v27.4s}, [x23] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + MUL_EXTRA_SCALE v27, v4, v5, v6, v7 + + TILE4_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + MLA_WEIGHTZERO v4, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v5, v22, v26, 1 // tile:1, oc:4-7 + MLA_WEIGHTZERO v6, v22, v26, 2 // tile:2, oc:4-7 + MLA_WEIGHTZERO v7, v22, v26, 3 // tile:3, oc:4-7 + + TILE4_ADD_BIAS: + cbz x9, TILE4_ADD_DSTV + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + ADD_BIAS_FLOAT v4, v5, v6, v7, v17 + b TILE4_POST + + TILE4_ADD_DSTV: + ld1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x26], x4 + ld1 {v19.4s, v20.4s, v21.4s, v22.4s}, [x26] + ADD_FLOAT v0, v1, v2, v3, v15, v16, v17, v18 + ADD_FLOAT v4, v5, v6, v7, v19, v20, v21, v22 + sub x26, x26, x4 + + TILE4_POST: + cbz x14, TILE4_STORE + ReLU_FP32 v0, v1, v2, v3, v30, v31 + ReLU_FP32 v4, v5, v6, v7, v30, v31 + + TILE4_STORE: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], x4 + st1 {v4.4s, v5.4s, 
v6.4s, v7.4s}, [x26], x4 + b Tile4LoopCheck + +Tile4LoopCheck: + cmp x24, #2 + bge LoopDz_TILE_4 + cbz x24, Tile4Check + +LoopDz4_TILE_4: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v13, v16, v17 +LoopSz4_TILE_4: + ld1 {v4.16b, v5.16b}, [x11], x22 // src + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + + subs x13, x13, #1 + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + + .inst 0x4e88a4b0 // smmla v16.4s, v5.16b, v8.16b // tile2-oc0, tile2-oc1, tile3-oc0, tile3-oc1 + .inst 0x4e89a4b1 // smmla v17.4s, v5.16b, v9.16b // tile2-oc2, tile2-oc3, tile3-oc2, tile3-oc3 + bne LoopSz4_TILE_4 +LoopSz4End_TILE_4: + add x25, x25, x15 + sub x24, x24, #1 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v16.2d, v17.2d + uzp2 v3.2d, v16.2d, v17.2d + Int32ToFloat v0, v1, v2, v3 + +Tile4Quan_L4: + ld1 {v20.4s}, [x19] // scale + ld1 {v22.4s}, [x27] // x kernel sum + ld1 {v25.4s}, [x6] // weight quan zeropoint + MUL_SCALE v20, v0, v1, v2, v3 + + cbz x23, TILE4_MLA_L4 + ld1 {v27.4s}, [x23] + MUL_EXTRA_SCALE v27, v0, v1, v2, v3 + + TILE4_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v25, 2 // tile:2, oc:0-3 + MLA_WEIGHTZERO v3, v22, v25, 3 // tile:3, oc:0-3 + + TILE4_ADD_BIAS_L4: + cbz x9, TILE4_ADD_DSTV_L4 + ld1 {v16.4s}, [x20] // bias + ADD_BIAS_FLOAT v0, v1, v2, v3, v16 + b TILE4_POST_L4 + + TILE4_ADD_DSTV_L4: + ld1 {v15.4s, v16.4s, v17.4s, v18.4s}, [x26] + ADD_FLOAT v0, v1, v2, v3, v15, v16, v17, v18 + + TILE4_POST_L4: + cbz x14, TILE4_STORE_L4 + ReLU_FP32 v0, v1, v2, v3, v30, v31 + + TILE4_STORE_L4: + st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x26], x4 + b Tile4Check + +Tile4Check: +cbz x23, Tile4End +add x23, x23, #16 +Tile4End: + sub x7, x7, #4 + add x0, x0, x21, LSL #2 + add x1, x1, #32 + add x27, x27, #16 + +TILE_2: + cmp x7, #2 + blt TILE_1 + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias + +cmp x5, #2 +blt LoopDz4_TILE_2 +LoopDz_TILE_2: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + SET_0_4 v12, v13, v14, v15 +LoopSz_TILE_2: + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v28.16b // oc:4-5 + and v11.16b, v1.16b, v28.16b // oc:6-7 + + ld1 {v4.16b}, [x11], x22 // src + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + .inst 0x4e8aa48e // smmla v14.4s, v4.16b, v10.16b // tile0-oc4, tile0-oc5, tile1-oc4, tile1-oc5 + .inst 0x4e8ba48f // smmla v15.4s, v4.16b, v11.16b // tile0-oc6, tile0-oc7, tile1-oc6, tile1-oc7 + subs x13, x13, #1 + bne LoopSz_TILE_2 +LoopSzEnd_TILE_2: + add x25, x25, x15 + sub x24, x24, #2 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + uzp1 v2.2d, v14.2d, v15.2d // E0: oc:4-7 + uzp2 v3.2d, v14.2d, v15.2d // E1: oc:4-7 + Int32ToFloat v0, v1, v2, v3 + +Tile2Quan: + ld1 {v20.4s, v21.4s}, [x19], #32 // scale + ld1 {v22.d}[0], [x27] // x kernel sum + ld1 {v25.4s, 
v26.4s}, [x6], #32 // weight quan zeropoint + fmul v0.4s, v0.4s, v20.4s + fmul v1.4s, v1.4s, v20.4s + fmul v2.4s, v2.4s, v21.4s + fmul v3.4s, v3.4s, v21.4s + + cbz x23, TILE2_MLA + ld1 {v27.d}[0], [x23] + fmul v0.4s, v0.4s, v27.s[0] + fmul v1.4s, v1.4s, v27.s[1] + fmul v2.4s, v2.4s, v27.s[0] + fmul v3.4s, v3.4s, v27.s[1] + + TILE2_MLA: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + MLA_WEIGHTZERO v2, v22, v26, 0 // tile:0, oc:4-7 + MLA_WEIGHTZERO v3, v22, v26, 1 // tile:1, oc:4-7 + + TILE2_ADD_BIAS: + cbz x9, TILE2_ADD_DSTV + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + fadd v2.4s, v2.4s, v17.4s + fadd v3.4s, v3.4s, v17.4s + b TILE2_POST + + TILE2_ADD_DSTV: + ld1 {v18.4s, v19.4s}, [x26], x4 + ld1 {v20.4s, v21.4s}, [x26] + fadd v0.4s, v0.4s, v18.4s + fadd v1.4s, v1.4s, v19.4s + fadd v2.4s, v2.4s, v20.4s + fadd v3.4s, v3.4s, v21.4s + sub x26, x26, x4 + + TILE2_POST: + cbz x14, TILE2_STORE + ReLU_FP32 v0, v1, v2, v3, v30, v31 + TILE2_STORE: + st1 {v0.4s, v1.4s}, [x26], x4 + st1 {v2.4s, v3.4s}, [x26], x4 + b Tile2LoopCheck + +Tile2LoopCheck: + cmp x24, #2 + bge LoopDz_TILE_2 + cbz x24, Tile2Check +LoopDz4_TILE_2: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + movi v12.4s, #0 + movi v13.4s, #0 +LoopSz4_TILE_2: + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + ld1 {v4.16b}, [x11], x22 // src + + .inst 0x4e88a48c // smmla v12.4s, v4.16b, v8.16b // tile0-oc0, tile0-oc1, tile1-oc0, tile1-oc1 + .inst 0x4e89a48d // smmla v13.4s, v4.16b, v9.16b // tile0-oc2, tile0-oc3, tile1-oc2, tile1-oc3 + subs x13, x13, #1 + bne LoopSz4_TILE_2 +LoopSz4End_TILE_2: + add x25, x25, x15 + uzp1 v0.2d, v12.2d, v13.2d // E0: oc:0-3 + uzp2 v1.2d, v12.2d, v13.2d // E1: oc:0-3 + scvtf v0.4s, v0.4s + scvtf v1.4s, v1.4s + +Tile2Quan_L4: + ld1 {v20.4s}, [x19] + ld1 {v22.d}[0], [x27] // x kernel sum + ld1 {v25.4s}, [x6] // weight quan zeropoint + fmul v0.4s, v0.4s, v20.4s + fmul v1.4s, v1.4s, v20.4s + + cbz x23, TILE2_MLA_L4 + ld1 {v27.d}[0], [x23] + fmul v0.4s, v0.4s, v27.s[0] + fmul v1.4s, v1.4s, v27.s[1] + + TILE2_MLA_L4: + MLA_WEIGHTZERO v0, v22, v25, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v1, v22, v25, 1 // tile:1, oc:0-3 + + TILE2_ADD_BIAS_L4: + cbz x9, TILE2_ADD_DSTV_L4 + ld1 {v16.4s}, [x20] // bias + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v16.4s + b TILE2_POST_L4 + + TILE2_ADD_DSTV_L4: + ld1 {v18.4s, v19.4s}, [x26] + fadd v0.4s, v0.4s, v18.4s + fadd v1.4s, v1.4s, v19.4s + + TILE2_POST_L4: + cbz x14, TILE2_STORE_L4 + ReLU_FP32_2 v0, v1, v30, v31 + TILE2_STORE_L4: + st1 {v0.4s, v1.4s}, [x26], x4 + b Tile2Check + +Tile2Check: +cbz x23, Tile2End +add x23, x23, #8 +Tile2End: + sub x7, x7, #2 + add x0, x0, x21, LSL #1 + add x1, x1, #16 + add x27, x27, #8 + +TILE_1: + cmp x7, #1 + blt End + mov x24, x5 // dst_depth_quad + mov x26, x0 // dst + mov x25, x2 // weight + mov x19, x8 // scale + mov x20, x9 // bias + mov x6, x28 // weightQuanBias +cmp x5, #2 +blt LoopDz4_TILE_1 +LoopDz_TILE_1: + //ld1 {v0.4s}, [x20], #16 // bias + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + + movi v16.4s, #0 + movi v17.4s, #0 + movi v18.4s, #0 + movi v19.4s, #0 +LoopSz_TILE_1: + ld1 {v2.8b}, [x11], x22 // src + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + and v10.16b, v0.16b, v28.16b // oc:4-5 + and 
v11.16b, v1.16b, v28.16b // oc:6-7 + subs x13, x13, #1 + + .inst 0x4e88a450 // smmla v16.4s, v2.16b, v8.16b + .inst 0x4e89a451 // smmla v17.4s, v2.16b, v9.16b + .inst 0x4e8aa452 // smmla v18.4s, v2.16b, v10.16b + .inst 0x4e8ba453 // smmla v19.4s, v2.16b, v11.16b + bne LoopSz_TILE_1 +LoopSzEnd_TILE_1: + add x25, x25, x15 + sub x24, x24, #2 + uzp1 v27.2d, v16.2d, v17.2d + uzp1 v26.2d, v18.2d, v19.2d + scvtf v27.4s, v27.4s + scvtf v26.4s, v26.4s + +Tile1Quan: + ld1 {v0.4s, v1.4s}, [x19], #32 // scale + ld1 {v6.s}[0], [x27] // x kernel sum + ld1 {v8.4s, v9.4s}, [x6], #32 // weight quan zeropoint + fmul v27.4s, v27.4s, v0.4s + fmul v26.4s, v26.4s, v1.4s + + cbz x23, TILE1_MLA + ld1 {v10.s}[0], [x23] + fmul v27.4s, v27.4s, v10.s[0] + fmul v26.4s, v26.4s, v10.s[0] + + TILE1_MLA: + MLA_WEIGHTZERO v27, v6, v8, 0 // tile:0, oc:0-3 + MLA_WEIGHTZERO v26, v6, v9, 0 // tile:0, oc:4-7 + + TILE1_ADD_BIAS: + cbz x9, TILE1_ADD_DSTV + ld1 {v16.4s, v17.4s}, [x20], #32 // bias + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + b TILE1_POST + + TILE1_ADD_DSTV: + ld1 {v16.4s}, [x26], x4 + ld1 {v17.4s}, [x26] + fadd v27.4s, v27.4s, v16.4s + fadd v26.4s, v26.4s, v17.4s + sub x26, x26, x4 + + TILE1_POST: + cbz x14, TILE1_STORE + fmin v27.4s, v27.4s, v31.4s + fmax v27.4s, v27.4s, v30.4s + fmin v26.4s, v26.4s, v31.4s + fmax v26.4s, v26.4s, v30.4s + + TILE1_STORE: + st1 {v27.4s}, [x26], x4 + st1 {v26.4s}, [x26], x4 + b Tile1LoopEnd + +Tile1LoopEnd: + cmp x24, #2 + bge LoopDz_TILE_1 + cbz x24, End + +LoopDz4_TILE_1: + mov x11, x1 // src + mov x12, x25 // weight + mov x13, x3 // src_depth_quad + + movi v16.4s, #0 + movi v17.4s, #0 +LoopSz4_TILE_1: + ld1 {v2.8b}, [x11], x22 // src + // int4->int8 + ld1 {v0.16b, v1.16b}, [x12], #32 // weight + ushr v8.16b, v0.16b, #4 // oc:0-1 + ushr v9.16b, v1.16b, #4 // oc:2-3 + + subs x13, x13, #1 + .inst 0x4e88a450 // smmla v16.4s, v2.16b, v8.16b + .inst 0x4e89a451 // smmla v17.4s, v2.16b, v9.16b + bne LoopSz4_TILE_1 +LoopSz4End_TILE_1: + add x25, x25, x15 + uzp1 v27.2d, v16.2d, v17.2d + scvtf v27.4s, v27.4s + +Tile1Quan_L4: + ld1 {v0.4s}, [x19] // scale + ld1 {v6.s}[0], [x27] // x kernel sum + ld1 {v8.4s}, [x6] // weight quan zeropoint + fmul v27.4s, v27.4s, v0.4s + cbz x23, TILE1_MLA_L4 + ld1 {v10.s}[0], [x23] + fmul v27.4s, v27.4s, v10.s[0] + + TILE1_MLA_L4: + MLA_WEIGHTZERO v27, v6, v8, 0 // tile:0, oc:0-3 + + TILE1_ADD_BIAS_L4: + cbz x9, TILE1_ADD_DSTV_L4 + ld1 {v16.4s}, [x20] // bias + fadd v27.4s, v27.4s, v16.4s + b TILE1_POST_L4 + + TILE1_ADD_DSTV_L4: + ld1 {v16.4s}, [x26] + fadd v27.4s, v27.4s, v16.4s + + TILE1_POST_L4: + cbz x14, TILE1_STORE_L4 + fmin v27.4s, v27.4s, v31.4s + fmax v27.4s, v27.4s, v30.4s + + TILE1_STORE_L4: + st1 {v27.4s}, [x26], x4 + b End + +End: +ldp x27, x28, [sp, #(16 * 8)] +ldp x25, x26, [sp, #(16 * 7)] +ldp x23, x24, [sp, #(16 * 6)] +ldp x19, x20, [sp, #(16 * 5)] +ldp x21, x22, [sp, #(16 * 4)] +ldp d8, d9, [sp, #(16 * 3)] +ldp d10, d11, [sp, #(16 * 2)] +ldp d12, d13, [sp, #(16 * 1)] +ldp d14, d15, [sp], #(16 * 10) +ret + +#endif // __aarch64__ diff --git a/source/backend/cpu/bf16/BF16Functions.cpp b/source/backend/cpu/bf16/BF16Functions.cpp index 3f792a3ce..852cd791b 100644 --- a/source/backend/cpu/bf16/BF16Functions.cpp +++ b/source/backend/cpu/bf16/BF16Functions.cpp @@ -3,7 +3,6 @@ #include "../x86_x64/avx/FunctionSummary.hpp" #include "../x86_x64/avxfma/FunctionSummary.hpp" #include "../x86_x64/avx512/FunctionSummary.hpp" -#include "../x86_x64/cpu_id.h" #endif #include "core/Macro.h" #if defined(MNN_USE_NEON) @@ -11,20 +10,17 @@ #endif 
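// For orientation, a minimal self-contained sketch of the bf16 round-trip this file
// relies on (an fp32 truncated to its top 16 bits, expanded back by shifting); the
// function names are illustrative only and are not part of MNN's API.
#include <cstdint>
#include <cstring>

static inline int16_t fp32_to_bf16_sketch(float v) {
    int32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));        // bit-copy avoids aliasing issues
    return static_cast<int16_t>(bits >> 16);     // keep sign, exponent, top 7 mantissa bits
}

static inline float bf16_to_fp32_sketch(int16_t v) {
    uint32_t bits = static_cast<uint32_t>(static_cast<uint16_t>(v)) << 16; // low mantissa bits become 0
    float out;
    std::memcpy(&out, &bits, sizeof(out));
    return out;
}
// Values representable with few mantissa bits survive exactly, e.g.
// bf16_to_fp32_sketch(fp32_to_bf16_sketch(1.0f)) == 1.0f.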
#include "BF16Functions.hpp" -#include "WinogradOptFunctionHalf.hpp" #include "../compute/CommonOptFunction.h" -#include "../CPUPool.hpp" #include "../CPURuntime.hpp" #include "VecHalf.hpp" #include "math/Vec.hpp" -#include "BF16Binary.hpp" -#include "BF16Unary.hpp" using BFVec4 = MNN::Math::VecHalf<4>; using Vec4 = MNN::Math::Vec; -extern "C" { -void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* slopeO, size_t sizeQuad, size_t depthQuad); -} namespace MNN { +// The Function Will be Called in init +void registerBF16Backend() { + BF16Functions::init(); +} // just for reference BF16 converting of c++ code, not for arm or sse. inline int16_t MNNFP32ToBF16(float fp32Value) { int32_t* s32Value = (int32_t*)(&fp32Value); @@ -76,825 +72,277 @@ static void _MNNLowpToFp32(const int16_t* src, float* dst, size_t size) { ::memcpy(dst, dstTemp, sizeRemain * sizeof(float)); } } -static void MNNConvRunForUnitDepthWiseBF16(float* dst, const float* src, const float* weight, size_t fw, size_t fh, - size_t weight_y_step, size_t dilateX_step, size_t dilateY_step) { - int fx, fy; - BFVec4 dstValue(0.0f); - const int16_t* src_z = (const int16_t*)src; - const int16_t* weight_z = (const int16_t*)weight; - for (fy = 0; fy < fh; ++fy) { - const auto src_y = src_z + fy * dilateY_step; - const auto weight_y = weight_z + fy * weight_y_step; - for (fx = 0; fx < fw; ++fx) { - const auto weight_x = weight_y + 4 * fx; - const auto src_x = src_y + fx * dilateX_step; - dstValue = dstValue + BFVec4::load(src_x) * BFVec4::load(weight_x); - } - } - BFVec4::save((int16_t*)dst, dstValue); + +#if defined(MNN_USE_NEON) +// todo: search for proper value for bf16 +void NEON_MNNGetMatMulPackMode_BF16(int* eP, int* lP, int* hP) { + *eP = 12; + *lP = 1; +#ifdef __aarch64__ + *hP = 8; +#else + *hP = 4; +#endif } -static void MNNConvRunForLineDepthwiseBF16(float* dstO, const float* srcO, const float* weightO, size_t width, size_t src_w_setup, - size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height, - size_t srcHStep, size_t dstHStep) { - int dx, fx, fy; - auto dst = (int16_t*)dstO; - auto src = (const int16_t*)srcO; - auto weight = (const int16_t*)weightO; - for (int y = 0; y < height; ++y) { - auto srcY = src + y * srcHStep; - auto dstY = dst + y * dstHStep; - for (dx = 0; dx < width; ++dx) { - auto dst_x = dstY + dx * 4; - BFVec4 dstValue(0.0f); - const auto src_z = srcY + src_w_setup * dx; - const auto weight_z = weight; - for (fy = 0; fy < fh; ++fy) { - const auto src_y = src_z + fy * dilateY_step; - const auto weight_y = weight_z + fy * fw * 4; - for (fx = 0; fx < fw; ++fx) { - const auto weight_x = weight_y + 4 * fx; - const auto src_x = src_y + fx * dilateX_step; - dstValue = dstValue + BFVec4::load(src_x) * BFVec4::load(weight_x); - } - } - BFVec4::save(dst_x, dstValue); +#ifdef __aarch64__ +#define EP 12 +#define HP 8 +#define LP 4 +void ARMV86_MNNGetMatMulPackMode_BF16(int* eP, int* lP, int* hP) { + *eP = EP; + *hP = HP; + *lP = LP; +} +void ARMV86_MNNPackForMatMul_B_BF16(float* destF, const float* sourceF, size_t h, size_t l, bool transpose) { + // [l, h] -> [h/hp, l/lp, hp, lp] + auto dest = (int16_t*)destF; + auto source = (const int32_t*)sourceF; + auto lCP = UP_DIV(l, LP); + auto hCP = UP_DIV(h, HP); + int sYstride = 1; + int sXstride = h; + if (transpose) { + sYstride = l; + sXstride = 1; + } + ::memset(dest, 0, lCP * hCP * sizeof(int16_t) * HP * LP); + for (int y = 0; y < h; ++y) { + int yC = y / HP; + int yR = y % HP; + for (int x = 0; x < l; ++x) { + int xC = x / LP; + 
int xR = x % LP; + dest[xR + yR * LP + xC * HP * LP + yC * HP * LP * lCP] = source[sXstride * x + sYstride * y] >> 16; } } } -void MNNAxByClampBroadcastUnitBF16(float* CF, const float* AF, const float* BF, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters) { - auto C = (int16_t*)CF; - auto A = (const int16_t*)AF; - auto B = (const int16_t*)BF; - auto minF = BFVec4(parameters[2]); - auto maxF = BFVec4(parameters[3]); - auto beta = BFVec4(parameters[1]); - for (int y = 0; y < height; ++y) { - auto a = A + aStride * y; - auto b = B + 4 * y; - auto bv = BFVec4::load(b); - auto c = C + cStride * y; - for (int x = 0; x < width; ++x) { - auto av = BFVec4::load(a + 4 * x); - auto cv = av + bv * beta; - cv = BFVec4::min(cv, maxF); - cv = BFVec4::max(cv, minF); - BFVec4::save(c + 4 * x, cv); +void ARMV86_MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) { + // [l/4, e, 4] -> [l/4, ep, 4] + int number = info[0]; + int eReal = info[1]; + int eDest = info[2]; + int offset = info[3]; + if (1 == number) { + int l = el[1]; + if (l % 8 != 0) { + auto lAigin = UP_DIV(l, LP) * LP; + ::memset(destOrigin, 0, eDest * lAigin * sizeof(int16_t)); } } -} -#ifndef MNN_USE_NEON -void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* slopeO, size_t sizeQuad, size_t depthQuad) { - auto slope = (const int16_t*)slopeO; - auto dst = (int16_t*)dstO; - auto src = (const int16_t*)srcO; - auto zero = BFVec4(0.0f); - for (int j = 0; j < depthQuad; j++) { - auto slopeZ = BFVec4::load(slope + 4 * j); - auto srcZ = src + 4 * j * sizeQuad; - auto dstZ = dst + 4 * j * sizeQuad; - for (int i = 0; i < sizeQuad; i++) { - auto srcValue = BFVec4::load(srcZ + 4 * i); - std::array dstV; - for (int c = 0; c < 4; c++) { - if (srcValue[c] < 0) { - dstV[c] = srcValue[c] * slopeZ[c]; - } else { - dstV[c] = srcValue[c]; + + for (int n=0; n [l/4, ep, 4] + for (int x = 0; x < lDiv; ++x) { + auto destX = dest + x * eDest * 4; + auto srcX = source + x * eReal * 4; + for (int y = 0; y < e; ++y) { + auto srcV = Vec4::load(srcX + y * offset * 4); + auto dstV = BFVec4(std::move(srcV.value)); + BFVec4::save((int16_t*)(destX + 4*y), dstV); } } - auto dstValue = BFVec4(std::move(Vec4::load(dstV.data()).value)); - BFVec4::save(dstZ + 4 * i, dstValue); + continue; + } + for (int x = 0; x < l; ++x) { + auto dl = lOR + x; + auto dlC = dl / LP; + auto dlR = dl % LP; + auto xC = x / LP; + auto xR = x % LP; + auto destX = dest + dlC * eDest * LP + dlR; + auto srcX = sourceInt + xC * eReal * LP + xR; + for (int y = 0; y < e; ++y) { + destX[y * 4] = srcX[y * 4 * offset] >> 16; + } } } } -#endif - -#if !defined(MNN_USE_SSE) && !defined(MNN_USE_NEON) -void MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) { - MNNPackC4ForMatMul_A(destOrigin, sourceGroup, info, el); - return; -} - -void MNNPackForMatMul_B_BF16(float* dest, const float* source, size_t h, size_t l, bool transpose) { - auto hP = h / 4; - auto hR = hP * 4; - if (hR != h) { - ::memset(dest, 0, UP_DIV(h, 4)*4*l*sizeof(int16_t)); - } +#undef EP +#undef HP +#undef LP +void NEON_MNNPackForMatMul_B_BF16(float* destFloat, const float* sourceFloat, size_t h, size_t l, bool transpose) { + auto hP = (int)h / 8; + auto hR = (int)hP * 8; + int16_t* dest = (int16_t*)destFloat; + const float* source = sourceFloat; if (!transpose) { - for (int y=0; y 0) { - auto destY = dest + hP * 4 * l; - auto sourceY = source + hP * 4; - for (int x=0; 
x().max(); - float maxValue = std::numeric_limits().max(); - if (nullptr != postParameters) { - minValue = postParameters[2]; - maxValue = postParameters[3]; - alpha = postParameters[0]; - beta = postParameters[1]; - } - - for (int x = 0; x < eSize; ++x) { - auto dst = C + 4 * x; - auto src = - A + x; // input data is packed as tileCount x l x 16, is only one tiled block here, indexed as A[z * 16 + x] - for (int ry = 0; ry < h; ++ry) { - auto y = ry / 4; - auto yRemain = ry % 4; - auto bY = B + y * bStride; - auto dstY = dst + y * cStride; // convert NCHW to NC4HW4 ie 1·(y/4)·X·4 - int wdy = ry / 6; - int wdyRemain = ry % 6; - auto weight = - B + wdy * bStride + - wdyRemain; // weight is packed as (h/6) x l x 6, indexed as B[(ry / 6) * Bstride +z*6 + (ry % 6)] - float summer = 0.0f; - for (int z = 0; z < l; ++z) { - auto aZ = src + z * 16; - auto wZ = weight + z * 6; - summer += MNNLowpToFp32(wZ[0]) * MNNLowpToFp32(aZ[0]); + auto destY = dest + hP * 8 * l; + auto sourceY = source + hP * 8; + float sTmp[8]; + ::memset(sTmp, 0, sizeof(sTmp)); + for (int x = 0; x < l; ++x) { + ::memcpy(sTmp, sourceY + x * h, hRemain * sizeof(float)); + auto s0 = Vec4::load(sTmp + 0); + auto s1 = Vec4::load(sTmp + 4); + auto d0 = BFVec4(std::move(s0.value)); + auto d1 = BFVec4(std::move(s1.value)); + BFVec4::save(destY + 8 * x + 0, d0); + BFVec4::save(destY + 8 * x + 4, d1); } - float originValue = MNNLowpToFp32(dstY[yRemain]); - if (nullptr != bias) { - originValue = MNNLowpToFp32(bias[ry]); - } - auto dstValue = originValue * beta + alpha * summer; - dstValue = std::min(dstValue, maxValue); - dstValue = std::max(dstValue, minValue); - dstY[yRemain] = MNNFP32ToBF16(dstValue); - } - } -} - -void MNNPackedMatMul_BF16(float* C, const float* A, const float* B, const size_t* parameter, float* cache, - const float* postParameters, const float* bias, const float* k, const float* b) { - return MNNPackedMatMulRemain_BF16(C, A, B, 16, parameter, cache, postParameters, bias, nullptr, nullptr); - // return _AVX_MNNPackedMatMulFMA(C, A, B, parameter, cache); -} - - -static void _MNNConvDwF23MulTransUnit(float **cacheLine, const float *weigth, float *dest, size_t ow); - -static void _MNNMultiAndDestTransformCommon23(float **cacheLine, const float *weigthF, float *destF, int cacheLineSize, int ow, const float* bias, const float* parameters) { - auto weigth = (const int16_t*)weigthF; - auto dest = (int16_t*)destF; - int unit = ow / 2; - auto biasF = BFVec4::load((const int16_t*)bias); - auto minV = BFVec4(parameters[2]); - auto maxV = BFVec4(parameters[3]); - MNN_ASSERT(cacheLineSize >= 1); - for (int x = 0; x < unit; ++x) { - auto offset = 4 * 4 * x; - int i = 0; - BFVec4 m0 = BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); - BFVec4 m1 = BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); - BFVec4 m2 = BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); - BFVec4 m3 = BFVec4::load(weigth + i * 16 + 4 * 3) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 3); - - for (i = 1; i < cacheLineSize; ++i) { - m0 = m0 + BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); - m1 = m1 + BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); - m2 = m2 + BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); - m3 = m3 + BFVec4::load(weigth + i * 16 + 4 * 3) * 
BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 3); - } - - auto o0 = m0 + m1 + m2 + biasF; - auto o1 = m1 - m2 + m3 + biasF; - o0 = BFVec4::min(o0, maxV); - o1 = BFVec4::min(o1, maxV); - o0 = BFVec4::max(o0, minV); - o1 = BFVec4::max(o1, minV); - BFVec4::save(dest + 8 * x + 0 * 4, o0); - BFVec4::save(dest + 8 * x + 1 * 4, o1); - } - if (unit * 2 < ow) { - auto offset = 4 * 4 * unit; - int i = 0; - BFVec4 m0 = BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); - BFVec4 m1 = BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); - BFVec4 m2 = BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); - - for (i = 1; i < cacheLineSize; ++i) { - m0 = m0 + BFVec4::load(weigth + i * 16 + 4 * 0) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 0); - m1 = m1 + BFVec4::load(weigth + i * 16 + 4 * 1) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 1); - m2 = m2 + BFVec4::load(weigth + i * 16 + 4 * 2) * BFVec4::load((int16_t*)cacheLine[i] + offset + 4 * 2); - } - - auto o0 = m0 + m1 + m2 + biasF; - o0 = BFVec4::min(o0, maxV); - o0 = BFVec4::max(o0, minV); - BFVec4::save(dest + 8 * unit + 0 * 4, o0); - } -} -static void _MNNConvDwF23SourceTransUnit(const int16_t *source, int16_t *dest, size_t unit); -static void _MNNSourceTransformCommonF23(const float *sourceF, float *destF, int unit, int iw, int pad, int su, int eu) { - auto source = (const int16_t*)sourceF; - auto dest = (int16_t*)destF; - for (int x = 0; x < su; ++x) { - auto dstX = dest + 4 * 4 * x; - auto sx = x * 2 - (int)pad; - auto ex = sx + 4; - - auto clampSx = std::max(sx, 0); - auto clampEx = std::min(ex, (int)iw); - - BFVec4 v[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - for (int i = clampSx; i < clampEx; ++i) { - v[i - sx] = BFVec4::load(source + 4 * i); } - auto m0 = v[0] - v[2]; - auto m1 = v[1] + v[2]; - auto m2 = v[2] - v[1]; - auto m3 = v[3] - v[1]; - - BFVec4::save(dstX + 4 * 0, m0); - BFVec4::save(dstX + 4 * 1, m1); - BFVec4::save(dstX + 4 * 2, m2); - BFVec4::save(dstX + 4 * 3, m3); - } - _MNNConvDwF23SourceTransUnit(source + 4 * (su * 2 - pad), dest + 4 * 4 * su, eu - su); - - for (int x = eu; x < unit; ++x) { - auto dstX = dest + 4 * 4 * x; - auto sx = x * 2 - (int)pad; - auto ex = sx + 4; - - auto clampSx = std::max(sx, 0); - auto clampEx = std::min(ex, (int)iw); - - BFVec4 v[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - for (int i = clampSx; i < clampEx; ++i) { - v[i - sx] = BFVec4::load(source + 4 * i); - } - auto m0 = v[0] - v[2]; - auto m1 = v[1] + v[2]; - auto m2 = v[2] - v[1]; - auto m3 = v[3] - v[1]; - - BFVec4::save(dstX + 4 * 0, m0); - BFVec4::save(dstX + 4 * 1, m1); - BFVec4::save(dstX + 4 * 2, m2); - BFVec4::save(dstX + 4 * 3, m3); - } -} - -static void _MNNConvDwF23MulTransUnit(float **cacheLine, const float *weigthF, float *destF, size_t ow, const float* bias, const float* parameters) { - int unit = ow / 2; - auto weigth = (const int16_t*)weigthF; - auto dest = (int16_t*)destF; - - auto w00 = BFVec4::load(weigth + 0 * 16 + 4 * 0); - auto w01 = BFVec4::load(weigth + 0 * 16 + 4 * 1); - auto w02 = BFVec4::load(weigth + 0 * 16 + 4 * 2); - auto w03 = BFVec4::load(weigth + 0 * 16 + 4 * 3); - auto w10 = BFVec4::load(weigth + 1 * 16 + 4 * 0); - auto w11 = BFVec4::load(weigth + 1 * 16 + 4 * 1); - auto w12 = BFVec4::load(weigth + 1 * 16 + 4 * 2); - auto w13 = BFVec4::load(weigth + 1 * 16 + 4 * 3); - auto w20 = BFVec4::load(weigth + 2 * 16 + 4 * 0); - auto w21 = BFVec4::load(weigth + 2 * 16 + 4 * 1); - auto w22 = 
BFVec4::load(weigth + 2 * 16 + 4 * 2); - auto w23 = BFVec4::load(weigth + 2 * 16 + 4 * 3); - - auto biasF = BFVec4::load((const int16_t*)bias); - auto minV = BFVec4(parameters[2]); - auto maxV = BFVec4(parameters[3]); - for (int x = 0; x < unit; ++x) { - auto offset = 4 * 4 * x; - int i = 0; - BFVec4 m0 = w00 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 0); - BFVec4 m1 = w01 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 1); - BFVec4 m2 = w02 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 2); - BFVec4 m3 = w03 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 3); - - m0 = m0 + w10 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 0); - m1 = m1 + w11 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 1); - m2 = m2 + w12 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 2); - m3 = m3 + w13 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 3); - - m0 = m0 + w20 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 0); - m1 = m1 + w21 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 1); - m2 = m2 + w22 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 2); - m3 = m3 + w23 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 3); - - auto o0 = m0 + m1 + m2 + biasF; - auto o1 = m1 - m2 + m3 + biasF; - o0 = BFVec4::min(o0, maxV); - o1 = BFVec4::min(o1, maxV); - o0 = BFVec4::max(o0, minV); - o1 = BFVec4::max(o1, minV); - BFVec4::save(dest + 8 * x + 0 * 4, o0); - BFVec4::save(dest + 8 * x + 1 * 4, o1); - } - if (unit * 2 < ow) { - auto offset = 4 * 4 * unit; - BFVec4 m0 = w00 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 0); - BFVec4 m1 = w01 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 1); - BFVec4 m2 = w02 * BFVec4::load((int16_t*)cacheLine[0] + offset + 4 * 2); - - m0 = m0 + w10 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 0); - m1 = m1 + w11 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 1); - m2 = m2 + w12 * BFVec4::load((int16_t*)cacheLine[1] + offset + 4 * 2); - - m0 = m0 + w20 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 0); - m1 = m1 + w21 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 1); - m2 = m2 + w22 * BFVec4::load((int16_t*)cacheLine[2] + offset + 4 * 2); - auto o0 = m0 + m1 + m2 + biasF; - o0 = BFVec4::min(o0, maxV); - o0 = BFVec4::max(o0, minV); - BFVec4::save(dest + 8 * unit + 0 * 4, o0); - } -} -static void _MNNConvDwF23SourceTransUnit(const int16_t *source, int16_t *dest, size_t unit) { - if (unit <= 0) { return; } - BFVec4 v0 = BFVec4::load(source + 4 * 0); - BFVec4 v1 = BFVec4::load(source + 4 * 1); - BFVec4 v2; - BFVec4 v3; - source += 8; - - for (int x = 0; x < unit; ++x) { - v2 = BFVec4::load(source + 0 * 4); - v3 = BFVec4::load(source + 1 * 4); - auto m0 = v0 - v2; - auto m1 = v1 + v2; - auto m2 = v2 - v1; - auto m3 = v3 - v1; - - BFVec4::save(dest + 4 * 0, m0); - BFVec4::save(dest + 4 * 1, m1); - BFVec4::save(dest + 4 * 2, m2); - BFVec4::save(dest + 4 * 3, m3); - - source += 8; - dest += 16; - - v0 = v2; - v1 = v3; - } -} - -static void _MNNMatrixSub(float* CF, const float* AF, const float* BF, size_t widthC4, size_t cStride, size_t aStride, - size_t bStride, size_t height) { - auto A = (int16_t*)AF; - auto B = (int16_t*)BF; - auto C = (int16_t*)CF; - for (int y = 0; y < height; ++y) { - auto a = A + aStride * y; - auto b = B + bStride * y; - auto c = C + cStride * y; - for (int x = 0; x < widthC4; ++x) { - BFVec4::save(c + 4 * x, BFVec4::load(a + 4 * x) - BFVec4::load(b + 4 * x)); - } - } -} -static void _MNNMatrixAdd(float* CF, const float* AF, const float* BF, size_t widthC4, size_t 
cStride, size_t aStride, - size_t bStride, size_t height) { - auto A = (int16_t*)AF; - auto B = (int16_t*)BF; - auto C = (int16_t*)CF; - for (int y = 0; y < height; ++y) { - auto a = A + aStride * y; - auto b = B + bStride * y; - auto c = C + cStride * y; - for (int x = 0; x < widthC4; ++x) { - BFVec4::save(c + 4 * x, BFVec4::load(a + 4 * x) + BFVec4::load(b + 4 * x)); - } - } -} - -static void _MNNStrassenMergeCFunction(float* c11F, float* c12F, float* c21F, float* c22F, float* xAddrF, size_t cStride, - size_t eSub, size_t hSub) { - auto c11 = (int16_t*)c11F; - auto c12 = (int16_t*)c12F; - auto c21 = (int16_t*)c21F; - auto c22 = (int16_t*)c22F; - auto xAddr = (int16_t*)xAddrF; - for (int y=0; y> 16; } } -} - -size_t _MNNGridSampleComputeOffset(int h, int w, int height, int width, bool padMode) { - if (padMode == true) { //padMode == BorderMode_ZEROS - if (h < 0 || h >= height || w < 0 || w >= width) { - return -1; + return; +#endif + int lC8 = (int)l / 8; + auto lR = lC8 * 8; + if (hP > 0 && lC8 > 0) { + MNNPackC8_BF16(destFloat, sourceFloat, l, h); + } + for (int y = hR; y < h; ++y) { + auto yR = y % 8; + auto yC = hP; + for (int x = 0; x < l; ++x) { + dest[x * 8 + yR + yC * 8 * l] = sourceInt32[x + y * l] >> 16; } - } else { - // Clearly, CLAMP is the right way to go for GridSamplePaddingMode_BORDER - // For GridSamplePaddingMode_REFLECTION, since we have reflected the values into (-1, 1), - // the leftover reflections degrade to GridSamplePaddingMode_BORDER - h = h < 0 ? 0 : ( h > (height - 1) ? (height - 1) : h); - w = w < 0 ? 0 : ( w > (width - 1) ? (width - 1) : w); } - return h * width * 4 + w * 4; -} - -void _MNNGridSampleInterp(float* output, const float* input, const float* cord, size_t inH, size_t inW, size_t outW, size_t channelCUnit, size_t inOffset, size_t outOffset, bool sampleMode, bool padMode) { - int16_t* outputPtr = (int16_t*)output; - const int16_t* inputPtr = (const int16_t*)input; - const int16_t* cordPtr = (const int16_t*)cord; - - for (auto ow = 0; ow < outW; ++ow) { - auto w = MNNLowpToFp32(cordPtr[2 * ow + 0]); - auto h = MNNLowpToFp32(cordPtr[2 * ow + 1]); - BFVec4 interp; - - if (sampleMode == true) { //sampleMode == SampleMode_NEAREST - int nh = ::floor(h + 0.5f); - int nw = ::floor(w + 0.5f); - size_t ns = _MNNGridSampleComputeOffset(nh, nw, inH, inW, padMode); - for (int k = 0; k < channelCUnit; ++k) { - interp = ns == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + ns); - BFVec4::save(outputPtr + k * outOffset + 4 * ow, interp); - } - } else { //sampleMode == GridSampleMode_BILINEAR - int w0_h = ::floor(h); - int w0_w = ::floor(w); - int w1_h = ::ceil(h); - int w1_w = ::ceil(w); - auto oneV = BFVec4(1.0f); - - auto f0 = BFVec4((float)w1_w - w); - auto f1 = oneV - f0; - auto h0 = BFVec4((float)w1_h - h); - auto h1 = oneV - h0; - - size_t s00 = _MNNGridSampleComputeOffset(w0_h, w0_w, inH, inW, padMode); - size_t s01 = _MNNGridSampleComputeOffset(w0_h, w1_w, inH, inW, padMode); - size_t s10 = _MNNGridSampleComputeOffset(w1_h, w0_w, inH, inW, padMode); - size_t s11 = _MNNGridSampleComputeOffset(w1_h, w1_w, inH, inW, padMode); - - for (int k = 0; k < channelCUnit; ++k) { - BFVec4 i00 = s00 == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s00); - BFVec4 i01 = s01 == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s01); - BFVec4 i10 = s10 == -1 ? BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s10); - BFVec4 i11 = s11 == -1 ? 
BFVec4(0.f) : BFVec4::load(inputPtr + k * inOffset + s11); - - BFVec4 i0 = i00 * f0 + i01 * f1; - BFVec4 i1 = i10 * f0 + i11 * f1; - - interp = i0 * h0 + i1 * h1; - BFVec4::save(outputPtr + k * outOffset + 4 * ow, interp); - } + for (int y = 0; y < hR; ++y) { + auto yR = y % 8; + auto yC = y / 8; + for (int x = lR; x < l; ++x) { + dest[x * 8 + yR + yC * 8 * l] = sourceInt32[x + y * l] >> 16; } } } - -static void _MNNAddC4WithStride(const float* sourceF, float* destF, size_t srcStride, size_t dstStride, size_t count) { - auto source = (const int16_t*)sourceF; - auto dest = (int16_t*)destF; - for (int i = 0; i < count; ++i) { - auto s = source + i * srcStride; - auto d = dest + i * dstStride; - BFVec4::save(d, BFVec4::load(d) + BFVec4::load(s)); - } -} -static void _MNNDeconvRunForUnitDepthWise(const int16_t* dst, int16_t* src, const int16_t* weight, size_t fw, size_t fh, - size_t weight_y_step, size_t dilateX_step, size_t dilateY_step) { - int fx, fy; - auto src_z = src; - auto weight_z = weight; - BFVec4 dstV = BFVec4::load(dst); - for (fy = 0; fy < fh; ++fy) { - auto src_y = src_z + fy * dilateY_step; - auto weight_y = weight_z + fy * weight_y_step; - for (fx = 0; fx < fw; ++fx) { - BFVec4 weight_x = BFVec4::load(weight_y + 4 * fx); - BFVec4 src_x = BFVec4::load(src_y + fx * dilateX_step); - BFVec4::save(src_y + fx * dilateX_step, src_x + weight_x * dstV); +#else +void NEON_MNNPackForMatMul_B_BF16(float* destFloat, const float* sourceFloat, size_t h, size_t l, bool transpose) { + int16_t* dest = (int16_t*)destFloat; + const float* source = sourceFloat; + if (!transpose) { + auto hP = h / 4; + auto hR = hP * 4; + if (hR != h) { + ::memset(dest, 0, UP_DIV(h, 4) * 4 * l * sizeof(int16_t)); } - } -} -static void _MNNDeconvRunForLineDepthwise(const int16_t* dst, int16_t* src, const int16_t* weight, size_t width, size_t src_w_setup, - size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step) { - int dx; - for (dx = 0; dx < width; ++dx) { - auto dst_x = dst + dx * 4; - auto src_dx = src + src_w_setup * dx; - _MNNDeconvRunForUnitDepthWise(dst_x, src_dx, weight, fw, fh, fw * 4, dilateX_step, dilateY_step); - } -} - -static void _MNNComputeMatMulForH_1_BF16(const float* AF, const float* BF, float* CF, const float* biasPtrF, const MatMulParam* param, size_t tId) { - auto A = (const int16_t*)AF; - auto B = (const int16_t*)BF; - auto C = (int16_t*)CF; - auto biasPtr = (const int16_t*)biasPtrF; - int e = param->e; - int l = param->l; - int numberThread = param->numberThread; - float biasValue = 0.0f; - auto bf = BF16Functions::get(); - if (nullptr != biasPtr) { - bf->MNNLowpToFp32(biasPtr, &biasValue, 1); - } - if (param->ATranspose) { - auto eC4 = e / 4; - auto eR = e % 4; - for (int y=tId; y 0) { - BFVec4 sumValue = BFVec4(biasValue); - auto srcY = A + eC4 * 4; - int16_t AR[4]; - for (int x=0; x 0) { + auto destY = dest + hP * 4 * l; + auto sourceY = source + hP * 4; + for (int x = 0; x < l; ++x) { + auto s0 = Vec4::load(sourceY + x * h + 0); + auto d0 = BFVec4(std::move(s0.value)); + BFVec4::save(destY + 4 * x + 0, d0); } - int16_t CR[4]; - BFVec4::save(CR, sumValue); - ::memcpy(C + 4 * eC4, CR, eR * sizeof(int16_t)); } return; } - auto lC4 = l / 4; - auto lR = l % 4; - for (int y=tId; y 0) { - int16_t AR[4] = {0, 0, 0, 0}; - int16_t BR[4] = {0, 0, 0, 0}; - ::memcpy(AR, srcY + lC4 * 4, lR * sizeof(int16_t)); - ::memcpy(BR, B + 4 * lC4, lR * sizeof(int16_t)); - sumValue = sumValue + BFVec4::load(AR) * BFVec4::load(BR); +#if 0 + auto sourceInt32 = (const int32_t*)source; + // Origin C++ code + 
::memset(dest, 0, UP_DIV(h, 4) * 4 * l * sizeof(int16_t)); + + for (int y = 0; y < h; ++y) { + auto yR = y % 4; + auto yC = y / 4; + for (int x = 0; x < l; ++x) { + dest[x * 4 + yR + yC * 4 * l] = sourceInt32[x + y * l] >> 16; } - float sumSingle = sumValue[0] + sumValue[1] + sumValue[2] + sumValue[3]; - bf->MNNFp32ToLowp(&sumSingle, C + y, 1); } + return; +#endif + int offset[2] = { + (int)l, + (int)l, + }; + MNNPackC4_BF16(destFloat, sourceFloat, l, h, offset); } +#endif // __aarch64__ +#endif -static void _MNNComputeMatMulForE_1_BF16(const float* AF, const float* BF, float* CF, const float* biasPtrF, const MatMulParam* param, size_t tId) { - auto l = param->l; - auto h = param->h; - auto numberThread = param->numberThread; - auto lC4 = l / 4; - auto lR = l % 4; - auto A = (const int16_t*)AF; - auto B = (const int16_t*)BF; - auto C = (int16_t*)CF; - auto biasPtr = (const int16_t*)biasPtrF; - auto bf16 = BF16Functions::get(); - if (param->BTranspose) { - for (int y=tId; y 0) { - int16_t AR[4] = {0, 0, 0, 0}; - int16_t BR[4] = {0, 0, 0, 0}; - ::memcpy(AR, A + lC4 * 4, lR * sizeof(int16_t)); - ::memcpy(BR, by + 4 * lC4, lR * sizeof(int16_t)); - sumValue = sumValue + BFVec4::load(AR) * BFVec4::load(BR); - } - float sumRemain = sumValue[0] + sumValue[1] + sumValue[2] + sumValue[3]; - if (nullptr != biasPtr) { - sumRemain += BFVec4::broadcast(biasPtr[y])[0]; - } - bf16->MNNFp32ToLowp(&sumRemain, C + y, 1); - } - } else { - auto hC4 = h / 4; - auto hR = h % 4; - for (int y=tId; y> 16; } - BFVec4::save(C + 4 * y, sumValue); - } - if (tId == 0 && hR > 0) { - auto bs = B + 4 * hC4; - BFVec4 sumValue = BFVec4(0.0f); - if (biasPtr != nullptr) { - int16_t biasTemp[4]; - ::memcpy(biasTemp, biasPtr + 4 * hC4, hR * sizeof(int16_t)); - sumValue = BFVec4::load(biasTemp); - } - auto srcY = A + 4 * hC4 * l; - int16_t bTemp[4]; - for (int x=0; xMNNConvRunForLineDepthwise = MNNConvRunForLineDepthwiseBF16; - gInstance->MNNConvRunForUnitDepthWise = MNNConvRunForUnitDepthWiseBF16; - gInstance->MNNAxByClampBroadcastUnit = MNNAxByClampBroadcastUnitBF16; + *gInstance = *MNNGetCoreFunctions(); gInstance->MNNFp32ToLowp = _MNNFp32ToLowp; gInstance->MNNLowpToFp32 = _MNNLowpToFp32; - gInstance->bytes = 2; - gInstance->pack = 4; - gInstance->MNNPackCUnit = (decltype(gInstance->MNNPackCUnit))MNNPackC4Int16; - gInstance->MNNUnpackCUnit = (decltype(gInstance->MNNUnpackCUnit))MNNUnpackC4Int16; - gInstance->MNNUnpackCUnitTranspose = (decltype(gInstance->MNNUnpackCUnitTranspose))MNNPackTransposeInt16; - gInstance->MNNPackCUnitTranspose = (decltype(gInstance->MNNPackCUnitTranspose))MNNUnpackTransposeInt16; - gInstance->MNNConvDwF23MulTransUnit = _MNNConvDwF23MulTransUnit; - gInstance->MNNSourceTransformCommonF23 = _MNNSourceTransformCommonF23; - gInstance->MNNMultiAndDestTransformCommon23 = _MNNMultiAndDestTransformCommon23; - gInstance->MNNMatrixAdd = _MNNMatrixAdd; - gInstance->MNNMatrixSub = _MNNMatrixSub; - gInstance->MNNStrassenMergeCFunction = _MNNStrassenMergeCFunction; - gInstance->penalty = 10.0f; - gInstance->MNNScaleAndAddBias = _MNNScaleAndAddBias; - gInstance->MNNGridSampleComputeCord = _MNNGridSampleComputeCord; - gInstance->MNNGridSampleInterp = _MNNGridSampleInterp; - gInstance->MNNCopyC4WithStride = MNNCopyC4Int16WithStride; - gInstance->MNNAddC4WithStride = _MNNAddC4WithStride; - gInstance->chooseWinoSourceTransformPack = (decltype(gInstance->chooseWinoSourceTransformPack))(WinogradFunctionHalf::chooseWinoSourceTransformPack); - gInstance->chooseWinoSourceUnrollTransform = 
(decltype(gInstance->chooseWinoSourceUnrollTransform))(WinogradFunctionHalf::chooseSourceUnrollTransform); - gInstance->chooseWinoDestUnrollTransform = (decltype(gInstance->chooseWinoDestUnrollTransform))(WinogradFunctionHalf::chooseWinoDestUnrollTransform); - gInstance->MNNDeconvRunForLineDepthwise = (decltype(gInstance->MNNDeconvRunForLineDepthwise))_MNNDeconvRunForLineDepthwise; - gInstance->MNNDeconvRunForUnitDepthWise = (decltype(gInstance->MNNDeconvRunForUnitDepthWise))_MNNDeconvRunForUnitDepthWise; - gInstance->MNNSelectBinaryFunctionForFloat = BF16BinaryFloatSelect; - gInstance->MNNSelectUnaryFunctionForFloat = BF16UnaryFloatSelect; - gInstance->MNNReluWithSlopeChannel = MNNReluWithSlopeChannelBF16;// TODO: Optimize it - -#if !defined(MNN_USE_SSE) && !defined(MNN_USE_NEON) - gInstance->penalty = 1.5f; - gInstance->MNNPackForMatMul_B = MNNPackForMatMul_B_BF16; // common function MNNPackForMatMul_B_BF16 is needed even with out sse or arm neon. - gInstance->MNNPackC4ForMatMul_A = MNNPackC4ForMatMul_A_BF16;// - gInstance->MNNPackedMatMul = (decltype(gInstance->MNNPackedMatMul))MNNPackedMatMul_BF16; - gInstance->MNNPackedMatMulRemain = (decltype(gInstance->MNNPackedMatMulRemain))MNNPackedMatMulRemain_BF16; -#endif - gInstance->MNNComputeMatMulForH_1 = _MNNComputeMatMulForH_1_BF16; - gInstance->MNNComputeMatMulForE_1 = _MNNComputeMatMulForE_1_BF16; - gInstance->MNNPoolingAvg = (decltype(gInstance->MNNPoolingAvg))(poolingAvg); - gInstance->MNNPoolingMax = (decltype(gInstance->MNNPoolingMax))(poolingMax); - gInstance->MNNPoolingMaxWithRedice = (decltype(gInstance->MNNPoolingMaxWithRedice))(poolingMaxWithRedice); + gInstance->matmulBytes = 2; -#if defined(MNN_USE_SSE) - gInstance->MNNPackForMatMul_B = _SSE_MNNPackForMatMul_B_BF16; - auto cpuFlags = libyuv::InitCpuFlags(); - if (!(cpuFlags & libyuv::kCpuHasF16C)) { - delete gInstance; - gInstance = nullptr; - return false; - } - if (cpuFlags & libyuv::kCpuHasAVX2) { - gInstance->MNNPackForMatMul_B = _AVX_MNNPackForMatMul_B_BF16; - gInstance->MNNGetMatMulPackMode = _AVX_MNNGetMatMulPackMode_BF16; - gInstance->MNNPackC4ForMatMul_A = _AVX_MNNPackC4ForMatMul_A_BF16; - gInstance->MNNPackedMatMul = _AVX_MNNPackedMatMulFMA_BF16; - gInstance->MNNPackedMatMulRemain = _AVX_MNNPackedMatMulRemainFMA_BF16; - return true; - } -#elif defined(MNN_USE_NEON) gInstance->MNNPackForMatMul_B = NEON_MNNPackForMatMul_B_BF16; gInstance->MNNGetMatMulPackMode = NEON_MNNGetMatMulPackMode_BF16; gInstance->MNNPackC4ForMatMul_A = NEON_MNNPackC4ForMatMul_A_BF16; gInstance->MNNPackedMatMul = NEON_MNNPackedMatMul_BF16; gInstance->MNNPackedMatMulRemain = NEON_MNNPackedMatMulRemain_BF16; - gInstance->MNNConvRunForLineDepthwise = NEON_MNNConvRunForLineDepthwise_BF16; - gInstance->MNNConvRunForUnitDepthWise = NEON_MNNConvRunForUnitDepthWise_BF16; - gInstance->MNNAxByClampBroadcastUnit = NEON_MNNAxByClampBroadcastC4_BF16; #ifdef __aarch64__ - cpuinfo_arm_isa gCPUInfo; - cpuinfo_arm_init(&gCPUInfo); + const MNNCPUInfo& gCPUInfo = *MNNGetCPUInfo(); gInstance->supportFp16arith = gCPUInfo.fp16arith; gInstance->supportSDot = gCPUInfo.dot; gInstance->supportI8mm = gCPUInfo.i8mm; @@ -906,10 +354,11 @@ bool BF16Functions::init() { gInstance->MNNPackedMatMulRemain = ARMV86_MNNPackedMatMulRemain_BF16; } #endif - return true; -#endif + gInstance->MNNPackedMatMul_int4 = nullptr; + gInstance->MNNPackedMatMul_int8 = nullptr; // TODO: raw cpu version of bf16 return true; +#endif } CoreFunctions* BF16Functions::get() { diff --git a/source/backend/cpu/bf16/CMakeLists.txt 
b/source/backend/cpu/bf16/CMakeLists.txt index b533bec6f..7dc34a113 100644 --- a/source/backend/cpu/bf16/CMakeLists.txt +++ b/source/backend/cpu/bf16/CMakeLists.txt @@ -9,11 +9,3 @@ add_library( ${MNN_BF16_SRCS} ) target_compile_options(MNN_BF16 PRIVATE -DMNN_SUPPORT_BF16) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(X86_64)|(x64)|(X64)|(amd64)|(AMD64)|(i686)") - if (MNN_USE_SSE) - target_compile_options(MNN_BF16 PRIVATE -DMNN_USE_SSE) - if (MNN_SSE_USE_FP16_INSTEAD) - target_compile_options(MNN_BF16 PRIVATE -DMNN_SSE_USE_FP16_INSTEAD -mf16c) - endif() - endif() -endif() diff --git a/source/backend/cpu/compute/CommonOptFunction.cpp b/source/backend/cpu/compute/CommonOptFunction.cpp index f9ce9567c..897f10b40 100644 --- a/source/backend/cpu/compute/CommonOptFunction.cpp +++ b/source/backend/cpu/compute/CommonOptFunction.cpp @@ -35,16 +35,279 @@ void MNNInt8ToInt16(int16_t* dest, const int8_t* source, size_t count) { } #endif -#if defined(__aarch64__) #ifdef MNN_LOW_MEMORY -extern "C" { -void MNNGemmHybridInt4FP32_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt8FP32_smmla(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt4FP32_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt8FP32_sdot(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); +#ifndef __aarch64__ +static void _MNNPackedMatMulRemain_int4(float* C, const float* A, const float* fB, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, int aStride, const float* k, const float* b) { + auto B = reinterpret_cast(fB); + auto h = parameter[2]; + auto l = parameter[1]; + auto cStride = parameter[3] / sizeof(float); + auto hRemain = parameter[4]; + float weightBytes = 0.5; // sizeof(int4_t) + auto bExtraStride = static_cast(parameter[5] / weightBytes); + auto bStride = bExtraStride + 4 * l; + auto hC4 = UP_DIV(h, 4); + float minValue = -std::numeric_limits().max(); + float maxValue = std::numeric_limits().max(); + if (nullptr != postParameters) { + minValue = postParameters[2]; + maxValue = postParameters[3]; + } + int blockId = parameter[6]; + + for (int x=0; x 0) { + summer[0] = dstY[0]; + summer[1] = dstY[1]; + summer[2] = dstY[2]; + summer[3] = dstY[3]; + } + if (nullptr != bias && nullptr != postParameters) { + for (int v=0; v<4; ++v) { + summer[v] += bias[4 * y + v]; + } + } + for (int z=0; z(fB); + auto h = parameter[2]; + auto l = parameter[1]; + auto cStride = parameter[3] / sizeof(float); + auto hRemain = parameter[4]; + float weightBytes = 1; // sizeof(int8_t) + auto bExtraStride = static_cast(parameter[5] / weightBytes); + auto bStride = bExtraStride + 4 * l; + auto hC4 = UP_DIV(h, 4); + float minValue = -std::numeric_limits().max(); + float maxValue = std::numeric_limits().max(); + if (nullptr != postParameters) { + minValue = postParameters[2]; + maxValue = postParameters[3]; + } + int blockId = parameter[6]; + + for (int x=0; x 0) { + summer[0] = dstY[0]; + summer[1] = dstY[1]; + summer[2] = dstY[2]; + summer[3] = dstY[3]; + } + if (nullptr != bias && nullptr != postParameters) { + for (int v=0; v<4; ++v) { + summer[v] += bias[4 * y + v]; 
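// The z-loop that follows accumulates this (x, y) output block over the packed depth l:
// each int8 weight is dequantized on the fly, roughly w_f = k * w_q + b with k/b taken
// per quantization block (see blockId above), multiplied into summer[0..3], and the sums
// are clamped to [minValue, maxValue] before being written back to dstY.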
+ } + } + for (int z=0; z=0 + for (int c = 0; c < src_depth_quad; ++c) { + auto src = source + c * srcStep + i * pack; + for (int k = 0; k < pack; ++k) { + absmaxVal = std::max(absmaxVal, std::abs(src[k])); + } + } + absmax[i] = absmaxVal; + } +} +void MNNQuantScaleFP32(float* absmax, float* quant_scale, float* dequant_scale, size_t thread, size_t batch) { + for (int i = 0; i < batch; ++i) { + auto absmaxPtr = absmax + i; + float absVal = 0.f; + for (int t = 0; t < thread; ++t) { + absVal = std::max(absVal, absmaxPtr[t * batch]); + } + quant_scale[i] = 127.0f / absVal; + dequant_scale[i] = absVal / 127.0f; + } } +void MNNQuantSumFP32(float* sum, const float* dequant_scale, size_t thread, size_t batch) { + for (int i = 0; i < batch; ++i) { + auto sumPtr = reinterpret_cast(sum) + i; + int sumVal = 0.f; + for (int t = 0; t < thread; ++t) { + sumVal += sumPtr[t * batch]; + } + sum[i] = sumVal * dequant_scale[i]; + } +} +void MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, size_t src_depth_quad, size_t realSize, int pack) { +#ifdef MNN_USE_SSE + uint8_t* dstPtr = reinterpret_cast(dst); + int offset = 128; +#else + int8_t* dstPtr = dst; + int offset = 0; #endif + for (int i = 0; i < realSize; ++i) { + auto scaleVal = scale[i]; + for (int c = 0; c < src_depth_quad; ++c) { + auto srcZ = src + c * pack * realSize + i * pack; + auto dstZ = dstPtr + c * pack * realSize + i * pack; + for (int k = 0; k < pack; ++k) { + int val = (int)roundf(srcZ[k] * scaleVal); + dstZ[k] = val + offset; + } + } + } +} + +void MNNDynamicUpdateConvBiasScale(float* newbias, float* newscale, float* oldbias, float* weightScale, float* inputScale, float* weightKernelSum, float* inputZero, size_t ocQuad, size_t scaleSize) { + int ocUp4 = 4 * ocQuad; + int pack = 4; + int blockNum = scaleSize / ocUp4; + for (int i = 0; i < ocUp4; ++i) { + newbias[i] = oldbias[i] - weightKernelSum[i] * inputZero[0]; + } + for (int k = 0; k < blockNum; ++k) { + for (int i = 0; i < ocUp4; ++i) { + newscale[i + k * ocUp4] = weightScale[i + k * ocUp4] * inputScale[0]; + } + } +} + +#endif // not __aarch64__ +#endif // LOW_MEMORY + + +static void MNNSumByAxisLForMatmul_A(float* dest, int8_t* source, const float* scale, ssize_t realDstCount, SumByAxisParams sumParams) { +#ifdef MNN_USE_SSE + uint8_t* srcInt8 = reinterpret_cast(source); +#else + int8_t* srcInt8 = source; #endif + auto scalePtr = scale; + auto kernelCountUnitDouble = sumParams.kernelCountUnitDouble; + auto blockNum = sumParams.blockNum; + auto EP = sumParams.DST_XUNIT; + auto LP = sumParams.SRC_UNIT; + auto blockSizeQuad = kernelCountUnitDouble / blockNum; + auto col_buffer_unit_size = sumParams.col_buffer_unit_size; + auto oneScale = sumParams.oneScale; + do { + int step = ALIMIN(EP, realDstCount); + + for (int k = 0; k < blockNum; ++k) { + // const auto src_x = srcInt8 + w * LP; + const auto src_x = srcInt8 + k * (EP * LP * blockSizeQuad); + for (int w = 0; w < step; ++w) { + float dequantScale = scale[0]; + if (oneScale == 0) { + dequantScale = scalePtr[w]; + } + int sumint32 = 0; + const auto src_y = src_x + w * LP; + for (int j = 0; j < blockSizeQuad; ++j) { + const auto src_z = src_y + j * (EP * LP); + for (int i = 0; i < LP; ++i) { + sumint32 += src_z[i]; + } + } + dest[w + k * step] = dequantScale * static_cast(sumint32); + } + } + scalePtr += step; + + dest += (step * blockNum); + realDstCount -= step; + srcInt8 += col_buffer_unit_size; + } while(realDstCount > 0); +} template void MNNPackC4Common(T* dst, const T* src, size_t area, size_t depth, int* 
areaOffset) { @@ -461,11 +724,6 @@ void MNNCountMaxMinValue(float* source, float* minVal, float* maxVal, size_t siz } *minVal = min_; *maxVal = max_; - // float range = max_ - min_; - // MNN_ASSERT(range != 0); - // *quantScale = 255.0f / range; - // *dequantScale = range / 255.0f; - // *zeroPoint = std::min(255.f, std::max(roundf(-(min_ * 255.f) / range), 0.f)) - 128.0f; } #ifndef MNN_USE_NEON @@ -579,312 +837,6 @@ void MNNPackedMatMulRemain(float* C, const float* A, const float* B, size_t eSiz _MNNPackedMatMulRemain(C, A, B, eSize, parameter, postParameters, bias, aStride); } -#ifdef MNN_LOW_MEMORY -static void _MNNPackedMatMulRemain_int4(float* C, const float* A, const float* fB, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, int aStride, const float* k, const float* b) { - auto B = reinterpret_cast(fB); - auto h = parameter[2]; - auto l = parameter[1]; - auto cStride = parameter[3] / sizeof(float); - auto hRemain = parameter[4]; - float weightBytes = 0.5; // sizeof(int4_t) - auto bExtraStride = static_cast(parameter[5] / weightBytes); - auto bStride = bExtraStride + 4 * l; - auto hC4 = UP_DIV(h, 4); - float minValue = -std::numeric_limits().max(); - float maxValue = std::numeric_limits().max(); - if (nullptr != postParameters) { - minValue = postParameters[2]; - maxValue = postParameters[3]; - } - int blockId = parameter[6]; - - for (int x=0; x 0) { - summer[0] = dstY[0]; - summer[1] = dstY[1]; - summer[2] = dstY[2]; - summer[3] = dstY[3]; - } - if (nullptr != bias && nullptr != postParameters) { - for (int v=0; v<4; ++v) { - summer[v] += bias[4 * y + v]; - } - } - for (int z=0; z(fB); - auto h = parameter[2]; - auto l = parameter[1]; - auto cStride = parameter[3] / sizeof(float); - auto hRemain = parameter[4]; - float weightBytes = 1; // sizeof(int8_t) - auto bExtraStride = static_cast(parameter[5] / weightBytes); - auto bStride = bExtraStride + 4 * l; - auto hC4 = UP_DIV(h, 4); - float minValue = -std::numeric_limits().max(); - float maxValue = std::numeric_limits().max(); - if (nullptr != postParameters) { - minValue = postParameters[2]; - maxValue = postParameters[3]; - } - int blockId = parameter[6]; - - for (int x=0; x 0) { - summer[0] = dstY[0]; - summer[1] = dstY[1]; - summer[2] = dstY[2]; - summer[3] = dstY[3]; - } - if (nullptr != bias && nullptr != postParameters) { - for (int v=0; v<4; ++v) { - summer[v] += bias[4 * y + v]; - } - } - for (int z=0; z=0 - for (int c = 0; c < src_depth_quad; ++c) { - auto src = source + c * srcStep + i * pack; - for (int k = 0; k < pack; ++k) { - absmaxVal = std::max(absmaxVal, std::abs(src[k])); - } - } - absmax[i] = absmaxVal; - } -} -void MNNQuantScaleFP32(float* absmax, float* quant_scale, float* dequant_scale, size_t thread, size_t batch) { - for (int i = 0; i < batch; ++i) { - auto absmaxPtr = absmax + i; - float absVal = 0.f; - for (int t = 0; t < thread; ++t) { - absVal = std::max(absVal, absmaxPtr[t * batch]); - } - quant_scale[i] = 127.0f / absVal; - dequant_scale[i] = absVal / 127.0f; - } -} -void MNNQuantSumFP32(float* sum, const float* dequant_scale, size_t thread, size_t batch) { - for (int i = 0; i < batch; ++i) { - auto sumPtr = reinterpret_cast(sum) + i; - int sumVal = 0.f; - for (int t = 0; t < thread; ++t) { - sumVal += sumPtr[t * batch]; - } - sum[i] = sumVal * dequant_scale[i]; - } -} -void MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack) { -#ifdef MNN_USE_SSE - uint8_t* dstPtr = 
reinterpret_cast(dst); -#else - int8_t* dstPtr = dst; -#endif - for (int i = 0; i < realSize; ++i) { - auto scaleVal = scale[i]; - int acc = 0; - for (int c = 0; c < src_depth_quad; ++c) { - auto srcZ = src + c * pack * realSize + i * pack; - auto dstZ = dstPtr + c * pack * realSize + i * pack; - for (int k = 0; k < pack; ++k) { - int val = (int)roundf(srcZ[k] * scaleVal); - acc += val; - dstZ[k] = val; - } - } - ((int32_t*)sum)[i] = acc; - } -} -void MNNGemmHybridInt8FP32(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param) { - // C:(oc/4,N,4) A:(ic/4,N,4) B:(oc/4,ic/4,4,4) - int pack = 4; - size_t weight_step = src_depth_quad * pack * pack; - const float* alpha_ptr = param[0]; - const float* zero_ptr = param[1]; - const float* bias_ptr = param[2]; - const float* sums_ptr = param[3]; - const float* scale_ptr = param[4]; - for (int ci = 0; ci < dst_depth_quad; ++ci) { - float* dstZ = C + ci * pack * realSize; - const int8_t* weight = B + ci * weight_step; - auto alpha = alpha_ptr + ci * pack; - auto zero = zero_ptr + ci * pack; - auto bias = bias_ptr + ci * pack; - //const float* sums = param[2]; - for (int j = 0; j < realSize; ++j) { - const float* sums = sums_ptr + j; - const float* scale = scale_ptr + j; - float* dstX = dstZ + j * pack; - std::vector tmp(pack); - // int8_t* weightPtr = B + weight_step; - const int8_t* srcBatch = A + j * pack; - for (int k = 0; k < src_depth_quad; ++k) { - const int8_t* srcZ = srcBatch + k * pack * realSize; - const int8_t* weightZ = weight + k * pack * pack; - for (int cn = 0; cn < pack; ++cn) { // pack for oc - const auto weightj = weightZ + cn * pack; - for (int ck = 0; ck < pack; ++ck) { // pack for ic - tmp[cn] += (int32_t)srcZ[ck] * (int32_t)weightj[ck]; - } - } - } - - // int32->float - for (int cn = 0; cn < pack; ++cn) { - float val = (float)tmp[cn] * scale[0]; - val = bias[cn] + val * alpha[cn] + zero[cn] * sums[0]; - dstX[cn] = val; - } - } - } -} -void MNNGemmHybridInt4FP32(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param) { - // C:(oc/4,N,4) A:(ic/4,N,4) B:(oc/4,ic/4,4,4) - int pack = 4; - size_t weight_step = src_depth_quad * pack * pack * 0.5; - size_t weight_stride = pack * pack / 2; - const float* alpha_ptr = param[0]; - const float* zero_ptr = param[1]; - const float* bias_ptr = param[2]; - const float* sums_ptr = param[3]; - const float* scale_ptr = param[4]; - for (int ci = 0; ci < dst_depth_quad; ++ci) { - float* dstZ = C + ci * pack * realSize; - const int8_t* weight = B + ci * weight_step; - auto alpha = alpha_ptr + ci * pack; - auto zero = zero_ptr + ci * pack; - auto bias = bias_ptr + ci * pack; - //const float* sums = param[2]; - for (int j = 0; j < realSize; ++j) { - const float* sums = sums_ptr + j; - const float* scale = scale_ptr + j; - float* dstX = dstZ + j * pack; - int tmp[4] = {0, 0, 0, 0}; - // int8_t* weightPtr = B + weight_step; - const int8_t* srcBatch = A + j * pack; - for (int k = 0; k < src_depth_quad; ++k) { - const int8_t* srcZ = srcBatch + k * pack * realSize; - const uint8_t* weightZ = (uint8_t*)weight + k * weight_stride; - int32_t tmpw[16]; - uint32_t c = 0xf; - for (int kk = 0; kk < 8; ++kk) { - tmpw[2 * kk] = (weightZ[kk]>>4) - 8; - tmpw[2 * kk + 1] = (weightZ[kk] & c) - 8; - } - for (int cn = 0; cn < pack; ++cn) { // pack for oc - const auto weightj = tmpw + cn * pack; - for (int ck = 0; ck < pack; ++ck) { // pack for ic 
- tmp[cn] += (int32_t)srcZ[ck] * (int32_t)weightj[ck]; - } - } - } - - // int32->float - for (int cn = 0; cn < pack; ++cn) { - float val = (float)tmp[cn] * scale[0]; - val = bias[cn] + val * alpha[cn] + zero[cn] * sums[0]; - dstX[cn] = val; - } - } - } -} -#endif - void MNNPackC4ForMatMul_A(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) { int number = info[0]; int eReal = info[1]; @@ -3298,16 +3250,6 @@ void MNNCoreFunctionInit() { gCoreFunction->MNNPackForMatMul_B = MNNPackForMatMul_B; gCoreFunction->MNNPackedMatMul = MNNPackedMatMul; gCoreFunction->MNNPackedMatMulRemain = MNNPackedMatMulRemain; -#ifdef MNN_LOW_MEMORY - gCoreFunction->MNNPackedMatMul_int4 = MNNPackedMatMul_int4; - gCoreFunction->MNNPackedMatMulRemain_int4 = MNNPackedMatMulRemain_int4; - gCoreFunction->MNNPackedMatMul_int8 = MNNPackedMatMul_int8; - gCoreFunction->MNNPackedMatMulRemain_int8 = MNNPackedMatMulRemain_int8; - gCoreFunction->MNNAbsMax = MNNAbsMaxFP32; - gCoreFunction->MNNDynamicQuant = MNNDynamicQuantFP32; - gCoreFunction->MNNQuantScale = MNNQuantScaleFP32; - gCoreFunction->MNNQuantSum = MNNQuantSumFP32; -#endif gCoreFunction->MNNCountMaxMinValue = MNNCountMaxMinValue; gCoreFunction->MNNGetSparseMatMulPackMode = MNNGetSparseMatMulPackMode; gCoreFunction->MNNAdjustOptimalSparseKernel = _MNNAdjustOptimalSparseKernel; @@ -3315,7 +3257,6 @@ void MNNCoreFunctionInit() { gCoreFunction->MNNComputeMatMulForE_1 = MNNComputeMatMulForE_1; gCoreFunction->MNNComputeMatMulForH_1 = MNNComputeMatMulForH_1; - // Lowp gCoreFunction->MNNFp32ToLowp = nullptr; gCoreFunction->MNNLowpToFp32 = nullptr; @@ -3394,24 +3335,24 @@ void MNNCoreFunctionInit() { gCoreFunction->MNNAccumulateSequenceNumber = MNNAccumulateSequenceNumber; - cpuinfo_arm_isa gCPUInfo; - cpuinfo_arm_init(&gCPUInfo); + const MNNCPUInfo& gCPUInfo = *MNNGetCPUInfo(); gCoreFunction->supportFp16arith = gCPUInfo.fp16arith; gCoreFunction->supportSDot = gCPUInfo.dot; gCoreFunction->supportI8mm = gCPUInfo.i8mm; + gCoreFunction->MNNSumByAxisLForMatmul_A = MNNSumByAxisLForMatmul_A; #ifdef MNN_LOW_MEMORY - gCoreFunction->MNNGemmHybridInt8 = MNNGemmHybridInt8FP32; - gCoreFunction->MNNGemmHybridInt4 = MNNGemmHybridInt4FP32; -#if defined(__aarch64__) - if (gCoreFunction->supportSDot) { - gCoreFunction->MNNGemmHybridInt8 = MNNGemmHybridInt8FP32_sdot; - gCoreFunction->MNNGemmHybridInt4 = MNNGemmHybridInt4FP32_sdot; - } - if (gCoreFunction->supportI8mm) { - gCoreFunction->MNNGemmHybridInt8 = MNNGemmHybridInt8FP32_smmla; - gCoreFunction->MNNGemmHybridInt4 = MNNGemmHybridInt4FP32_smmla; - } -#endif + // Weight Dequant Gemm Kernels + gCoreFunction->MNNPackedMatMul_int4 = MNNPackedMatMul_int4; + gCoreFunction->MNNPackedMatMulRemain_int4 = MNNPackedMatMulRemain_int4; + gCoreFunction->MNNPackedMatMul_int8 = MNNPackedMatMul_int8; + gCoreFunction->MNNPackedMatMulRemain_int8 = MNNPackedMatMulRemain_int8; + // Dynamic Quant Helper Functions + gCoreFunction->MNNAbsMax = MNNAbsMaxFP32; + gCoreFunction->MNNDynamicQuant = MNNDynamicQuantFP32; + gCoreFunction->MNNQuantScale = MNNQuantScaleFP32; + gCoreFunction->MNNQuantSum = MNNQuantSumFP32; + // Dynamic Quan Bias + gCoreFunction->MNNDynamicUpdateConvBiasScale = MNNDynamicUpdateConvBiasScale; #endif MNNCoreInt8FunctionInit(); MNNFunctionInit(); diff --git a/source/backend/cpu/compute/CommonOptFunction.h b/source/backend/cpu/compute/CommonOptFunction.h index 9058c1353..bbfdce0fa 100644 --- a/source/backend/cpu/compute/CommonOptFunction.h +++ b/source/backend/cpu/compute/CommonOptFunction.h @@ -126,9 +126,9 @@ 
void MNNPackedMatMul_int8(float* C, const float* A, const float* B, const size_t void MNNPackedMatMulRemain_int8(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); void MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack); void MNNQuantScaleFP32(float* absmax, float* quant_scale, float* dequant_scale, size_t thread, size_t batch); -void MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack); +void MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, size_t src_depth_quad, size_t realSize, int pack); void MNNQuantSumFP32(float* sum, const float* dequant_scale, size_t thread, size_t batch); - +void MNNDynamicUpdateConvBiasScale(float* newbias, float* newscale, float* oldbias, float* weightScale, float* inputScale, float* weightKernelSum, float* inputZero, size_t ocQuad, size_t scaleSize); void MNNPackForSparseMatMul_B(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose); struct SparseMatMulParas @@ -169,8 +169,15 @@ void MNNSourceTransformCommonF23(const float *source, float *dest, int unit, int void MNNConvDwF23MulTransUnit(float **cacheLine, const float *weigth, float *dest, size_t ow, const float* bias, const float* postParameter); void MNNMultiAndDestTransformCommon23(float **cacheLine, const float *weigth, float *dest, int cacheLineSize, int ow); void MNNInt8ToInt16(int16_t* dest, const int8_t* source, size_t count); -void MNNGemmHybridInt4FP32(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -void MNNGemmHybridInt8FP32(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); + +struct SumByAxisParams { + ssize_t kernelCountUnitDouble; + ssize_t col_buffer_unit_size; + ssize_t DST_XUNIT; + ssize_t SRC_UNIT; + ssize_t blockNum; + ssize_t oneScale; +}; } typedef void(*MNNBinaryExecute)(void* outputRaw, const void* inputRaw0, const void* inputRaw1, int elementSize, int broadcastIndex); @@ -195,20 +202,18 @@ struct CoreFunctions { // parameters: e, l, h, CStride, AStride, BStride void(*MNNPackedMatMul)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); void(*MNNPackedMatMulRemain)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); - void(*MNNPackedMatMul_int4)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); - void(*MNNPackedMatMulRemain_int4)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); - void(*MNNAbsMax)(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack); - void(*MNNQuantScale)(float* absmax, float* quant_scale, float* dequant_scale, size_t thread, size_t batch); - void(*MNNDynamicQuant)(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack); - 
void(*MNNQuantSum)(float* sum, const float* dequant_scale, size_t thread, size_t batch); - void(*MNNGemmHybridInt4)(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); - void(*MNNGemmHybridInt8)(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); - void(*MNNPackedMatMul_int8)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); - void(*MNNPackedMatMulRemain_int8)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); + void(*MNNPackedMatMul_int4)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b) = nullptr; + void(*MNNPackedMatMulRemain_int4)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b) = nullptr; + void(*MNNAbsMax)(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack) = nullptr; + void(*MNNQuantScale)(float* absmax, float* quant_scale, float* dequant_scale, size_t thread, size_t batch) = nullptr; + void(*MNNDynamicQuant)(const float* src, int8_t* dst, const float* scale, size_t src_depth_quad, size_t realSize, int pack) = nullptr; + void(*MNNQuantSum)(float* sum, const float* dequant_scale, size_t thread, size_t batch) = nullptr; + void(*MNNPackedMatMul_int8)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b) = nullptr; + void(*MNNPackedMatMulRemain_int8)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b) = nullptr; void(*MNNComputeMatMulForH_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId); void(*MNNComputeMatMulForE_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId); void(*MNNCountMaxMinValue)(float* source, float* minVal, float* maxVal, size_t size); - + void(*MNNDynamicUpdateConvBiasScale)(float* newbias, float* newscale, float* oldbias, float* weightScale, float* inputScale, float* weightKernelSum, float* inputZero, size_t ocQuad, size_t scaleSize); typedef void(*MNNPackedMatMulKernel)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); @@ -228,6 +233,7 @@ struct CoreFunctions { void(*MNNFp32ToLowp)(const float* src, int16_t* dst, size_t size); void(*MNNLowpToFp32)(const int16_t* src, float* dst, size_t size); int bytes; // Byte for float + int matmulBytes = 0; // Special bytes for dense matmul, C = A*B, A, B is matmulBytes, C is bytes. 
If 0, means the same as bytes /**NC4HW4's Functions*/ int pack; @@ -330,6 +336,7 @@ struct CoreFunctions { void(*MNN2BitcopyFast)(uint8_t* dstO, const uint8_t* srcO, int size, int stride, int ds); void(*MNN1BitcopyFast)(uint8_t* dstO, const uint8_t* srcO, int size, int stride, int ds); void(*MNNAccumulateSequenceNumber)(float* dst, const float* src, int size); + void(*MNNSumByAxisLForMatmul_A)(float* dest, int8_t* source, const float* dequantScale, ssize_t realDstCount, SumByAxisParams sumParams); }; void MNNCoreFunctionInit(); CoreFunctions* MNNGetCoreFunctions(); diff --git a/source/backend/cpu/compute/ConvInt8TiledExecutor.cpp b/source/backend/cpu/compute/ConvInt8TiledExecutor.cpp index 7abf443d2..756f24aee 100644 --- a/source/backend/cpu/compute/ConvInt8TiledExecutor.cpp +++ b/source/backend/cpu/compute/ConvInt8TiledExecutor.cpp @@ -13,12 +13,12 @@ #include #include "backend/cpu/CPUBackend.hpp" -#include "backend/cpu/compute/CommonOptFunction.h" #include "core/Concurrency.h" #include "core/TensorUtils.hpp" + namespace MNN { -ConvInt8TiledExecutor::ConvInt8TiledExecutor(Backend* backend, const Convolution2DCommon* convOp, std::shared_ptr res): CPUConvolution(convOp, backend), mResource(res), mMutableResource(res, backend) { +ConvInt8TiledExecutor::ConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res): CPUConvolution(convOp->common(), backend), mResourceInt8(res), mMutableResource(res, backend) { mValid = mMutableResource.mValid; } @@ -85,9 +85,10 @@ void ConvInt8TiledExecutor::reorderWeight(Tensor* weight, const uint8_t* weightS } static bool _reorderWeightInside(Backend* bn, const Convolution2DCommon* common, - const std::shared_ptr& weightOrigin, - std::shared_ptr& weight) { + const std::shared_ptr& weightOrigin, + std::shared_ptr& weight) { auto core = static_cast(bn)->int8Functions(); + auto gcore = static_cast(bn)->functions(); int UNIT, SRC_UNIT, DST_XUNIT; core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); // reorder weight, [oc, ic, k^2] => [oc/unit, ((ic/unit)*k^2)/(src_unit/unit), unit(oc), (src_unit/unit), unit(ic)] @@ -111,30 +112,195 @@ static bool _reorderWeightInside(Backend* bn, const Convolution2DCommon* common, return true; } -DenseConvInt8TiledExecutor::DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res) : ConvInt8TiledExecutor(backend, convOp->common(), res) { - std::shared_ptr weightOrigin = mResource->mWeightInt8; - mValid = _reorderWeightInside(backend, convOp->common(), weightOrigin, mResource->mWeightInt8); +static void Getfp32Info (std::shared_ptr resource, std::shared_ptr weightOrigin, const Convolution2D* conv2d, std::shared_ptr quantCommon) { + // common parameters + int outputCount = conv2d->common()->outputCount(); + auto core = static_cast(resource->backend)->functions(); + int LSize = conv2d->common()->inputCount() * conv2d->common()->kernelX() * conv2d->common()->kernelY(); + int ocUp4 = ROUND_UP(outputCount, core->pack); + + int dequantCnt = quantCommon->alpha.size(); + if (quantCommon->asymmetric) { + dequantCnt /= 2; + } + int blockNum = dequantCnt / outputCount; + int scaleSize = blockNum * ocUp4; // pack size. 
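        // Editorial note, not part of the patch: a sketch of the quant-parameter bookkeeping assumed here.
        // For asymmetric quant, alpha stores (bias, scale) pairs per (outputChannel, block), so dequantCnt is
        // halved; blockNum = dequantCnt / outputCount is then the number of weight blocks along the L
        // (ic * kh * kw) axis, and scaleSize = blockNum * ocUp4 pads the per-block scales to the pack size.
        // mScaleBias below holds 2 * scaleSize floats: the scales first, then the matching biases.
        // Example: outputCount = 32, pack = 4, alpha.size() = 128, asymmetric
        //   => dequantCnt = 64, blockNum = 2, ocUp4 = 32, scaleSize = 64.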
+ int blockSize = LSize / blockNum; + int originOffset = 0; + if (quantCommon->canUseInt4) { + originOffset = -8; + } + + // Save weight quant scale and bias: wf=scale*wi+bias + int bytes = 4; + resource->mDequantize.mScaleBias.reset(Tensor::createDevice({2 * scaleSize * bytes})); + auto success = resource->backend->onAcquireBuffer(resource->mDequantize.mScaleBias.get(), Backend::STATIC); + if (!success) { + MNN_ERROR("Alloc denquant scaleBias memory error\n"); + return; + } + auto alphaPtr = resource->mDequantize.mScaleBias->host(); + auto biasPtr = reinterpret_cast(reinterpret_cast(alphaPtr) + scaleSize * bytes); + ::memset(alphaPtr, 1, scaleSize * bytes); + ::memset(biasPtr, 0, scaleSize * bytes); + auto quanInfoPtr = quantCommon->alpha.get(); + int h = quantCommon->alpha.size(); + if (quantCommon->asymmetric) { + for (int i = 0; i < blockNum; ++i) { + auto dstAlpha = alphaPtr + i * ocUp4; + auto dstBias = biasPtr + i * ocUp4; + for (int j = 0; j < outputCount; ++j) { + int scaleIndex = j * blockNum + i; + dstAlpha[j] = quanInfoPtr[2 * scaleIndex + 1]; + dstBias[j] = quanInfoPtr[2 * scaleIndex] + (float)originOffset * dstAlpha[j]; + } + } + + } else { + for (int i = 0; i < blockNum; ++i) { + auto dstAlpha = alphaPtr + i * ocUp4; + auto dstBias = biasPtr + i * ocUp4; + for (int j = 0; j < outputCount; ++j) { + int scaleIndex = j * blockNum + i; + dstAlpha[j] = quanInfoPtr[scaleIndex]; + dstBias[j] = (float)originOffset * dstAlpha[j]; + } + } + } + // Save float weight kernel sum + resource->mWeightKernelSum.reset(Tensor::createDevice({bytes * ocUp4})); + success = resource->backend->onAcquireBuffer(resource->mWeightKernelSum.get(), Backend::STATIC); + if (!success) { + MNN_ERROR("Alloc denquant mWeightKernelSum memory error\n"); + return; + } + auto weightKernelSum = resource->mWeightKernelSum->host(); + auto realWeightData = weightOrigin->host(); + ::memset(weightKernelSum, 0, resource->mWeightKernelSum->size()); + for (int j = 0; j < outputCount; ++j) { + float sum = 0.f; + for (int k = 0; k < blockNum; ++k) { + int scaleIndex = k + j * blockNum; + float scale = 0; + float bias = 0; + if (quantCommon->asymmetric) { + scale = quanInfoPtr[2 * scaleIndex + 1]; + bias = quanInfoPtr[2 * scaleIndex]; + } else { + scale = quanInfoPtr[scaleIndex]; + bias = 0; + } + int tmp = 0; + for (int i = 0; i < blockSize; ++i) { + int l_index = k * blockSize + i; + tmp += (int)realWeightData[j * blockNum * blockSize + l_index]; + } + sum += (tmp * scale + blockSize * bias); + } + weightKernelSum[j] = sum; + } +} + +DenseConvInt8TiledExecutor::DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res, bool dynamicQuantExe) : ConvInt8TiledExecutor(backend, convOp, res) { + std::shared_ptr weightOrigin = mResourceInt8->mWeightInt8; + std::shared_ptr quanCommon ; + mDynamicQuantExe = dynamicQuantExe; + if (dynamicQuantExe) { + MNN_ASSERT(convOp->quanParameter() != nullptr && convOp->quanParameter()->buffer() != nullptr); + quanCommon = ConvolutionCommon::load(convOp, backend, false, true); + // fp32 weightKernelSum + mResource.reset(new CPUConvolution::Resource); + mResource->backend = backend; + Getfp32Info(mResource, weightOrigin, convOp, quanCommon); // Call this before reorder weight. 
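        // Editorial note, not part of the patch: Getfp32Info is assumed to fill two float buffers in mResource
        // before the int8 weights are repacked:
        //   mDequantize.mScaleBias : per-(block, oc) dequant parameters with wf = scale * wi + bias
        //                            (a -8 * scale offset folded into bias when int4 storage is used);
        //   mWeightKernelSum       : per-oc sum of the dequantized weights over L, i.e.
        //                            sum_k ( scale_k * sum_i(wi) + blockSize * bias_k ).
        // Both read the original [oc, ic*kh*kw] weight order, which is why the call precedes _reorderWeightInside.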
+ } + + mValid = _reorderWeightInside(backend, convOp->common(), weightOrigin, mResourceInt8->mWeightInt8); if(!mValid) { return; } - // choose int8 gemm kernel auto core = static_cast(backend)->int8Functions(); - mGemmKernel = core->Int8GemmKernel; + auto gcore = static_cast(backend)->functions(); + // offline quant + if (false == dynamicQuantExe) { + mGemmKernel = core->Int8GemmKernel; #ifdef MNN_USE_SSE - int actBits = convOp->symmetricQuan()->nbits(); - if (actBits <= 7) { - mGemmKernel = core->Int8GemmKernelFast; - } + int actBits = convOp->symmetricQuan()->nbits(); + if (actBits <= 7) { + mGemmKernel = core->Int8GemmKernelFast; + } #else - if(convOp->symmetricQuan()->method() == QuantizeAlgo_OVERFLOW_AWARE){ - mGemmKernel = core->Int8GemmKernelFast; - } + if(convOp->symmetricQuan()->method() == QuantizeAlgo_OVERFLOW_AWARE){ + mGemmKernel = core->Int8GemmKernelFast; + } #endif -} + mResource.reset(new CPUConvolution::Resource); + CPUConvolution::makeResource(backend, mResource, convOp, mResourceInt8); + return; + } -DenseConvInt8TiledExecutor::DenseConvInt8TiledExecutor(Backend* backend, const Convolution2DCommon* common, const DenseConvInt8TiledExecutor& exe) - : ConvInt8TiledExecutor(backend, common, exe.mResource), mGemmKernel(exe.mGemmKernel) { + // dynamic quant + int UNIT, SRC_UNIT, DST_XUNIT; + core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + bool needPermuteInt4weight = ((UNIT == 8 && SRC_UNIT == 8 && DST_XUNIT ==10) || (UNIT == 4 && SRC_UNIT == 8 && DST_XUNIT ==20) || (UNIT == 64 && SRC_UNIT == 4 && DST_XUNIT ==4)); + mResource->mDequantize.bits = 8; + if (quanCommon->canUseInt4) { + mResourceInt8->mWeightAsymmetricQuant = true; + auto weightLength = mResourceInt8->mWeightInt8->size(); + MNN_ASSERT(weightLength % 2 == 0); + mResource->mDequantize.bits = 4; + std::shared_ptr weightLow(Tensor::createDevice( mResourceInt8->mWeightInt8->shape())); + auto res = mResource->backend->onAcquireBuffer(weightLow.get(), Backend::STATIC); + if (!res) { + MNN_ERROR("int4 weight acquire buffer error\n"); + return ; + } + auto srcPtr = mResourceInt8->mWeightInt8->host(); + auto dstPtr = weightLow->host(); + // Pack two int4-weight to one int8-weight. + if (false == needPermuteInt4weight) { + weightLength = UP_DIV(weightLength, 2); + for (int i=0; i < weightLength; ++i) { + int s0 = srcPtr[2 * i + 0]; + int s1 = srcPtr[2 * i + 1]; + int d = (s0 + 8) * 16 + (s1 + 8); + dstPtr[i] = d; + } + } else { + int permuteUnit = UNIT * SRC_UNIT; + int halfPermuteStride = static_cast(permuteUnit / 2); + for (int i = 0; i < weightLength / permuteUnit; ++i) { + auto src0 = srcPtr + i * permuteUnit; + auto dst0 = dstPtr + i * halfPermuteStride; + for (int j = 0; j < halfPermuteStride; ++j) { + int s0 = src0[j]; + int s1 = src0[j + halfPermuteStride]; + int d = (s0 + 8) * 16 + (s1 + 8); + dst0[j] = d; + } + } + } + // Update int4 weight to mWeightInt8. 
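        // Editorial note, not part of the patch: the loops above pack two signed int4 weights s0, s1 in [-8, 7]
        // into one byte as d = (s0 + 8) * 16 + (s1 + 8), i.e. s0 in the high nibble and s1 in the low nibble.
        // Worked examples: (s0, s1) = (-8, 7) -> 0x0F; (3, -1) -> 0xB7.
        // The permuted variant pairs element j with element j + UNIT*SRC_UNIT/2 inside each UNIT*SRC_UNIT tile,
        // matching the layout the w4 GEMM kernels are assumed to unpack in place.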
+ mResourceInt8->mWeightInt8 = weightLow; + } + // Relu/Relu6 post parameters + auto postPtr = getPostParameters(); + mResource->mReluThreshold.resize(2); + mResource->mReluThreshold[0] = postPtr[2]; + mResource->mReluThreshold[1] = postPtr[3]; + if (gcore->bytes == 2) { + gcore->MNNFp32ToLowp(mResource->mReluThreshold.data(), reinterpret_cast(mResource->mReluThreshold.data()), 2); + } + if (mCommon->relu()) { + mResource->mReluThreshold[0] = 0.f; + } + if (mCommon->relu6()) { + mResource->mReluThreshold[0] = 0.f; + mResource->mReluThreshold[1] = 6.f; + } +} +DenseConvInt8TiledExecutor::DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, bool dynamicQuantExe, const DenseConvInt8TiledExecutor& exe) + : ConvInt8TiledExecutor(backend, convOp, exe.mResourceInt8), mGemmKernel(exe.mGemmKernel), mResource(exe.mResource), mDynamicQuantExe(dynamicQuantExe) { } DenseConvInt8TiledExecutor::~DenseConvInt8TiledExecutor() { @@ -145,7 +311,7 @@ bool DenseConvInt8TiledExecutor::onClone(Backend* bn, const Op* op, Execution** if (nullptr == dst) { return true; } - auto exe = new DenseConvInt8TiledExecutor(bn, op->main_as_Convolution2D()->common(), *this); + auto exe = new DenseConvInt8TiledExecutor(bn, op->main_as_Convolution2D(), mDynamicQuantExe, *this); if (!exe->valid()) { return false; } @@ -159,42 +325,153 @@ void DenseConvInt8TiledExecutor::getPackParameter(int* Unit, int* srcUnit, int* ErrorCode DenseConvInt8TiledExecutor::onResize(const std::vector& inputs, const std::vector& outputs) { - // Timer kernelTimer; - ConvInt8TiledExecutor::onResize(inputs, outputs); - auto output = outputs[0]; + mUseBatchQuan = (static_cast(backend())->getRuntime()->hint().dynamicQuantOption == 1); + mUseBatchQuan &= mCommon->kernelY() == 1 && mCommon->kernelX() == 1 + && outputs[0]->width() == inputs[0]->width() && outputs[0]->height() == inputs[0]->height() + && mCommon->strideX() == 1 && mCommon->strideY() == 1 && mCommon->padX() == 0 && mCommon->padY() == 0 + && outputs[0]->height() == 1 && outputs[0]->width() == 1; + mUseBatchQuan &= mDynamicQuantExe; + mUseBatchQuan &= (inputs[0]->batch() > 1); auto core = static_cast(backend())->int8Functions(); - + auto gcore =static_cast(backend())->functions(); int UNIT, SRC_UNIT, DST_XUNIT; - getPackParameter(&UNIT, &SRC_UNIT, &DST_XUNIT, core); - const int threads = std::max(static_cast(backend())->threadNumber(), 1); + core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + + if (mDynamicQuantExe == false) { + mMutableResource.updateInputOutputScale(TensorUtils::getQuantInfo(inputs[0]), TensorUtils::getQuantInfo(outputs[0])); + CPUConvolution::onResize(inputs, outputs); + ConvolutionTiledExecutor::setIm2ColParameter(mIm2ColParamter, mCommon, inputs[0], outputs[0], mPadX, mPadY, gcore, core); + mBlockNum = 1; + } else { // Dynamic Quant kernels + CPUConvolution::onResize(inputs, outputs); + // Gemm Kernel + mGemmKernel = core->Int8GemmKernel; + if (mResource->mDequantize.bits == 4) { + mGemmKernel = core->Int8GemmKernel_W4; + } + mQuantFunc = core->MNNFloat2Int8; + if (gcore->bytes == 2 && gcore->pack == 8) { + mGemmKernel = core->MNNGemmInt8AddBiasScale_Unit_FP16; + if (mResource->mDequantize.bits == 4) { + mGemmKernel = core->MNNGemmInt8AddBiasScale_w4_Unit_FP16; + } + mQuantFunc = core->DynamicQuanInput_ARM82; + mQuantAndReorderFunc = core->DynamicQuanInputAndReorder_ARM82; + + } + // A axisSum kernel + mSumByAxisLFunc = gcore->MNNSumByAxisLForMatmul_A; + if (gcore->bytes == 2 && gcore->pack == 8) { // use fp16 + 
ConvolutionTiledExecutor::setIm2ColParameter(mIm2ColParamter, mCommon, inputs[0], outputs[0], mPadX, mPadY, gcore, core, 4); + } else { + ConvolutionTiledExecutor::setIm2ColParameter(mIm2ColParamter, mCommon, inputs[0], outputs[0], mPadX, mPadY, gcore, core); + } + int ocUp4 = ROUND_UP(outputs[0]->channel(), gcore->pack); + int alphaSize = mResource->mDequantize.mScaleBias->size() / (4 * 2); + mBlockNum = alphaSize / ocUp4; + } + + // input scale buffer + int batch = inputs[0]->batch(); +// mTempIm2ColBuffer.reset(Tensor::createDevice({mThreadNums, DST_XUNIT * mIm2ColCount * mResourceInt8->mWeightInt8->length(1) * SRC_UNIT})); + mInputDeqScales.reset(Tensor::createDevice({batch * 4})); + bool success = backend()->onAcquireBuffer(mInputDeqScales.get(), Backend::DYNAMIC); + + // Im2col info + auto output = outputs[0]; + const int threads = static_cast(backend())->threadNumber(); auto planeSize = output->width() * output->height() * output->batch(); - auto planeSizeInThread = UP_DIV(planeSize, threads); const int L2Size = 2048; const int tileLimitByC = UP_DIV(L2Size, mIm2ColParamter.kernelCountUnit * SRC_UNIT); - int tileLimit = ALIMIN(tileLimitByC, planeSizeInThread); + int tileLimit = 0; + int outC = output->channel(); + int outC4 = UP_DIV(outC, gcore->pack); + + if (threads < planeSize) { // Thread split by output nhw. + tileLimit = ALIMIN(tileLimitByC, UP_DIV(planeSize, threads)); + mSplitByOc = false; + } else { + tileLimit = ALIMIN(tileLimitByC, planeSize); + auto ocPerThread = UP_DIV(outC4, threads); + auto threadNeed = UP_DIV(outC4, ocPerThread); + if (UNIT > gcore->pack) { // AVX512:UNIT=64,pack=16 + MNN_ASSERT(UNIT % gcore->pack == 0); + int ocDivUnit = UP_DIV(outC4 * gcore->pack, UNIT); + ocPerThread = UP_DIV(ocDivUnit, threads); + threadNeed = UP_DIV(ocDivUnit, ocPerThread); + } + mThreadNums = ALIMIN(threads, threadNeed); + mSplitByOc = true; + + mDivides.resize(threads+1); + mDivides[0] = 0; + static_cast(backend()->getRuntime())->computeDivideSizes(outC4, mDivides.data() + 1); + } mIm2ColCount = UP_DIV(tileLimit, DST_XUNIT); auto DynamicDestUnit = DST_XUNIT * mIm2ColCount; mTileCount = UP_DIV(planeSize, DynamicDestUnit); - mThreadNums = std::min(threads, mTileCount); - - auto input = inputs[0]; - // set im2col tensor info - mTempIm2ColBuffer.reset(Tensor::createDevice({mThreadNums, DST_XUNIT * mIm2ColCount * mResource->mWeightInt8->length(1) * SRC_UNIT})); - bool success = backend()->onAcquireBuffer(mTempIm2ColBuffer.get(), Backend::DYNAMIC); - if (!success) { - return OUT_OF_MEMORY; + + if (threads < planeSize) { + mThreadNums = ALIMIN(threads, mTileCount); + mDivides.resize(threads+1); + mDivides[0] = 0; + static_cast(backend()->getRuntime())->computeDivideSizes(mTileCount, mDivides.data() + 1); } + int ocUp4 = ROUND_UP(outC, gcore->pack); + int alphaSize = mResource->mDequantize.mScaleBias->size() / (4 * 2); + auto bufferAlloc = static_cast(backend())->getBufferAllocator(); auto blitInfoSize = ConvolutionTiledExecutor::computeBlitInfoSize(DST_XUNIT * mIm2ColCount, mIm2ColParamter.ow, mIm2ColParamter.kernelX * mIm2ColParamter.kernelY, mThreadNums); + mBlitInfoStride = blitInfoSize.second; mBlitInfo = bufferAlloc->alloc(blitInfoSize.first); - if (mBlitInfo.invalid()) { + mTempIm2ColBuffer.reset(Tensor::createDevice({mThreadNums, DST_XUNIT * mIm2ColCount * mResourceInt8->mWeightInt8->length(1) * SRC_UNIT})); + mTempSrcSum.resize(mThreadNums * mBlockNum * DST_XUNIT * mIm2ColCount * 4); // Use 4 bytes to save kernel sum. 
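        // Editorial note, not part of the patch: a sketch of the scratch-buffer sizing assumed here, with one
        // GEMM tile covering DST_XUNIT * mIm2ColCount destination points:
        //   mTempIm2ColBuffer : per-thread int8 im2col tile, about DST_XUNIT * mIm2ColCount * L * SRC_UNIT bytes,
        //                       where L is the kernelCountUnit taken from the repacked weight shape;
        //   mTempSrcSum       : per-thread, per-block float sums of each quantized input tile, later passed to
        //                       the int8 GEMM kernels as quanParam.srcKernelSum.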
+ + success &= backend()->onAcquireBuffer(mTempIm2ColBuffer.get(), Backend::DYNAMIC); + if (!success || mBlitInfo.invalid()) { return OUT_OF_MEMORY; } - bufferAlloc->free(mBlitInfo); - mBlitInfoStride = blitInfoSize.second; + if (false == mDynamicQuantExe) { + bufferAlloc->free(mBlitInfo); + backend()->onReleaseBuffer(mInputDeqScales.get(), Backend::DYNAMIC); + backend()->onReleaseBuffer(mTempIm2ColBuffer.get(), Backend::DYNAMIC); + return NO_ERROR; + } + + int inC = inputs[0]->channel(); + // set im2col tensor info + mQuantInput.reset((Tensor::createDevice({batch, mIm2ColParamter.ih, mIm2ColParamter.iw, ROUND_UP(inC, gcore->pack)}))); + // set dynamic quant buffer + mTempMaxMinValueBuffer.reset(Tensor::createDevice({mThreadNums, 2 * gcore->bytes})); + // set compute buffer + mDynamicBias.reset(Tensor::createDevice({ocUp4 * 4})); + mScaleFuse.reset(Tensor::createDevice({alphaSize * 4})); + + success &= backend()->onAcquireBuffer(mQuantInput.get(), Backend::DYNAMIC); + success &= backend()->onAcquireBuffer(mDynamicBias.get(), Backend::DYNAMIC); + success &= backend()->onAcquireBuffer(mTempMaxMinValueBuffer.get(), Backend::DYNAMIC); + success &= backend()->onAcquireBuffer(mScaleFuse.get(), Backend::DYNAMIC); + + if (mUseBatchQuan) { + int infobytes = 4; // use float32 to save dequant scale and quant scale. + int size = mThreadNums * batch * gcore->bytes + 2 * batch * infobytes; + mBatchQuantInfo.reset(Tensor::createDevice({size})); + success &= backend()->onAcquireBuffer(mBatchQuantInfo.get(), Backend::DYNAMIC); + } + if (!success) { + return OUT_OF_MEMORY; + } + bufferAlloc->free(mBlitInfo); + backend()->onReleaseBuffer(mInputDeqScales.get(), Backend::DYNAMIC); backend()->onReleaseBuffer(mTempIm2ColBuffer.get(), Backend::DYNAMIC); - // MNN_PRINT("dense conv2d int8 resize: cost time: %llu us\n", kernelTimer.durationInUs()); + backend()->onReleaseBuffer(mQuantInput.get(), Backend::DYNAMIC); + backend()->onReleaseBuffer(mDynamicBias.get(), Backend::DYNAMIC); + backend()->onReleaseBuffer(mTempMaxMinValueBuffer.get(), Backend::DYNAMIC); + backend()->onReleaseBuffer(mScaleFuse.get(), Backend::DYNAMIC); + if (mUseBatchQuan) { + backend()->onReleaseBuffer(mBatchQuantInfo.get(), Backend::DYNAMIC); + } return NO_ERROR; } @@ -203,85 +480,323 @@ ErrorCode DenseConvInt8TiledExecutor::onExecute(const std::vector& inpu const auto input = inputs[0]; auto output = outputs[0]; auto core = static_cast(backend())->int8Functions(); + auto gcore = static_cast(backend())->functions(); int UNIT__, SRC_UNIT, DST_XUNIT; core->MNNGetGemmUnit(&UNIT__, &SRC_UNIT, &DST_XUNIT); auto blitProc = core->MNNPackC4Int8ForMatMul_A; - const int plane = output->batch() * mIm2ColParamter.oh * mIm2ColParamter.ow; - int PackUnit = static_cast(backend())->functions()->pack; - const int dstZStep = plane * PackUnit; - - const int batch = input->batch(); - const int ocDiv4 = UP_DIV(output->channel(), PackUnit); + if ( mDynamicQuantExe && gcore->bytes == 2 && core->MNNPackC4Int8ForMatMul_A_ARM86FP16) { + blitProc = core->MNNPackC4Int8ForMatMul_A_ARM86FP16; + } + const int plane = output->batch() * mIm2ColParamter.oh * mIm2ColParamter.ow; + const int batch = input->batch(); + const int PackUnit = gcore->pack; + const int dstZStep = plane * PackUnit; + const int ocDiv4 = UP_DIV(output->channel(), PackUnit); + const int ocUp4 = ROUND_UP(output->channel(), PackUnit); const auto kernelCountUnitDouble = mIm2ColParamter.kernelCountUnit; - //auto remain = outputPlaneLen % GEMM_INT8_DST_XUNIT; - //FUNC_PRINT(remain); + const auto 
col_buffer_unit_size = kernelCountUnitDouble * DST_XUNIT * SRC_UNIT * sizeof(int8_t); + const auto col_buffer_size = col_buffer_unit_size * mIm2ColCount; + const int dstBytes = static_cast(backend())->getBytes(backend(), output); + const int alphaSize = mResource->mDequantize.mScaleBias->size() / (4 * 2); + const int blockL = kernelCountUnitDouble / mBlockNum; // source depthQuad for each block. + float weightBytes = 1.f; + int weight_step_Y = weightBytes * (UNIT__ * SRC_UNIT); + int src_step_Y = DST_XUNIT * SRC_UNIT; + + auto inputDataPtr = input->host(); + auto im2colPtr = mTempIm2ColBuffer->host(); + const auto weightDataPtr = mResourceInt8->mWeightInt8->host(); + auto srcKernelSumPtr = mTempSrcSum.data(); + auto weightDequantBias = mResource->mDequantize.mScaleBias->host() + alphaSize * 4; + + auto outputDataPtr = output->host(); + auto biasPtr = mMutableResource.mBiasFloat->host(); + auto scalePtr = mMutableResource.mScaleFloat->host(); + + auto inputZeroPoint = mMutableResource.mInputZeroPoint; + auto inputScalePtr = mInputDeqScales->host(); + (reinterpret_cast(inputScalePtr))[0] = mMutableResource.mInputScale; + + auto SingleDynamicQuant = [&] () { + const auto floatptr = input->host(); + auto int8ptr = mQuantInput->host(); + auto inputsize = static_cast(backend())->getTensorSize(inputs[0]); + float quantscale = 0.f; + float dequantscale = 0.f; + int zeropoint = 0; + + /* Count max and min value to compute input scale and zeropoint */ + auto maxMinValPtr = mTempMaxMinValueBuffer->host(); + int threadNeed = mThreadNums; + auto inputSizeCount = UP_DIV(inputsize, mThreadNums); + if (inputSizeCount < 9) { + threadNeed = 1; + inputSizeCount = inputsize; + } else { + threadNeed = ALIMIN(UP_DIV(inputsize, inputSizeCount), mThreadNums); + inputSizeCount = UP_DIV(inputsize, threadNeed); + } + auto findMaxMinValueFunction = [&](int tId) { + auto perThreadWorkCount = ALIMIN(inputSizeCount, inputsize - tId * inputSizeCount); + auto minValPtrTid = reinterpret_cast(maxMinValPtr + tId * mTempMaxMinValueBuffer->stride(0)); + auto maxValPtrTid = reinterpret_cast(maxMinValPtr + tId * mTempMaxMinValueBuffer->stride(0) + gcore->bytes); + auto inputDataPtrTid = reinterpret_cast(reinterpret_cast(floatptr) + tId * inputSizeCount * gcore->bytes); + gcore->MNNCountMaxMinValue(inputDataPtrTid, minValPtrTid, maxValPtrTid, perThreadWorkCount); + }; + MNN_CONCURRENCY_BEGIN(tId, threadNeed) { + findMaxMinValueFunction((int)tId); + } + MNN_CONCURRENCY_END(); + if (threadNeed > 1) { + gcore->MNNCountMaxMinValue(reinterpret_cast(maxMinValPtr),reinterpret_cast(maxMinValPtr), reinterpret_cast(maxMinValPtr + gcore->bytes), 2 * mThreadNums); + } + float maxVal = 0; + float minVal = 0; + if (gcore->bytes == 4) { + maxVal = (reinterpret_cast(maxMinValPtr))[1]; + minVal = (reinterpret_cast(maxMinValPtr))[0]; + } + if (gcore->bytes == 2) { + std::vector _mVal(2); + gcore->MNNLowpToFp32(reinterpret_cast(maxMinValPtr), _mVal.data(), 2); + maxVal = _mVal[1]; + minVal = _mVal[0]; + } - const auto inputDataPtr = input->host(); - const auto weightDataPtr = mResource->mWeightInt8->host(); + /* Dynamic quant */ + float range = maxVal - minVal; + quantscale = 255.0f / range; + dequantscale = range / 255.0f; + zeropoint = static_cast(roundf(-minVal * 255.f / range) - 128.0f); + std::vectorqsVec(PackUnit, quantscale); + auto sizeDiv = UP_DIV(inputsize, PackUnit); + int inputPlane = input->batch() * mIm2ColParamter.iw * mIm2ColParamter.ih; + if (gcore->bytes == 2 && gcore->pack == 8 && inputPlane > 1) { // C8->C4 + 
mQuantAndReorderFunc(floatptr, int8ptr, inputPlane, qsVec.data(), -128, 127, (ssize_t)zeropoint, UP_DIV(input->channel(), PackUnit), 4 * inputPlane); + } else { + mQuantFunc(floatptr, int8ptr, sizeDiv, qsVec.data(), -128, 127, (ssize_t)zeropoint); + } - auto im2colPtr = mTempIm2ColBuffer->host(); - auto outputDataPtr = output->host(); - QuanPostTreatParameters quanParam; - quanParam.bias = mMutableResource.mBiasInt32->host(); - quanParam.scale = mMutableResource.mScaleFloat->host(); - quanParam.maxValue = mMutableResource.mClampMax; - if (mResource->mRelu) { - quanParam.minValue = mMutableResource.mOutputZeroPoint; + /* bias float */ + #ifdef MNN_USE_SSE + int offset = 128; + #else + int offset = 0; + #endif + auto biasfp32 = mMutableResource.mResource->mOriginBias->host(); + auto weightDequantScale = mResource->mDequantize.mScaleBias->host(); + float zerofp32 = (zeropoint + offset) * dequantscale; + + gcore->MNNDynamicUpdateConvBiasScale(mDynamicBias->host(), mScaleFuse->host(), biasfp32, weightDequantScale, &dequantscale, mResource->mWeightKernelSum->host(), &zerofp32, UP_DIV(output->channel(), 4), alphaSize); + // Move step for A and B for each block computing + + inputZeroPoint = zeropoint; + (reinterpret_cast(inputScalePtr))[0] = dequantscale; + biasPtr = mDynamicBias->host(); + scalePtr = mScaleFuse->host(); + inputDataPtr = int8ptr; + }; + + auto BatchDynamicQuant = [&]() { + // Allocate input max/sum/dequant/quant buffer + auto infobytes = 4; + auto dequantPtr = mBatchQuantInfo->host(); + auto quantPtr = dequantPtr + batch * infobytes; + auto maxPtr = mBatchQuantInfo->host() + 2 * batch * infobytes; + + // compute sum and absmax + int icDiv4 = UP_DIV(input->channel(), PackUnit); + int threadwork = UP_DIV(icDiv4, mThreadNums); + int threadNeed = UP_DIV(icDiv4, threadwork); + int threadTmp = ALIMIN(mThreadNums, threadNeed); + threadwork = UP_DIV(icDiv4, threadTmp); + MNN_CONCURRENCY_BEGIN(tId, threadTmp) { + int workCount = threadwork; + if (tId == threadTmp - 1) { + workCount = icDiv4 - tId * threadwork; + } + int icIndex = tId * threadwork; + auto inputData = reinterpret_cast(input->host() + icIndex * batch * PackUnit * gcore->bytes); + auto batchMax = reinterpret_cast(maxPtr + tId * batch * gcore->bytes); + gcore->MNNAbsMax(inputData, batchMax, workCount, batch, PackUnit); + } + MNN_CONCURRENCY_END(); + + // Compute quant scale + gcore->MNNQuantScale((float*)maxPtr, (float*)quantPtr, (float*)dequantPtr, threadTmp, batch); + + // quant + MNN_CONCURRENCY_BEGIN(tId, threadTmp) { + int workCount = threadwork; + if (tId == threadTmp - 1) { + workCount = icDiv4 - tId * threadwork; + } + auto icIndex = tId * threadwork; + auto inputData = reinterpret_cast(input->host() + icIndex * batch * PackUnit * gcore->bytes); + auto int8ptr = mQuantInput->host() + icIndex * batch * PackUnit; + auto scale_ptr = reinterpret_cast(quantPtr); + gcore->MNNDynamicQuant(inputData, int8ptr, scale_ptr, workCount, batch, PackUnit); + } + MNN_CONCURRENCY_END(); + + inputZeroPoint = 0; + inputScalePtr = (uint8_t*)dequantPtr; + inputDataPtr = mQuantInput->host(); + biasPtr = mMutableResource.mResource->mOriginBias->host(); + scalePtr = mResource->mDequantize.mScaleBias->host(); + }; + ssize_t oneScale = 1; + if (mUseBatchQuan) { + BatchDynamicQuant(); + oneScale = 0; + } else if (mDynamicQuantExe) { + SingleDynamicQuant(); } else { - quanParam.minValue = mMutableResource.mClampMin; + // offline quant. 
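        // Editorial note, not part of the patch: this if/else chain is assumed to select one of three input paths:
        //   mUseBatchQuan    -> BatchDynamicQuant(): per-batch scales via MNNAbsMax / MNNQuantScale / MNNDynamicQuant;
        //   mDynamicQuantExe -> SingleDynamicQuant(): one scale/zero-point from the whole input's min/max, with bias
        //                       and scale re-fused by MNNDynamicUpdateConvBiasScale into mDynamicBias / mScaleFuse;
        //   otherwise        -> offline quant: the precomputed scales in mMutableResource are used unchanged.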
} - int dstBytes = static_cast(backend())->getBytes(backend(), output); - if (dstBytes != 1) { - quanParam.useInt8 = 0; + + if (mResource->mDequantize.bits == 4) { + weightBytes = 0.5; + weight_step_Y *= 0.5; } - //MNN_PRINT("max: %d, min: %d\n", quanParam.maxValue, quanParam.minValue); - const int col_buffer_unit_size = mIm2ColParamter.kernelCountUnit * DST_XUNIT * SRC_UNIT * sizeof(int8_t); - auto col_buffer_size = col_buffer_unit_size * mIm2ColCount; - auto threadFunction = [&](int tId) { + + SumByAxisParams sumParams; + sumParams.oneScale = oneScale; + sumParams.SRC_UNIT = SRC_UNIT; + sumParams.blockNum = mBlockNum; + sumParams.DST_XUNIT = DST_XUNIT; + sumParams.col_buffer_unit_size = col_buffer_unit_size; + sumParams.kernelCountUnitDouble = kernelCountUnitDouble; + + auto ThreadFunction = [&](int tId, int eStartIndex, int eEndIndex, int estep, int ocIndex) { + auto ocDivThread = ocDiv4; + if (mSplitByOc) { // Thread split by OC + ocDivThread = ALIMIN(mDivides[tId + 1] - mDivides[tId], ocDiv4 - mDivides[tId]); + } + float* reluPtr = mResource->mReluThreshold.data(); + uint8_t* extraScale = nullptr; // input scale for batch dynamic quant. + QuanPostTreatParameters quanParam; + quanParam.blockNum = mBlockNum; + if (mUseBatchQuan) { + extraScale = inputScalePtr; + } +#ifdef MNN_USE_SSE + quanParam.extraBias = mResource->mWeightKernelSum->host() + ocIndex; +#endif + if (dstBytes != 1) { + quanParam.useInt8 = 0; + quanParam.fp32minmax = reluPtr; + } else { + quanParam.maxValue = mMutableResource.mClampMax; + if (mResourceInt8->mRelu) { + quanParam.minValue = mMutableResource.mOutputZeroPoint; + } else { + quanParam.minValue = mMutableResource.mClampMin; + } + } + auto outputTid = outputDataPtr + ocIndex * plane * dstBytes; + const auto biasFloatTid = reinterpret_cast(biasPtr + ocIndex * 4); + const auto scaleFloatTid = reinterpret_cast(scalePtr + ocIndex * 4); + const auto weightDequanBiasTid = reinterpret_cast(weightDequantBias + ocIndex * 4); + const auto weightPtrTid = weightDataPtr + static_cast(ocIndex * kernelCountUnitDouble * SRC_UNIT * weightBytes); + if (mBlockNum == 1) { + quanParam.biasFloat = biasFloatTid; + quanParam.scale = scaleFloatTid; + quanParam.weightQuanBias = weightDequanBiasTid; + } + auto colAddr = im2colPtr + tId * mTempIm2ColBuffer->stride(0); auto srcPtr = (int8_t const **)(mBlitInfo.ptr() + tId * mBlitInfoStride.first); auto el = (int32_t *)(srcPtr + mBlitInfoStride.second); + auto xKernelSumPtrTid = reinterpret_cast(srcKernelSumPtr + tId * mBlockNum * DST_XUNIT * mIm2ColCount * 4); - int32_t info[4]; + int32_t info[6]; info[1] = mIm2ColParamter.iw * mIm2ColParamter.ih * batch; - info[2] = col_buffer_unit_size; + info[2] = static_cast(col_buffer_unit_size); info[3] = mIm2ColParamter.strideX; - for (int tIndex = tId; tIndex < mTileCount; tIndex += mThreadNums) { + info[5] = kernelCountUnitDouble; + for (int tIndex = eStartIndex; tIndex < eEndIndex; tIndex += estep) { const int xIndexStart = tIndex * DST_XUNIT * mIm2ColCount; int realDstCount = ALIMIN(plane - xIndexStart, DST_XUNIT * mIm2ColCount); - + auto ptrExtraScale = extraScale != nullptr ? (extraScale + xIndexStart * 4) : nullptr; + auto ptrInputscale = mUseBatchQuan == true ? 
(inputScalePtr + xIndexStart * 4) : inputScalePtr; // im2col auto res = ConvolutionTiledExecutor::turnIm2ColToBlitInfo((const float**)srcPtr, el, xIndexStart, realDstCount, mIm2ColParamter, (const uint8_t*)inputDataPtr, 1); int number = res.first; bool needZero = res.second; if (needZero) { #ifdef MNN_USE_SSE - ::memset(colAddr, mMutableResource.mInputZeroPoint + 128, col_buffer_size); + ::memset(colAddr, inputZeroPoint + 128, col_buffer_size); #else - ::memset(colAddr, mMutableResource.mInputZeroPoint, col_buffer_size); + ::memset(colAddr, inputZeroPoint, col_buffer_size); #endif } info[0] = number; + info[4] = realDstCount; if (number > 0) { blitProc(colAddr, srcPtr, info, el); } - auto outputInTilePtr = outputDataPtr + xIndexStart * PackUnit * dstBytes; + if (mResourceInt8->mWeightAsymmetricQuant) { + mSumByAxisLFunc(xKernelSumPtrTid, colAddr, (float*)ptrInputscale, realDstCount, sumParams); + } + auto outputInTilePtr = outputTid + xIndexStart * PackUnit * dstBytes; auto colAddrTemp = colAddr; - do { - int step = ALIMIN(DST_XUNIT, realDstCount); - mGemmKernel(outputInTilePtr, colAddrTemp, weightDataPtr, kernelCountUnitDouble, dstZStep * dstBytes, ocDiv4, &quanParam, step); - realDstCount-=step; - outputInTilePtr += DST_XUNIT * PackUnit * dstBytes; - colAddrTemp += col_buffer_unit_size; - } while(realDstCount > 0); + auto ptrX = xKernelSumPtrTid; + if (mBlockNum == 1) { + do { + int step = ALIMIN(DST_XUNIT, realDstCount); + quanParam.srcKernelSum = ptrX; + quanParam.extraScale = extraScale != nullptr ? (float*)ptrExtraScale : nullptr; + mGemmKernel(outputInTilePtr, colAddrTemp, weightPtrTid, kernelCountUnitDouble, dstZStep * dstBytes, ocDivThread, &quanParam, step); + ptrX += step; + realDstCount-=step; + outputInTilePtr += DST_XUNIT * PackUnit * dstBytes; + colAddrTemp += col_buffer_unit_size; + ptrExtraScale = extraScale != nullptr ? (ptrExtraScale + step * 4) : nullptr; + } while(realDstCount > 0); + } else { // Now offline quant do not run into. + do { + int step = ALIMIN(DST_XUNIT, realDstCount); + quanParam.extraScale = extraScale != nullptr ? (float*)ptrExtraScale : nullptr; + for (int k = 0; k < mBlockNum; ++k) { + quanParam.biasFloat = nullptr; + quanParam.fp32minmax = nullptr; + if (k == 0) { + quanParam.biasFloat = (float*)biasFloatTid; + } + if (k == mBlockNum - 1) { + quanParam.fp32minmax = reluPtr; + } + quanParam.srcKernelSum = ptrX + k * step; + quanParam.weightQuanBias = weightDequanBiasTid + k * ocUp4; + quanParam.scale = (float*)(scaleFloatTid + k * ocUp4); + + mGemmKernel(outputInTilePtr, colAddrTemp + k * blockL * src_step_Y, weightPtrTid + k * blockL * weight_step_Y, blockL, dstZStep * dstBytes, ocDivThread, &quanParam, step); + } + ptrX += (step * mBlockNum); + realDstCount-=step; + outputInTilePtr += DST_XUNIT * PackUnit * dstBytes; + colAddrTemp += col_buffer_unit_size; + ptrExtraScale = extraScale != nullptr ? 
(ptrExtraScale + step * 4) : nullptr; + } while(realDstCount > 0); + } } }; - MNN_CONCURRENCY_BEGIN(tId, mThreadNums) { - threadFunction((int)tId); + + if (!mSplitByOc) { + MNN_CONCURRENCY_BEGIN(tId, mThreadNums) { + ThreadFunction((int)tId, mDivides[tId], mDivides[tId + 1], 1, 0); + } + MNN_CONCURRENCY_END(); + } else { + MNN_CONCURRENCY_BEGIN(tId, mThreadNums) { + int ocIndex = PackUnit * mDivides[tId]; + ThreadFunction((int)tId, 0, mTileCount,1, ocIndex); + } + MNN_CONCURRENCY_END(); } - MNN_CONCURRENCY_END(); - // MNN_PRINT("dense conv2d int8 execute: cost time: %llu us\n", kernelTimer.durationInUs()); + return NO_ERROR; } diff --git a/source/backend/cpu/compute/ConvInt8TiledExecutor.hpp b/source/backend/cpu/compute/ConvInt8TiledExecutor.hpp index 685e0088b..ec2d78393 100644 --- a/source/backend/cpu/compute/ConvInt8TiledExecutor.hpp +++ b/source/backend/cpu/compute/ConvInt8TiledExecutor.hpp @@ -11,13 +11,14 @@ #include "backend/cpu/CPUConvolution.hpp" #include "Int8FunctionsOpt.h" +#include "CommonOptFunction.h" namespace MNN { class ConvInt8TiledExecutor : public CPUConvolution { public: // given weight+bias+scale, do post process - ConvInt8TiledExecutor(Backend* backend, const Convolution2DCommon* convOp, std::shared_ptr res); + ConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res); virtual ~ConvInt8TiledExecutor(); virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; @@ -29,7 +30,8 @@ class ConvInt8TiledExecutor : public CPUConvolution { int mTileCount; int mThreadNums; std::shared_ptr mTempIm2ColBuffer; - std::shared_ptr mResource; + std::shared_ptr mResourceInt8; + // std::shared_ptr mResource; CPUConvolution::MutableResourceInt8 mMutableResource; MemChunk mBlitInfo; std::pair mBlitInfoStride; @@ -48,16 +50,35 @@ class ConvInt8TiledExecutor : public CPUConvolution { class DenseConvInt8TiledExecutor : public ConvInt8TiledExecutor { public: // given weight+bias+scale, do post process - DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res); + DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res, bool dynamicQuantExe); virtual ~DenseConvInt8TiledExecutor(); virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; void getPackParameter(int* Unit, int* SrcUnit, int* DestUnit, const CoreInt8Functions* core) override; private: - DenseConvInt8TiledExecutor(Backend* backend, const Convolution2DCommon* common, const DenseConvInt8TiledExecutor& exe); + DenseConvInt8TiledExecutor(Backend* backend, const Convolution2D* common, bool dynamicQuantExe, const DenseConvInt8TiledExecutor& exe); decltype(CoreInt8Functions::Int8GemmKernel) mGemmKernel; + std::function mQuantFunc; + std::function mQuantAndReorderFunc = nullptr; + std::function mSumByAxisLFunc; + std::shared_ptr mQuantInput; + std::shared_ptr mDynamicBias; + std::shared_ptr mScaleFuse; + std::shared_ptr mBatchQuantInfo; + std::shared_ptr mInputDeqScales; + std::shared_ptr mTempMaxMinValueBuffer; + std::shared_ptr mResource; + std::vector mTempSrcSum; + std::vector mDivides; + + int mThreadNums; + int mBlockNum; + int mOcPerThread; + bool mDynamicQuantExe; + bool mSplitByOc; + bool mUseBatchQuan; }; } // 
namespace MNN diff --git a/source/backend/cpu/compute/ConvInt8Winograd.cpp b/source/backend/cpu/compute/ConvInt8Winograd.cpp index 180320e58..2d0a4b5f2 100644 --- a/source/backend/cpu/compute/ConvInt8Winograd.cpp +++ b/source/backend/cpu/compute/ConvInt8Winograd.cpp @@ -31,7 +31,7 @@ std::shared_ptr ConvInt8Winograd::makeWinoResour std::shared_ptr weight, offsets, scales, inputScales; weight.reset(Tensor::createDevice({alpha2, oc4, ic4, UNIT, SRC_UNIT})); - offsets.reset(Tensor::createDevice({alpha2, oc4, UNIT})); + offsets.reset(Tensor::createDevice({alpha2, oc4, UNIT})); scales.reset(Tensor::createDevice({alpha2, oc4 * UNIT})); inputScales.reset(Tensor::createDevice({alpha2, UNIT})); @@ -47,7 +47,7 @@ std::shared_ptr ConvInt8Winograd::makeWinoResour return nullptr; } ::memset(weight->host(), 0, weight->size()); - ::memset(offsets->host(), 0, offsets->size()); + ::memset(offsets->host(), 0, offsets->size()); ::memset(scales->host(), 0, scales->size()); auto inputScaleData = (const float*)attr; attr += alpha2; auto inputPointData = (const int32_t*)attr; attr += alpha2; @@ -80,7 +80,9 @@ std::shared_ptr ConvInt8Winograd::makeWinoResour for (int a = 0; a < alpha2; ++a) { for (int oz = 0; oz < oc; ++oz) { - int oz4 = oz / UNIT, ozRemain = oz % UNIT, offset = 0; + int oz4 = oz / UNIT, ozRemain = oz % UNIT; + int offset_int32 = 0; + float offset = 0.f; float scale = weightScaleData[a * oc + oz]; for (int sz = 0; sz < ic; ++sz) { int sz4 = sz / SRC_UNIT, szRemain = sz % SRC_UNIT; @@ -95,7 +97,7 @@ std::shared_ptr ConvInt8Winograd::makeWinoResour offset += quanData * (-128); #endif } - offsets->host()[a * oc4 * UNIT + oz] = offset; + offsets->host()[a * oc4 * UNIT + oz] = offset * scale * inputScaleData[a]; scales->host()[a * oc4 * UNIT + oz] = scale * inputScaleData[a]; } } @@ -178,8 +180,10 @@ ErrorCode ConvInt8Winograd::onResize(const std::vector &inputs, const } auto core = static_cast(backend())->int8Functions(); + auto gcore = static_cast(backend())->functions(); int UNIT, SRC_UNIT, DST_XUNIT; core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + UNIT = gcore->pack; auto input = mInputFloat.get(), output = outputs[0]; int batch = input->batch(), ic = input->channel(), oc = output->channel(); @@ -219,6 +223,7 @@ static void mergeAddBiasScaleQuantize(const std::vector& inputs, Tensor auto coreInt8 = cpuBn->int8Functions(); int UNIT, SRC_UNIT, DST_XUNIT; coreInt8->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + UNIT = core->pack; int countC4 = UP_DIV(output->channel(), UNIT), plane = output->height() * output->width() * output->batch(); auto mergeFloat = inputs[0]->host(); @@ -226,7 +231,7 @@ static void mergeAddBiasScaleQuantize(const std::vector& inputs, Tensor core->MNNMatrixAdd(mergeFloat, mergeFloat, inputs[i]->host(), plane * countC4, 0, 0, 0, 1); } std::vector fakeScale(countC4 * UNIT, 1); - core->MNNScaleAndAddBias(mergeFloat, mergeFloat, (const float*)quanParam->bias, fakeScale.data(), plane, countC4); + core->MNNScaleAndAddBias(mergeFloat, mergeFloat, quanParam->biasFloat, fakeScale.data(), plane, countC4); coreInt8->MNNFloat2Int8(mergeFloat, output->host(), plane * countC4, quanParam->scale, quanParam->minValue, quanParam->maxValue, zeroPoint); } @@ -274,8 +279,10 @@ static void _reorderCommon(float* dst, const float* src, size_t area, size_t dep ErrorCode ConvInt8Winograd::onExecute(const std::vector &inputs, const std::vector &outputs) { auto bn = static_cast(backend()); auto core = bn->int8Functions(); + auto gcore = bn->functions(); int UNIT, SRC_UNIT, DST_XUNIT; 
core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + UNIT = gcore->pack; // scale, zero, min, max auto inputQuant = TensorUtils::getQuantInfo(inputs[0]); auto outputQuant = TensorUtils::getQuantInfo(outputs[0]); @@ -308,7 +315,7 @@ ErrorCode ConvInt8Winograd::onExecute(const std::vector &inputs, const scale.assign(UNIT, 1.0 / outputQuant[0]); quanParam.scale = scale.data(); // For winograd Int8, will not treat origin bias to int32, use float directly - quanParam.bias = mResource->mOriginBias->host(); + quanParam.biasFloat = mResource->mOriginBias->host(); quanParam.maxValue = outputQuant[3]; if (mResource->mRelu) { quanParam.minValue = outputQuant[1]; @@ -322,9 +329,11 @@ ErrorCode ConvInt8Winograd::onExecute(const std::vector &inputs, const ConvInt8Winograd::WinoExecution::WinoExecution(std::shared_ptr res, int kernelY, int kernelX, int unitY, int unitX, int outputCount, int inputCount) : Execution(res->backend), mWinoResource(res), mUnitY(unitY), mUnitX(unitX), mKernelY(kernelY), mKernelX(kernelX) { auto core = static_cast(res->backend)->int8Functions(); + auto gcore = static_cast(res->backend)->functions(); int UNIT, SRC_UNIT, DST_XUNIT; core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + UNIT = gcore->pack; int threadNumber = ((CPUBackend *)backend())->threadNumber(); int alphaY = mUnitY + mKernelY - 1, alphaX = mUnitX + mKernelX - 1, alpha2 = alphaY * alphaX; @@ -364,6 +373,7 @@ ErrorCode ConvInt8Winograd::WinoExecution::onExecute(const std::vector bool conv1d = (alphaY == 1 || alphaX == 1); int UNIT, SRC_UNIT, DST_XUNIT; coreInt8->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); + UNIT = core->pack; auto gemmFunc = coreInt8->Int8GemmKernel; CoreFunctions::WinoUnrollTransFunc srcTransXFunc = nullptr, srcTransYFunc = nullptr; @@ -477,6 +487,10 @@ ErrorCode ConvInt8Winograd::WinoExecution::onExecute(const std::vector auto dstOrigin = output->host(); auto weight = mWinoResource->weight->host(); + std::vector xkernelSum(DST_XUNIT, 0); + std::vector wKernelSum(dc_4 * UNIT, 0); + std::vector reluThred = {-std::numeric_limits().max(), std::numeric_limits().max()}; + auto tFunction = [&](int tId) { auto _srcOrigin = mTempInputBuffer->host() + tId * mTempInputBuffer->stride(0); auto _dstOrigin = mTempOutputBuffer->host() + tId * mTempOutputBuffer->stride(0); @@ -507,9 +521,13 @@ ErrorCode ConvInt8Winograd::WinoExecution::onExecute(const std::vector auto _dstFloatPtr = _dstOrigin + i * dc_4 * xC * UNIT; auto _weightInt8Ptr = weight + i * mWinoResource->weight->stride(0); QuanPostTreatParameters quanParam; - quanParam.bias = mWinoResource->offsets->host() + i * mWinoResource->offsets->stride(0); + quanParam.biasFloat = (mWinoResource->offsets->host() + i * mWinoResource->offsets->stride(0)); quanParam.useInt8 = 0; + quanParam.srcKernelSum = xkernelSum.data(); + quanParam.weightQuanBias = wKernelSum.data(); + quanParam.fp32minmax = reluThred.data(); quanParam.scale = mWinoResource->scales->host() + i * dc_4 * UNIT; + quanParam.extraScale = nullptr; gemmFunc((int8_t*)_dstFloatPtr, _srcInt8Ptr, _weightInt8Ptr, mTempInputBuffer->length(2), xC * UNIT * sizeof(float), dc_4, &quanParam, xC); } #ifndef MNN_WINO_TRANFORM_TEST_CLOSE diff --git a/source/backend/cpu/compute/Convolution1x1Strassen.cpp b/source/backend/cpu/compute/Convolution1x1Strassen.cpp index deeec58e4..3ed5c0c6e 100644 --- a/source/backend/cpu/compute/Convolution1x1Strassen.cpp +++ b/source/backend/cpu/compute/Convolution1x1Strassen.cpp @@ -101,7 +101,7 @@ ErrorCode Convolution1x1Strassen::onResize(const std::vector &inputs, auto 
CONVOLUTION_TILED_NUMBER = ePack; auto input = inputs[0]; auto output = outputs[0]; - int numberThread = ((CPUBackend *)backend())->threadNumber(); + const int numberThread = ((CPUBackend *)backend())->threadNumber(); auto ic = input->channel(); auto oc = output->channel(); auto icC4 = UP_DIV(ic, core->pack); @@ -133,13 +133,15 @@ ErrorCode Convolution1x1Strassen::onResize(const std::vector &inputs, } #endif mWeightBytes = static_cast(dequantBits) / 8.0f; + auto rt = static_cast(backend()->getRuntime()); if (matrixSizeE > CONVOLUTION_TILED_NUMBER * 8 * numberThread && matrixSizeE > ocC4) { - // Divide in plane, in this case the divide equal numberThread - int divideStep = UP_DIV(matrixSizeE, numberThread); + std::vector divides(numberThread+1); + divides[0] = 0; + rt->computeDivideSizes(matrixSizeE, divides.data()+1); mUnits.resize(numberThread); for (int i = 0; i < numberThread; ++i) { - int planeStart = i * divideStep; - int planeEnd = std::min(planeStart + divideStep, matrixSizeE); + int planeStart = divides[i]; + int planeEnd = divides[i+1]; int planeSize = planeEnd - planeStart; Unit &unit = mUnits[i]; if (planeSize <= 0) { @@ -173,15 +175,17 @@ ErrorCode Convolution1x1Strassen::onResize(const std::vector &inputs, hDiv = hPack / core->pack; } auto ocDiv = UP_DIV(ocC4, hDiv); - numberThread = std::min(numberThread, ocDiv); - int divideStep = (ocDiv / numberThread) * hDiv; + std::vector divides(numberThread+1); + divides[0] = 0; + rt->computeDivideSizes(ocDiv, divides.data()+1); mUnits.resize(numberThread); for (int i = 0; i < numberThread; ++i) { - int ocStart = i * divideStep; - int ocSize = divideStep; - if (i == numberThread - 1) { - ocSize = ocC4 - i * divideStep; + int ocStart = divides[i] * hDiv; + int ocEnd = divides[i+1] * hDiv; + if (ocEnd >= ocC4) { + ocEnd = ocC4; } + int ocSize = ocEnd - ocStart; Unit &unit = mUnits[i]; if (ocSize <= 0) { unit.mValid = false; diff --git a/source/backend/cpu/compute/ConvolutionDepthwise3x3.cpp b/source/backend/cpu/compute/ConvolutionDepthwise3x3.cpp index d70a8f1d1..46fc68048 100644 --- a/source/backend/cpu/compute/ConvolutionDepthwise3x3.cpp +++ b/source/backend/cpu/compute/ConvolutionDepthwise3x3.cpp @@ -96,7 +96,7 @@ bool ConvolutionDepthwise3x3::onClone(Backend* bn, const Op* op, Execution** dst ErrorCode ConvolutionDepthwise3x3::onResize(const std::vector &inputs, const std::vector &outputs) { CPUConvolution::onResize(inputs, outputs); - int numberThread = ((CPUBackend *)backend())->threadNumber(); + const int numberThread = ((CPUBackend *)backend())->threadNumber(); auto output = outputs[0]; auto owUnit = UP_DIV(output->width(), 2); auto core = static_cast(backend())->functions(); @@ -113,6 +113,15 @@ ErrorCode ConvolutionDepthwise3x3::onResize(const std::vector &inputs, mPostParameters = getPostParameters(); // auto rate = (float)(mSourceEndX-mSourceStartX) / (float)owUnit; // FUNC_PRINT_ALL(rate, f); + + int channelC4 = UP_DIV(inputs[0]->channel(), core->pack); + int batch = inputs[0]->batch(); + auto total = channelC4 * batch; + + mDivides.resize(numberThread+1); + mDivides[0] = 0; + static_cast(backend()->getRuntime())->computeDivideSizes(total, mDivides.data() + 1); + return NO_ERROR; } @@ -141,12 +150,11 @@ ErrorCode ConvolutionDepthwise3x3::onExecute(const std::vector &inputs int threadNumber = ((CPUBackend *)backend())->threadNumber(); auto maxKernelH = std::min(mPadY + ih, 3); - auto total = channelC4 * batch; auto inputOrigin = input->host(); auto outputOrigin = output->host(); MNN_CONCURRENCY_BEGIN(tId, threadNumber) { auto 
cacheLineStart = mCacheLine->host() + tId * mCacheLine->stride(0); - for (int index = (int)tId; index < total; index += threadNumber) { + for (int index = mDivides[tId]; index < mDivides[tId+1]; ++index) { int z = index / batch; auto biasPtr = (const float*)(mResource->mBias->host() + core->bytes * core->pack * z); auto inputZ = inputOrigin + core->pack * index * iw * ih * core->bytes; diff --git a/source/backend/cpu/compute/ConvolutionDepthwise3x3.hpp b/source/backend/cpu/compute/ConvolutionDepthwise3x3.hpp index 319021bb3..4ff4d4ef0 100644 --- a/source/backend/cpu/compute/ConvolutionDepthwise3x3.hpp +++ b/source/backend/cpu/compute/ConvolutionDepthwise3x3.hpp @@ -30,6 +30,7 @@ class ConvolutionDepthwise3x3 : public CPUConvolution { int mSourceStartX = 0; int mSourceEndX = 0; std::vector mPostParameters; + std::vector mDivides; }; } // namespace MNN diff --git a/source/backend/cpu/compute/ConvolutionFloatFactory.cpp b/source/backend/cpu/compute/ConvolutionFloatFactory.cpp index 5e85a184c..40a444696 100644 --- a/source/backend/cpu/compute/ConvolutionFloatFactory.cpp +++ b/source/backend/cpu/compute/ConvolutionFloatFactory.cpp @@ -15,13 +15,13 @@ #include "backend/cpu/compute/ConvolutionWinogradBridge.hpp" #include "backend/cpu/compute/DenseConvolutionTiledExecutor.hpp" -#include "backend/cpu/compute/ConvolutionHybrid.hpp" #ifdef MNN_USE_SPARSE_COMPUTE #include "backend/cpu/compute/SparseConvolutionTiledExecutor.hpp" #endif #include "core/Macro.h" #include "core/OpCommonUtils.hpp" #include "backend/cpu/OneDNNConvolution.hpp" +#include "backend/cpu/compute/ConvInt8TiledExecutor.hpp" namespace MNN { @@ -35,7 +35,7 @@ static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend #ifdef MNN_USE_SPARSE_COMPUTE if (conv2d->sparseParameter() && nullptr != weightQuantInfo.get()) { - if (supportSparse) { + if (supportSparse && weightQuantInfo->quan->index() != nullptr) { return new SparseConvolutionTiledExecutor(common, backend, weightQuantInfo->quan, conv2d->sparseParameter(), bias, biasSize); } @@ -46,13 +46,15 @@ static Execution* _createUnit(const Tensor* input, const Tensor* output, Backend && common->strideX() == 1 && common->strideY() == 1; if (lowMemory && nullptr != weightQuantInfo.get() && originWeightSize == 0) { - if (cpuBackend->memoryMode() == BackendConfig::Memory_Low && fastWay) { - return new ConvolutionHybrid(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); + if (cpuBackend->memoryMode() == BackendConfig::Memory_Low) { + auto core = static_cast(backend)->functions(); + auto resourceInt8 = CPUConvolution::makeResourceInt8(backend, conv2d, core->pack); + return new DenseConvInt8TiledExecutor(backend, conv2d, resourceInt8, true); } else { return new DenseConvolutionTiledExecutor(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); } } - if (fastWay) { + if (fastWay && cpuBackend->functions()->matmulBytes == 0) { return new Convolution1x1Strassen(common, backend, originWeight, originWeightSize, bias, biasSize, weightQuantInfo); } if (originWeightSize == 0) { @@ -78,7 +80,7 @@ Execution* ConvolutionFloatFactory::create(const std::vector& inputs, c return new ConvolutionTiledExecutorMultiInput(conv2d->common(), backend); } #ifdef MNN_LOW_MEMORY - bool lowMemory = static_cast(backend)->memoryMode() != BackendConfig::Memory_High; + bool lowMemory = static_cast(backend)->memoryMode() != BackendConfig::Memory_High && static_cast(backend)->functions()->MNNPackedMatMul_int8 != nullptr; #else bool lowMemory = 
false; #endif diff --git a/source/backend/cpu/compute/ConvolutionHybrid.cpp b/source/backend/cpu/compute/ConvolutionHybrid.cpp deleted file mode 100644 index bf4f24c31..000000000 --- a/source/backend/cpu/compute/ConvolutionHybrid.cpp +++ /dev/null @@ -1,401 +0,0 @@ -// -// ConvolutionHybrid.cpp -// MNN -// -// Created by MNN on 2023/10/26. -// Copyright © 2018, Alibaba Group Holding Limited -// - -#include "ConvolutionHybrid.hpp" -#include -#include "core/BufferAllocator.hpp" -#include "backend/cpu/CPUBackend.hpp" -#include "core/Concurrency.h" -#include "ConvOpt.h" -#include "core/Macro.h" -#include "CommonOptFunction.h" -#include "core/TensorUtils.hpp" -#include -#include "backend/cpu/compute/DenseConvolutionTiledExecutor.hpp" - -namespace MNN { - -bool ConvolutionHybrid::initQuantizeResource(std::shared_ptr int8Info, std::shared_ptr resource, int hU, int hP, int lU, int lP, int outputCount, int srcChannel, int kernelSize, int bytes) { - int weightLength = hU * lU * hP * lP; - resource->mWeight.reset(Tensor::createDevice( - {weightLength})); - auto res = resource->backend->onAcquireBuffer(resource->mWeight.get(), Backend::STATIC); - if (!res) { - return false; - } - resource->mDequantize.bits = 8; - resource->hU = hU; - resource->lU = lU; - resource->hP = hP; - resource->lP = lP; - - // Save scale bias - resource->mDequantize.mScaleBias.reset(MNN::Tensor::createDevice({hU * hP * 2})); - res = resource->backend->onAcquireBuffer(resource->mDequantize.mScaleBias.get(), Backend::STATIC); - if (!res) { - return false; - } - auto alphaPtr = resource->mDequantize.mScaleBias->host(); - auto biasPtr = reinterpret_cast(reinterpret_cast(alphaPtr) + hU * hP * bytes); - ::memset(alphaPtr, 0, 2 * hU * hP * bytes); - int h = int8Info->alpha.size(); - if (int8Info->canUseInt4 && int8Info->asymmetric) { - // int4 to uint4, -8 offset merge to bias - for (int i = 0; i < h/2; ++i) { - int8Info->alpha.get()[2 * i] -= 8 * int8Info->alpha.get()[2 * i + 1]; - } - } - if (bytes == 2) { - auto core = static_cast(resource->backend)->functions(); - if (int8Info->asymmetric) { - std::unique_ptr tmp(new int16_t[h]); - core->MNNFp32ToLowp(int8Info->alpha.get(), tmp.get(), h); - for (int i=0; i< h/2; ++i) { - reinterpret_cast(alphaPtr)[i] = tmp[2 * i + 1]; - reinterpret_cast(biasPtr)[i] = tmp[2 * i]; - } - } else { - core->MNNFp32ToLowp(int8Info->alpha.get(), reinterpret_cast(alphaPtr), h); - if (int8Info->canUseInt4) { - for (int i = 0; i < h; ++i) { - int8Info->alpha.get()[i] *= -8.0; - } - core->MNNFp32ToLowp(int8Info->alpha.get(), reinterpret_cast(biasPtr), h); - } - } - } else { - if (int8Info->asymmetric) { - h = h / 2; - for (int i=0; ialpha.get()[2 * i + 1]; - biasPtr[i] = int8Info->alpha.get()[2 * i]; - } - } else { - for (int i=0; ialpha.get()[i]; - if (int8Info->canUseInt4) { - biasPtr[i] = -8.0 * int8Info->alpha.get()[i]; - } else { - biasPtr[i] = 0.f; - } - } - } - } - std::vector data(weightLength, 0); - auto srcWInt8 = int8Info->weight.get(); - if (hP * hU != outputCount || lP * lU != srcChannel) { - int packedic = lU * lP; - for (int i = 0; i < outputCount; ++i) { - for (int j = 0; j < srcChannel; ++j) { - int destIdx = i * packedic + j; - int srcIdx = i * srcChannel + j; - data[destIdx] = srcWInt8[srcIdx]; - } - } - srcWInt8 = data.data(); - } - if (int8Info->canUseInt4) { - MNN_ASSERT(weightLength % 2 == 0); - weightLength = UP_DIV(weightLength, 2); - resource->mDequantize.bits = 4; - - auto srcPtr = int8Info->weight.get(); - auto dstPtr = resource->mWeight->host(); - // oc, ic -> oc/hP, ic/lP, hP, lP 
- if (hP == 8 && lP == 8) { - for (int i = 0; i < hU; i++) { - for (int j = 0; j < lU; j++) { - for (int k = 0; k < 2; k++) { - for (int n = 0; n < 16; n++) { - int hp_idx = n / 8; - int lp_idx = n % 8; - int s0 = srcWInt8[(i * hP + k * 4 + hp_idx) * lP *lU + (j * lP + lp_idx)]; - int s1 = srcWInt8[(i * hP + k * 4 + hp_idx + 2) * lP * lU + (j * lP + lp_idx)]; - int d = (s0 + 8) * 16 + (s1 + 8); - dstPtr[(i * lU * lP * hP + j * hP * lP + k * 32) / 2 + n] = (uint8_t)d; - } - } - } - } - } else { - for (int i = 0; i < hU; i++) { - for (int j = 0; j < lU; j++) { - for (int k = 0; k < hP; k++) { - for (int l = 0; l < lP; l+=2) { - int s0 = srcWInt8[(i * hP + k) * lP * lU + (j * lP + l)]; - int s1 = srcWInt8[(i * hP + k) * lP * lU + (j * lP + l + 1)]; - int d = (s0 + 8) * 16 + (s1 + 8); - dstPtr[(i * lU * lP * hP + j * hP * lP + k * lP + l) / 2] = d; - } - } - } - } - } - } else { - // Reorder weight for int8 - auto dstWInt8 = resource->mWeight->host(); - // oc, ic -> oc/hP, ic/lP, hP, lP - for (int i = 0; i < hU; i++) { - for (int j = 0; j < lU; j++) { - for (int k = 0; k < hP; k++) { - for (int l = 0; l < lP; l++) { - dstWInt8[i * lU * lP * hP + j * hP * lP + k * lP + l] = srcWInt8[(i * hP + k) * lP * lU + (j * lP + l)]; - } - } - } - } - } - return true; -} - -ConvolutionHybrid::ConvolutionHybrid(const Convolution2DCommon *common, Backend *b, const float *originWeight, - size_t originWeightSize, const float *bias, size_t biasSize, std::shared_ptr quantInfo) - : CPUConvolution(common, b) { - mResource.reset(new CPUConvolution::Resource); - mResource->backend = b; - if (!mResource->copyBiasAlign(bias, (int)biasSize)) { - MNN_ERROR("Not Enough Memory\n"); - mValid = false; - return; - } - MNN_ASSERT(nullptr != quantInfo.get()); - originWeightSize = quantInfo->weight.size(); - auto outputCount = (int)biasSize; - int inputCount = (int)originWeightSize / (int)biasSize * common->kernelX() * common->kernelY(); - auto core = static_cast(b)->functions(); - auto int8_core = static_cast(backend())->int8Functions(); - int unit = core->pack; - int ePack, lPack, hPack; - core->MNNGetMatMulPackMode(&ePack, &lPack, &hPack); - // printf("ePack, lPack, hPack = %d, %d, %d\n", ePack, lPack, hPack); - // printf("UNIT, SRC_UNIT, DST_XUNIT = %d, %d, %d\n", UNIT, SRC_UNIT, DST_XUNIT); - hPack = unit; - lPack = unit; - // [oc, ic] => [oc/unit, ic/src_unit, unit, src_unit] - if (unit == 4 && core->supportI8mm) { // Low Memory: use fp32 and smmla. 
- hPack = 8; - lPack = 8; - } - auto hU = UP_DIV(outputCount, hPack); - auto lU = UP_DIV(inputCount, lPack); - ConvolutionHybrid::initQuantizeResource(quantInfo, mResource, hU, hPack, lU, lPack, outputCount, (int)originWeightSize / (int)biasSize, common->kernelX() * common->kernelY(), core->bytes); -} - -ConvolutionHybrid::ConvolutionHybrid(std::shared_ptr resource, const Convolution2DCommon *common, Backend* b) : CPUConvolution(common, b) { - mResource = resource; -} - -ConvolutionHybrid::~ConvolutionHybrid() { - // Do nothing -} - -bool ConvolutionHybrid::onClone(Backend* bn, const Op* op, Execution** dst) { - if (!mValid) { - return false; - } - if (nullptr == dst) { - return true; - } - *dst = new ConvolutionHybrid(mResource, op->main_as_Convolution2D()->common(), bn); - return true; -} - -ErrorCode ConvolutionHybrid::allocTensor(Tensor* tensor, size_t size) { - tensor->buffer().type = halide_type_of(); - tensor->buffer().dimensions = 1; - tensor->buffer().dim[0].extent = size; - bool success = backend()->onAcquireBuffer(tensor, Backend::DYNAMIC); - if (!success) { - return OUT_OF_MEMORY; - } - return NO_ERROR; -} - -ErrorCode ConvolutionHybrid::allocDynamicQuantInfo(int thread, int batch, int ic, int oc, int bytes) { - // absmax: thread * batch * bytes - // sum: thread * batch * sizeof(int) - // dequant_scale: batch * bytes - // quant_scale: batch * bytes - allocTensor(&mQuantInfo.quant_info, (thread + 2) * batch * bytes + thread * batch * sizeof(int)); - if (ANeedToPack8) { - int ic8 = UP_DIV(ic, 8) * 8; - int oc8 = UP_DIV(oc, 8) * 8; - mInputTemp.reset(Tensor::createDevice({batch, 1, 1, ic8})); - mOutputTemp.reset(Tensor::createDevice({batch, 1, 1, oc8})); - bool allocSucc = backend()->onAcquireBuffer(mInputTemp.get(), Backend::DYNAMIC); - allocSucc = allocSucc && backend()->onAcquireBuffer(mOutputTemp.get(), Backend::DYNAMIC); - if (!allocSucc) { - return OUT_OF_MEMORY; - } - allocTensor(&mQuantInfo.quant_buffer, batch * ic8); - backend()->onReleaseBuffer(mInputTemp.get(), Backend::DYNAMIC); - backend()->onReleaseBuffer(mOutputTemp.get(), Backend::DYNAMIC); - } else { - allocTensor(&mQuantInfo.quant_buffer, batch * ic); - } - backend()->onReleaseBuffer(&mQuantInfo.quant_info, Backend::DYNAMIC); - backend()->onReleaseBuffer(&mQuantInfo.quant_buffer, Backend::DYNAMIC); - return NO_ERROR; -} - -ErrorCode ConvolutionHybrid::onResize(const std::vector &inputs, const std::vector &outputs) { - CPUConvolution::onResize(inputs, outputs); - auto input = inputs[0]; - auto output = outputs[0]; - auto core = static_cast(backend())->functions(); - auto int8_core = static_cast(backend())->int8Functions(); - auto inputPtr = input->host(); - auto outputPtr = output->host(); - auto weightPtr = mResource->mWeight->host(); - auto biasPtr = mResource->mBias->host(); - auto batch = output->batch() * output->height() * output->width(); - int ic = input->channel(); - int oc = output->channel(); - int bytes = core->bytes; - int unit = core->pack; - int eP, lP, hP; - core->MNNGetMatMulPackMode(&eP, &lP, &hP); - int UNIT, SRC_UNIT, DST_XUNIT; - int8_core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DST_XUNIT); - hP = unit; - lP = unit; - int tileC = std::max(unit, hP); - LowMemoryGemmFuncWithInt8Weight gemmKernel; - gemmKernel = core->MNNGemmHybridInt8; - float weightBytes = 1; - if (mResource->mDequantize.bits == 4) { - weightBytes = 0.5; - gemmKernel = core->MNNGemmHybridInt4; - } - - const uint8_t* dequantAlpha = mResource->mDequantize.mScaleBias->host();; - const uint8_t* dequantBias = dequantAlpha + mResource->hU 
* mResource->hP * bytes;; - int threadNumber = ((CPUBackend *)backend())->threadNumber(); - auto oC4 = UP_DIV(oc, tileC); - int iC4 = UP_DIV(ic, unit); - if (iC4 < threadNumber || oC4 < threadNumber) { - threadNumber = std::min(oC4, iC4); - } - int tileCount = UP_DIV(oC4, threadNumber); - int iTileCount = UP_DIV(iC4, threadNumber); - if (unit == 4 && core->supportI8mm) { // Low Memory: use fp32 and smmla. - ANeedToPack8 = true; - } - int8_t order[32] = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31, 8, 9, 10, 11, 4, 5, 6, 7, 24, 25, 26, 27, 20, 21, 22, 23}; - allocDynamicQuantInfo(threadNumber, batch, ic, oc, bytes); - mDynamicQuant = [=]() { - auto maxPtr = mQuantInfo.quant_info.host(); - auto sumPtr = maxPtr + threadNumber * batch * bytes; - auto dequantPtr = sumPtr + threadNumber * batch * sizeof(int); - auto quantPtr = dequantPtr + batch * bytes; - // compute sum and absmax - MNN_CONCURRENCY_BEGIN(tId, threadNumber) { - int workCount = iTileCount; - if (tId == threadNumber - 1) { - workCount = iC4 - tId * iTileCount; - } - int icIndex = tId * iTileCount; - auto input_ptr = reinterpret_cast(input->host() + icIndex * batch * unit * bytes); - auto max_ptr = reinterpret_cast(maxPtr + tId * batch * bytes); - core->MNNAbsMax(input_ptr, max_ptr, workCount, batch, unit); - } - MNN_CONCURRENCY_END(); - // compute scale - core->MNNQuantScale((float*)maxPtr, (float*)quantPtr, (float*)dequantPtr, threadNumber, batch); - // quant - MNN_CONCURRENCY_BEGIN(tId, threadNumber) { - int workCount = iTileCount; - if (tId == threadNumber - 1) { - workCount = iC4 - tId * iTileCount; - } - int icIndex = tId * iTileCount; - auto input_ptr = reinterpret_cast(input->host() + icIndex * batch * unit * bytes); - auto quant_ptr = mQuantInfo.quant_buffer.host() + icIndex * batch * unit; - auto scale_ptr = reinterpret_cast(quantPtr); - auto sum_ptr = reinterpret_cast(sumPtr + tId * batch * sizeof(int)); - core->MNNDynamicQuant(input_ptr, quant_ptr, scale_ptr, sum_ptr, workCount, batch, unit); - } - MNN_CONCURRENCY_END(); - // compute quant sum - core->MNNQuantSum((float*)sumPtr, (float*)dequantPtr, threadNumber, batch); - }; - mFunction.first = threadNumber; - mFunction.second = [=](int tId){ - int workCount = tileCount; - if (tId == threadNumber - 1) { - workCount = oC4 - tId * tileCount; - } - int unit_ = unit; - int tileCount_ = tileCount; - if (ANeedToPack8) { - int oC8 = UP_DIV(oc, 8); - tileCount_ = UP_DIV(oC8, threadNumber); - workCount = tileCount_; - if (tId == threadNumber - 1) { - workCount = oC8 - tId * tileCount_; - } - unit_ = 8; - } - - int ocIndex = tId * tileCount_ * unit_; - const float* finput_ptr = input->host(); - const int8_t* input_ptr = mQuantInfo.quant_buffer.host(); - const int8_t* input_ptr_tmp = mQuantInfo.quant_buffer.host(); - auto weight_ptr = mResource->mWeight->host() + static_cast(ocIndex * ic * weightBytes); - auto output_ptr = reinterpret_cast(outputs[0]->host() + ocIndex * batch * bytes); - if (ANeedToPack8 && batch > 1) { - input_ptr = mInputTemp->host(); - output_ptr = reinterpret_cast(mOutputTemp->host() + ocIndex * batch * bytes); - } - auto bias_ptr = reinterpret_cast(mResource->mBias->host() + ocIndex * bytes); - auto alpha_ptr = reinterpret_cast(dequantAlpha + ocIndex * bytes); - auto zero_ptr = reinterpret_cast(dequantBias + ocIndex * bytes); - const uint8_t* max_ptr = mQuantInfo.quant_info.host(); - const float* sums_ptr = reinterpret_cast(max_ptr + threadNumber * batch * bytes); - const float* scale_ptr = reinterpret_cast(max_ptr + threadNumber * batch * 
(bytes + sizeof(int))); - size_t dst_depth_quad = workCount; - size_t src_depth_quad = UP_DIV(ic, unit_); - size_t dst_step = batch * unit_ * bytes; - size_t realSize = batch; - const float* param[6]; - param[0] = alpha_ptr; - param[1] = zero_ptr; - param[2] = bias_ptr; - param[3] = sums_ptr; - param[4] = scale_ptr; - param[5] = (float*)order; - gemmKernel(output_ptr, input_ptr, weight_ptr, src_depth_quad, dst_step, dst_depth_quad, realSize, param); - }; - return NO_ERROR; -} - -ErrorCode ConvolutionHybrid::onExecute(const std::vector &inputs, const std::vector &outputs) { - mDynamicQuant(); - if (ANeedToPack8 && inputs[0]->batch() > 1) { - auto core = static_cast(backend())->functions(); - auto plane_in = inputs[0]->width() * inputs[0]->height() * inputs[0]->batch(); - auto plane_out = outputs[0]->width() * outputs[0]->height() * outputs[0]->batch(); - auto depth = UP_DIV(inputs[0]->channel(), core->pack); - auto output_depth = UP_DIV(outputs[0]->channel(), core->pack); - int areaOffset[2] = {plane_out, plane_out}; - MNNPackInt8C2Origin(mInputTemp.get()->host(), mQuantInfo.quant_buffer.host(), plane_in, depth, plane_in); - MNN_CONCURRENCY_BEGIN(tId, mFunction.first) { - mFunction.second((int)tId); - } - MNN_CONCURRENCY_END(); - MNNUnpackC2Float(outputs[0]->host(), mOutputTemp.get()->host(), plane_out, output_depth, areaOffset, core->pack); - return NO_ERROR; - } - - MNN_CONCURRENCY_BEGIN(tId, mFunction.first) { - mFunction.second((int)tId); - } - MNN_CONCURRENCY_END(); - return NO_ERROR; -} -} // namespace MNN diff --git a/source/backend/cpu/compute/ConvolutionHybrid.hpp b/source/backend/cpu/compute/ConvolutionHybrid.hpp deleted file mode 100644 index df260b21a..000000000 --- a/source/backend/cpu/compute/ConvolutionHybrid.hpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// ConvolutionHybrid.hpp -// MNN -// -// Created by MNN on 2023/10/26. 
-// Copyright © 2018, Alibaba Group Holding Limited -// - -#ifndef ConvolutionHybrid_hpp -#define ConvolutionHybrid_hpp - -#include -#include "backend/cpu/CPUConvolution.hpp" - -typedef void(*LowMemoryGemmFuncWithInt8Weight)(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param); -namespace MNN { -class ConvolutionHybrid : public CPUConvolution { -public: - ConvolutionHybrid(const Convolution2DCommon *common, Backend *b, const float *originWeight, - size_t originWeightSize, const float *bias, size_t biasSize, std::shared_ptr); - ConvolutionHybrid(std::shared_ptr resource, const Convolution2DCommon *common, Backend* b); - static bool initQuantizeResource(std::shared_ptr int8Info, std::shared_ptr resource, int hU, int hP, int lU, int lP, int outputCount, int srcChannel, int kernelSize, int bytes); - - virtual ~ConvolutionHybrid(); - - virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; - - virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; - virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; -private: - ErrorCode allocTensor(Tensor* tensor, size_t size); - ErrorCode allocDynamicQuantInfo(int thread, int batch, int ic, int oc, int bytes); -private: - struct DynamicQuantInfo { - Tensor quant_info; - Tensor quant_buffer; - }; - std::shared_ptr mResource; - std::function mDynamicQuant; - std::pair> mFunction; - DynamicQuantInfo mQuantInfo; - bool ANeedToPack8 = false; - std::shared_ptr mInputTemp; - std::shared_ptr mOutputTemp; -}; -} // namespace MNN - -#endif /* ConvolutionHybrid_hpp */ diff --git a/source/backend/cpu/compute/ConvolutionPackWinograd.cpp b/source/backend/cpu/compute/ConvolutionPackWinograd.cpp index 83b17c77d..7d3d3a553 100644 --- a/source/backend/cpu/compute/ConvolutionPackWinograd.cpp +++ b/source/backend/cpu/compute/ConvolutionPackWinograd.cpp @@ -32,6 +32,10 @@ ConvolutionPackWinograd::ConvolutionPackWinograd(const Convolution2DCommon *conv int unit = config.unit; auto core = static_cast(backend())->functions(); int pack = core->pack, bytes = core->bytes; + int weightBytes = bytes; + if (0!=core->matmulBytes) { + weightBytes = core->matmulBytes; + } mResource.reset(new Resource); mResource->backend = b; @@ -83,14 +87,14 @@ ConvolutionPackWinograd::ConvolutionPackWinograd(const Convolution2DCommon *conv auto tempWeight = generator.allocTransformWeight(sourceWeight.get(), lPack, hPack, true); auto shape = tempWeight->shape(); - shape.push_back(bytes); + shape.push_back(weightBytes); mResource->mWeight.reset(Tensor::createDevice(shape)); mValid = backend()->onAcquireBuffer(mResource->mWeight.get(), Backend::STATIC); if (!mValid) { return; } generator.transformWeight(tempWeight.get(), sourceWeight.get(), true); - if (bytes != 4) { + if (weightBytes != 4) { core->MNNFp32ToLowp(tempWeight->host(), mResource->mWeight->host(), tempWeight->elementSize()); } else { ::memcpy(mResource->mWeight->host(), tempWeight->host(), tempWeight->size()); @@ -143,7 +147,11 @@ WinogradConfig ConvolutionPackWinograd::bestWinogradUnit(const Convolution2DComm auto core = static_cast(b)->functions(); - auto winogradMemoryLevel = static_cast(b)->getRuntime()->getWinogradMemoryLevel(); + auto winogradMemoryLevel = static_cast(b)->getRuntime()->hint().winogradMemoryUsed; + int multiBytes = static_cast(b)->functions()->bytes; + if (static_cast(b)->functions()->matmulBytes != 0) { + multiBytes = 
static_cast(b)->functions()->matmulBytes; + } int ow = outputTensor->width(); int oh = outputTensor->height(); int oc = outputTensor->channel(); @@ -164,6 +172,9 @@ WinogradConfig ConvolutionPackWinograd::bestWinogradUnit(const Convolution2DComm float maxRate = 0.0f; float originCost = (float)ow * oh * (2.0 * ic) * oc * kernelSize * kernelSize; // macs, with bias std::set supportSu{4, 6, 8}; + if (multiBytes < 4) { + supportSu = {4, 6}; + } CoreFunctions::WinoUnrollDestTransFunc destTransform[CONVOLUTION_WINOGRAD_MAX_UNIT + 1]; for (int u = CONVOLUTION_WINOGRAD_MIN_UNIT; u <= maxUnit; ++u) { auto sui = u + kernelSize - 1; @@ -245,20 +256,11 @@ ErrorCode ConvolutionPackWinograd::onResize(const std::vector &inputs, auto totalCount = wUnit * hUnit * batch; // MNN_PRINT("ow=%d, oh=%d\n", ow, oh); - int threadNumber = std::max(((CPUBackend *)backend())->threadNumber(), 1); - int tileCount = UP_DIV(totalCount, ePack); - int eRemain = totalCount % ePack; - threadNumber = std::min(threadNumber, tileCount); - std::vector parameters(6); - parameters[0] = eRemain * bytes; - parameters[1] = input->channel(); - parameters[2] = output->channel(); - parameters[3] = ePack * pack * bytes; - parameters[4] = 0; - parameters[5] = 0; - - std::vector parametersRemain = parameters; - parametersRemain[3] = eRemain * pack * bytes; + int threadNumber = ((CPUBackend*)(backend()))->threadNumber(); + + std::vector divides(threadNumber+1); + static_cast( static_cast(backend())->getRuntime())->computeDivideSizes(totalCount, divides.data()+1); + divides[0] = 0; auto midBuffer0Bytes = srcUnit2 * pack * bytes; bool allow_x86_bf16_winograd = true; #ifdef MNN_USE_SSE @@ -269,6 +271,24 @@ ErrorCode ConvolutionPackWinograd::onResize(const std::vector &inputs, auto bias = mResource->mBias->host(); mMainFunction.first = threadNumber; mMainFunction.second = [=](int tId, const uint8_t* inputOrigin, uint8_t* dstOrigin) { + int tSta = divides[tId]; + int tFin = divides[tId+1]; + if (tSta >= tFin) { + return; + } + int eRemain = (tFin-tSta) % ePack; + std::vector parameters(6); + parameters[1] = input->channel(); + parameters[2] = output->channel(); + parameters[4] = 0; + parameters[5] = 0; + parameters[0] = eRemain * bytes; + parameters[3] = ePack * pack * bytes; + + std::vector parametersRemain = parameters; + parametersRemain[0] = eRemain * bytes; + parametersRemain[3] = eRemain * pack * bytes; + auto srcOrigin = inputOrigin; auto _srcOrigin = mTempBuffer->host() + tId * mTempBuffer->stride(0); auto gemmBuffer = (mGemmMidBuffer->host() + tId * mGemmMidBuffer->stride(0)); @@ -276,12 +296,11 @@ ErrorCode ConvolutionPackWinograd::onResize(const std::vector &inputs, auto midBufferStride1 = mTransformMidBuffer->stride(1); auto weightStride = mResource->mWeight->stride(0); auto midBuffer1 = midBuffer0 + midBuffer0Bytes; - for (int tIndex = (int)tId; tIndex < tileCount; tIndex += threadNumber) { - int xIndex = (int)tIndex * ePack; - int xReamin = totalCount - xIndex; + for (int xIndex = tSta; xIndex < tFin; xIndex+=ePack) { + int xReamin = tFin - xIndex; int xC = xReamin > ePack ? 
ePack : xReamin; - const bool fuseTransformPack = (xC * FULSE_THRESHHOLD_DENOMINATOR >= FULSE_THRESHHOLD_NUMERATOR * ePack) && allow_x86_bf16_winograd && nullptr != mSourceTransformPack; + const bool fuseTransformPack = (xC * FULSE_THRESHHOLD_DENOMINATOR >= FULSE_THRESHHOLD_NUMERATOR * ePack) && allow_x86_bf16_winograd && nullptr != mSourceTransformPack && core->matmulBytes == 0; /*Source Transform Begin*/ #ifndef MNN_WINO_TRANFORM_TEST_CLOSE { @@ -519,11 +538,16 @@ ErrorCode ConvolutionPackWinograd::onResize(const std::vector &inputs, /*Dest Transform And Post Treat End*/ } }; + std::vector postDivides(threadNumber+1); + static_cast( static_cast(backend())->getRuntime())->computeDivideSizes(dc_4, postDivides.data()+1); + postDivides[0] = 0; mPostFunction.first = threadNumber; mPostFunction.second = [=](int tId, uint8_t* outputOrigin) { auto dstOrigin = outputOrigin; - for (int dy=(int)tId; dy < dc_4; dy += threadNumber) { + int tSta = postDivides[tId]; + int tFin = postDivides[tId+1]; + for (int dy=tSta; dy < tFin; ++dy) { auto dataFloatPtr = (float*)(dstOrigin + ow * oh * batch * dy * pack * bytes); auto biasFloatPtr = (const float*)(bias + pack * dy * bytes); core->MNNAxByClampBroadcastUnit(dataFloatPtr, dataFloatPtr, biasFloatPtr, ow * oh * batch, 0, 0, 1, mPostParameters.data()); diff --git a/source/backend/cpu/compute/ConvolutionTiledExecutor.cpp b/source/backend/cpu/compute/ConvolutionTiledExecutor.cpp index ff3a0afa1..e2e0f16bc 100644 --- a/source/backend/cpu/compute/ConvolutionTiledExecutor.cpp +++ b/source/backend/cpu/compute/ConvolutionTiledExecutor.cpp @@ -91,9 +91,12 @@ std::pair> ConvolutionTiledExecutor::computeBl return std::make_pair(total, std::make_pair(stride, kernelSize * maxLine)); } -void ConvolutionTiledExecutor:: setIm2ColParameter(ConvolutionCommon::Im2ColParameter& dstIm2ColParamter, const Convolution2DCommon* convCommon, Tensor* input, Tensor* output, int padX, int padY, const CoreFunctions* floatCore, const CoreInt8Functions* int8Core) { +void ConvolutionTiledExecutor:: setIm2ColParameter(ConvolutionCommon::Im2ColParameter& dstIm2ColParamter, const Convolution2DCommon* convCommon, Tensor* input, Tensor* output, int padX, int padY, const CoreFunctions* floatCore, const CoreInt8Functions* int8Core, int pack) { // FIXME: Set int8 and float's pack as diff - int pack = floatCore->pack; + if (pack == 0) { + pack = floatCore->pack; + } + const auto kernelCount = convCommon->kernelX() * convCommon->kernelY(); dstIm2ColParamter.dilateX = convCommon->dilateX(); @@ -119,7 +122,12 @@ void ConvolutionTiledExecutor:: setIm2ColParameter(ConvolutionCommon::Im2ColPara int UNIT, SRC_UNIT, DynamicDestUnit; auto core = int8Core; core->MNNGetGemmUnit(&UNIT, &SRC_UNIT, &DynamicDestUnit); - if (SRC_UNIT > pack) { + if (floatCore->bytes == 2 && DynamicDestUnit == 20) { + UNIT = 8; + SRC_UNIT= 8; + DynamicDestUnit = 10; + } + if (SRC_UNIT > UNIT) { const auto srcCountUnit = UP_DIV(input->channel(), pack); dstIm2ColParamter.kernelCountUnit = UP_DIV(srcCountUnit * kernelCount, SRC_UNIT / pack); dstIm2ColParamter.ic = dstIm2ColParamter.icDiv4 * pack; diff --git a/source/backend/cpu/compute/ConvolutionTiledExecutor.hpp b/source/backend/cpu/compute/ConvolutionTiledExecutor.hpp index 1d83fa3b3..3fc0076bd 100644 --- a/source/backend/cpu/compute/ConvolutionTiledExecutor.hpp +++ b/source/backend/cpu/compute/ConvolutionTiledExecutor.hpp @@ -46,7 +46,7 @@ class ConvolutionTiledExecutor : public Execution { virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; void 
initWeight(const float *source, float* cache, int depth, int outputCount, int kernelSize, const CoreFunctions* function); static std::pair turnIm2ColToBlitInfo(float const ** srcPtr, int32_t* el, int start, int xC, const ConvolutionCommon::Im2ColParameter& im2Col, const uint8_t* srcOrigin, int bytes); - static void setIm2ColParameter(ConvolutionCommon::Im2ColParameter& dstIm2ColParamter, const Convolution2DCommon* convCommon, Tensor* input, Tensor* output, int padX, int padY, const CoreFunctions* floatCore, const CoreInt8Functions* int8Core); + static void setIm2ColParameter(ConvolutionCommon::Im2ColParameter& dstIm2ColParamter, const Convolution2DCommon* convCommon, Tensor* input, Tensor* output, int padX, int padY, const CoreFunctions* floatCore, const CoreInt8Functions* int8Core, int pack = 0); // Total / Stride static std::pair> computeBlitInfoSize(int eP, int ow, int kernelSize, int threadNumber); diff --git a/source/backend/cpu/compute/ConvolutionWinogradImpl.cpp b/source/backend/cpu/compute/ConvolutionWinogradImpl.cpp index 0d597b07b..5ecb180f4 100644 --- a/source/backend/cpu/compute/ConvolutionWinogradImpl.cpp +++ b/source/backend/cpu/compute/ConvolutionWinogradImpl.cpp @@ -49,17 +49,4 @@ bool ConvolutionWinogradImpl::canUseWinograd(const Convolution2DCommon *common) return true; } -ErrorCode ConvolutionWinogradImpl::onExecute(const std::vector &inputs, const std::vector &outputs) { - return NO_ERROR; -} - -ErrorCode ConvolutionWinogradImpl::onResize(const std::vector &inputs, const std::vector &outputs) { - return NO_ERROR; -} - -bool ConvolutionWinogradImpl::onClone(Backend* bn, const Op* op, Execution** dst) { - return false; -} - - } // namespace MNN diff --git a/source/backend/cpu/compute/ConvolutionWinogradImpl.hpp b/source/backend/cpu/compute/ConvolutionWinogradImpl.hpp index 377cea037..c7ba42f1a 100644 --- a/source/backend/cpu/compute/ConvolutionWinogradImpl.hpp +++ b/source/backend/cpu/compute/ConvolutionWinogradImpl.hpp @@ -44,12 +44,9 @@ class ConvolutionWinogradImpl : public CPUConvolution { public: ConvolutionWinogradImpl(const Convolution2DCommon *convOp, Backend *b); virtual ~ConvolutionWinogradImpl(); - virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; - virtual ErrorCode onResize(const std::vector &inputs, const std::vector &outputs) override; static bool canUseWinograd(const Convolution2DCommon *convOp); static WinogradConfig bestWinogradUnit(const Convolution2DCommon *convOp, const Tensor *input, const Tensor *output, int threadnumber, Backend* b, const PerfConfig& denseConfig); - virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; protected: ConvolutionWinogradImpl(std::shared_ptr resource, const Convolution2DCommon *convOp, Backend* b) : CPUConvolution(convOp, b) { mResource = resource; diff --git a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp index f844d7d5b..61dfb445a 100644 --- a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp +++ b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.cpp @@ -193,6 +193,9 @@ DenseConvolutionTiledExecutor::DenseConvolutionTiledExecutor(const Convolution2D return; } } else { + if (core->matmulBytes != 0) { + bytes = core->matmulBytes; + } mResource->mWeight.reset(Tensor::createDevice( {hU * lU * hP * lP * bytes})); mValid = mValid && backend()->onAcquireBuffer(mResource->mWeight.get(), Backend::STATIC); @@ -330,7 +333,6 @@ void 
DenseConvolutionTiledImpl::getPackParameter(int* eP, int* lP, int* hP, cons return; } -// #define PROFILE_DETAIL PerfConfig DenseConvolutionTiledImpl::bestTileConvolutionConfig(const Convolution2DCommon *common, const Tensor *inputTensor, const Tensor *outputTensor, int threadNumber, Backend* b) { @@ -413,29 +415,11 @@ PerfConfig DenseConvolutionTiledImpl::bestTileConvolutionConfig(const Convolutio innerAcc += inner[i]; } PerfConfig thisConfig(false, eP, eP, 0, -1); - thisConfig.isParallelInner = outerAcc > innerAcc; + thisConfig.isParallelInner = outerAcc > innerAcc && 0 == core->matmulBytes; thisConfig.instructionCosts = outerAcc > innerAcc ? innerAcc : outerAcc; if (thisConfig.instructionCosts < denseConfig.instructionCosts) { denseConfig = thisConfig; -#ifdef PROFILE_DETAIL - MNN_PRINT("\nouterFlops:"); - formatMatrix(outerFlops, {sizeof(outerFlops) / sizeof(float)}); - MNN_PRINT("\ninnerFlops:"); - formatMatrix(innerFlops, {sizeof(innerFlops) / sizeof(float)}); - MNN_PRINT("\nouterBandwidth:"); - formatMatrix(outerBandwidth, {sizeof(outerBandwidth) / sizeof(float)}); - MNN_PRINT("\ninnerBandwidth:"); - formatMatrix(innerBandwidth, {sizeof(innerBandwidth) / sizeof(float)}); - - MNN_PRINT("\nouter:"); - formatMatrix(outer, {sizeof(outer) / sizeof(float)}); - MNN_PRINT("\ninner:"); - formatMatrix(inner, {sizeof(inner) / sizeof(float)}); - - MNN_PRINT("\ndense im2col mParallelInner:%d, ePack:%d, outerAcc:%.1f, innerAcc:%.1f, totalCount:%d, tileCount:%d, outerCoefficient:%.2f, innerCoefficient:%.2f, tailCost:%.2f, lastTail:%.2f, allowed thread:%d, omp thread:\n\n", - denseConfig.isParallelInner, eP, outerAcc, innerAcc, plane, tileCount, outerCoefficient, innerCoefficient, tailCost, lastTail, threadNumber); -#endif } } @@ -455,12 +439,15 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs int bytes = core->bytes; float weightBytes = bytes; int unit = core->pack; + int matmulBytes = bytes; + if (core->matmulBytes != 0) { + matmulBytes = core->matmulBytes; + } auto packA = core->MNNPackC4ForMatMul_A; int eP, lP, hP; getPackParameter(&eP, &lP, &hP, core); auto matmulUnit = core->MNNPackedMatMul; auto matmulRemain = core->MNNPackedMatMulRemain; - auto weightType = weight->getType(); const uint8_t* dequantAlpha = nullptr; const uint8_t* dequantBias = nullptr; auto ic = input->channel(); @@ -503,13 +490,11 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs mTempBufferTranspose.buffer().type = halide_type_of(); mTempBufferTranspose.buffer().dimensions = 2; mTempBufferTranspose.buffer().dim[0].extent = threadNumber; - mTempBufferTranspose.buffer().dim[1].extent = UP_DIV(L, lP) * lP * eP * bytes; + mTempBufferTranspose.buffer().dim[1].extent = UP_DIV(L, lP) * lP * eP * matmulBytes; TensorUtils::setLinearLayout(&mTempBufferTranspose); auto plane = mIm2ColParameters.ow * mIm2ColParameters.oh * batch; int tileCount = UP_DIV(plane, eP); mConvPerfconfig = bestTileConvolutionConfig(mCommon, input, output, threadNumber, backend()); - - auto threadNumberFirst = mConvPerfconfig.isParallelInner ? 
threadNumber : std::min(threadNumber, tileCount); bool success = backend()->onAcquireBuffer(&mTempBufferTranspose, Backend::DYNAMIC); if (!success) { return OUT_OF_MEMORY; @@ -525,15 +510,14 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs bufferAlloc->free(tempPtr); auto postParameters = getPostParameters(); - mFunction.first = threadNumberFirst; + mFunction.first = threadNumber; if (mConvPerfconfig.isParallelInner) { - + auto rt = static_cast(backend()->getRuntime()); + std::vector ocC4ParralSize(threadNumber + 1); + ocC4ParralSize[0] = 0; + rt->computeDivideSizes(oC4, ocC4ParralSize.data()+1); mFunction.second = [=](int placeholder) { -#ifdef PROFILE_DETAIL - MNN_PRINT("dense conv: n:%d, ic:%d, oc:%d, kh:%d, kw:%d, plane:%d, threadNumberFirst:%d, tileCount:%d, ePack:%d, pack::%d, bytes:%d\n", - batch, ic, outputChannel, kernel_width, kernel_height, plane, threadNumberFirst, tileCount, eP, unit, bytes); -#endif const float* biasPtr = bias ? bias->host() : nullptr; auto gemmBuffer = mTempBufferTranspose.host() + mTempBufferTranspose.stride(0) * 0; auto srcPtr = (float const **)(tempPtr.ptr() + 0 * kernelSize * maxLine * (4 * sizeof(int32_t) + sizeof(float *))); @@ -556,16 +540,10 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs parameters[5] = weightStride; // Only used when block quant parameters[6] = 0; -#ifdef PROFILE_DETAIL - std::vector durationMul(threadNumberFirst, 0); - std::vector packATime(threadNumberFirst, 0); - std::vector indexTime(threadNumberFirst, 0); - Timer timer[threadNumberFirst]; - std::vector macs(threadNumberFirst, 0); -#endif - auto dstOrigin = output->host(); auto srcOrigin = input->host(); + std::vector im2colParallelSize(threadNumber + 1); + im2colParallelSize[0] = 0; for (int x = 0; x < tileCount; x += 1) { int start = (int)x * eP; @@ -578,17 +556,15 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs if (needZero || lP != 1) { ::memset(gemmBuffer, 0, mTempBufferTranspose.stride(0)); } - -#ifdef PROFILE_DETAIL - indexTime[0] += timer[0].durationInUs(); - timer[0].reset(); -#endif - info[0] = 1; int hw4Stride = info[1] * unit * bytes; - MNN_CONCURRENCY_BEGIN(tId, threadNumberFirst) { + rt->computeDivideSizes(number * icC4, im2colParallelSize.data() + 1); + im2colParallelSize[0] = 0; + MNN_CONCURRENCY_BEGIN(tId, threadNumber) { int threadEL[4]; - for(int tic_inumber = tId; tic_inumber < number * icC4; tic_inumber+=threadNumberFirst) { + int ticSta = im2colParallelSize[tId]; + int ticEnd = im2colParallelSize[tId+1]; + for(int tic_inumber = ticSta; tic_inumber < ticEnd; tic_inumber++) { int inumber = tic_inumber / icC4; int t_ic = tic_inumber % icC4; memcpy(threadEL, el + 4 * inumber, 4 * sizeof(int)); @@ -600,16 +576,11 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs } MNN_CONCURRENCY_END(); -#ifdef PROFILE_DETAIL - packATime[0] += timer[0].durationInUs(); - timer[0].reset(); -#endif - if (xC == eP) { - MNN_CONCURRENCY_BEGIN(tId, threadNumberFirst) { + MNN_CONCURRENCY_BEGIN(tId, threadNumber) { size_t paraParameters[PARAMETERSIZE]; memcpy(paraParameters, parameters, PARAMETERSIZE * sizeof(size_t)); - for (int t_oc = tId; t_oc < oC4; t_oc += threadNumberFirst) { + for (int t_oc = ocC4ParralSize[tId]; t_oc < ocC4ParralSize[tId+1]; ++t_oc) { int ocIndex = t_oc * tileC; auto _dstFloatPtr = reinterpret_cast(dstOrigin + (ocIndex / unit * plane + start) * unit * bytes); auto _weightFloatPtr = reinterpret_cast(weightPtr + int((ocIndex / hP * LRoundup * hP) * 
weightBytes)); @@ -637,10 +608,10 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs } MNN_CONCURRENCY_END(); } else { - MNN_CONCURRENCY_BEGIN(tId, threadNumberFirst) { + MNN_CONCURRENCY_BEGIN(tId, threadNumber) { size_t paraParameters[PARAMETERSIZE]; memcpy(paraParameters, parameters, PARAMETERSIZE * sizeof(size_t)); - for (int t_oc = tId; t_oc < oC4; t_oc += threadNumberFirst) { + for (int t_oc = ocC4ParralSize[tId]; t_oc < ocC4ParralSize[tId+1]; ++t_oc) { int ocIndex = t_oc * tileC; auto _dstFloatPtr = reinterpret_cast(dstOrigin + (ocIndex / unit * plane + start) * unit * bytes); auto _weightFloatPtr = reinterpret_cast(weightPtr + int((ocIndex / hP * LRoundup * hP) * weightBytes)); @@ -669,32 +640,16 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs MNN_CONCURRENCY_END(); } -#ifdef PROFILE_DETAIL - macs[0] += 2.0 * xC * L * oC4 * unit / threadNumberFirst; - durationMul[0] += timer[0].durationInUs(); - timer[0].reset(); -#endif - } - -#ifdef PROFILE_DETAIL - double gflops = macs[0] / 1000.0 / durationMul[0]; - MNN_PRINT("dense conv mParallelInner:%d, inside measure: indexTime:%lu us, packATime:%lu us, durationMul:%lu us, total:%lu us, %.3f GFLOPS\n", - mConvPerfconfig.isParallelInner, indexTime[0], packATime[0], durationMul[0], indexTime[0] + packATime[0] + durationMul[0], gflops); - -#endif - }; } else { - mFunction.second = [=](int tId) { + std::vector divides(threadNumber + 1); + divides[0] = 0; -#ifdef PROFILE_DETAIL - if (tId == 0) { - MNN_PRINT("dense conv: n:%d, ic:%d, oc:%d, kh:%d, kw:%d, plane:%d, tileCount:%d, ePack:%d, pack::%d, bytes:%d\n", - batch, ic, outputChannel, kernel_width, kernel_height, plane, tileCount, eP, unit, bytes); - } -#endif + static_cast(static_cast(backend())->getRuntime())->computeDivideSizes(tileCount, divides.data() + 1); + + mFunction.second = [=](int tId) { const float* biasPtr = bias ? bias->host() : nullptr; auto gemmBuffer = mTempBufferTranspose.host() + mTempBufferTranspose.stride(0) * tId; auto srcPtr = (float const **)(tempPtr.ptr() + tId * kernelSize * maxLine * (4 * sizeof(int32_t) + sizeof(float *))); @@ -713,17 +668,11 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs parameters[5] = weightStride; // Only used when block quant parameters[6] = 0; -#ifdef PROFILE_DETAIL - std::vector durationMul(threadNumberFirst, 0); - std::vector packATime(threadNumberFirst, 0); - std::vector indexTime(threadNumberFirst, 0); - Timer timer[threadNumberFirst]; - std::vector macs(threadNumberFirst, 0); -#endif - auto dstOrigin = output->host(); auto srcOrigin = input->host(); - for (int x = (int)tId; x < tileCount; x += threadNumberFirst) { + int tEnd = divides[tId+1]; + int tStart = divides[tId]; + for (int x = (int)tStart; x < tEnd; ++x) { int start = (int)x * eP; int remain = plane - start; int xC = remain > eP ? 
eP : remain; @@ -735,18 +684,10 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs ::memset(gemmBuffer, 0, mTempBufferTranspose.stride(0)); } -#ifdef PROFILE_DETAIL - indexTime[tId] += timer[tId].durationInUs(); - timer[tId].reset(); -#endif if (number > 0) { packA((float *)gemmBuffer, srcPtr, info, el); } -#ifdef PROFILE_DETAIL - packATime[tId] += timer[tId].durationInUs(); - timer[tId].reset(); -#endif int finishedL = 0; int wquantStride = 0; int8_t* _weightPtr = reinterpret_cast(weightPtr); @@ -780,20 +721,7 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs } // matmulRemain(_dstFloatPtr, (float*)gemmBuffer, (float*)weightPtr, xC, parameters, postParameters.data(), biasPtr, k, b); } - -#ifdef PROFILE_DETAIL - macs[tId] += 2.0 * xC * L * oC4 * unit; // bias - durationMul[tId] += timer[tId].durationInUs(); - timer[tId].reset(); -#endif } - -#ifdef PROFILE_DETAIL - double gflops = macs[tId] / 1000.0 / durationMul[tId]; - MNN_PRINT("dense conv mParallelInner:%d, inside measure: indexTime:%lu us, packATime:%lu us, durationMul:%lu us, total:%lu us, %.3f GFLOPS\n", - mConvPerfconfig.isParallelInner, indexTime[tId], packATime[tId], durationMul[tId], indexTime[tId] + packATime[tId] + durationMul[tId], gflops); - -#endif }; } return NO_ERROR; @@ -801,10 +729,6 @@ ErrorCode DenseConvolutionTiledImpl::onResize(const std::vector& inputs ErrorCode DenseConvolutionTiledImpl::onExecute(const std::vector& inputs, const std::vector& outputs) { -#ifdef PROFILE_DETAIL - Timer outsideTimer; - outsideTimer.reset(); -#endif if (mConvPerfconfig.isParallelInner) { mFunction.second(0); } else { @@ -814,12 +738,8 @@ ErrorCode DenseConvolutionTiledImpl::onExecute(const std::vector& input MNN_CONCURRENCY_END(); } -#ifdef PROFILE_DETAIL - MNN_PRINT("dense conv. 
mParallelInner:%d, outside measure: total cost %lu us\n", mConvPerfconfig.isParallelInner, outsideTimer.durationInUs()); -#endif return NO_ERROR; } -#undef PROFILE_DETAIL } // namespace MNN diff --git a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp index 2ce01634f..f618b127f 100644 --- a/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp +++ b/source/backend/cpu/compute/DenseConvolutionTiledExecutor.hpp @@ -31,7 +31,6 @@ class DenseConvolutionTiledImpl : public ConvolutionTiledImpl { static PerfConfig bestTileConvolutionConfig(const Convolution2DCommon *common, const Tensor *inputTensor, const Tensor *outputTensor, int threadNumber, Backend* b); protected: - }; class DenseConvolutionTiledExecutor : public ConvolutionTiledExecutor { public: diff --git a/source/backend/cpu/compute/GemmInt8Executor.cpp b/source/backend/cpu/compute/GemmInt8Executor.cpp index 0c7e9a7ff..00e501e5d 100644 --- a/source/backend/cpu/compute/GemmInt8Executor.cpp +++ b/source/backend/cpu/compute/GemmInt8Executor.cpp @@ -15,7 +15,9 @@ namespace MNN { GemmInt8Executor::GemmInt8Executor(Backend* bn, std::shared_ptr resource, const Convolution2D *conv2D, decltype(CoreInt8Functions::Int8GemmKernel) gemmKernel, std::vector bias): - CPUConvolution(conv2D->common(), bn), mResource(resource), mMutableResource(resource, bn), mGemmKernel(gemmKernel), mQuantBias(bias){ + CPUConvolution(conv2D->common(), bn), mResourceInt8(resource), mMutableResource(resource, bn), mGemmKernel(gemmKernel), mQuantBias(bias){ + mResource.reset(new Resource); + CPUConvolution::makeResource(bn, mResource, conv2D, mResourceInt8); } GemmInt8Executor::~GemmInt8Executor() { @@ -43,23 +45,32 @@ ErrorCode GemmInt8Executor::onResize(const std::vector &inputs, const auto pack = gcore->pack; auto scaleSrc = mMutableResource.mScaleFloat->host(); + int realWeightQuantScaleSize = mResource->mDequantize.mScaleBias->size() / 2; + auto weightBiasSrc = reinterpret_cast(mResource->mDequantize.mScaleBias->host() + realWeightQuantScaleSize); auto ocDivUp = UP_DIV(output->channel(), pack) * pack; mKernelY = mCommon->kernelY(); mKernelX = mCommon->kernelX(); int kernelCount = mKernelX * mKernelY; std::vector scaleData(ocDivUp); + mKernelSum.resize(ocDivUp, 0); ::memset(scaleData.data(), 0.f, ocDivUp * sizeof(float)); auto l = mMutableResource.mScaleFloat->length(0); auto lU = UP_DIV(l, pack); for (int divC = 0; divC < lU; ++divC) { auto srcX = scaleSrc + divC * pack; + auto wbias = weightBiasSrc + divC * pack; for (int k = 0; k < kernelCount; ++k) { int indexK = divC * kernelCount * pack + k * pack; for (int j = 0; j < pack; ++j) { scaleData[indexK + j] = srcX[j]; + mKernelSum[indexK + j] = wbias[j]; } } } + float* biasFloat = reinterpret_cast(mQuantBias.data()); + for (int i = 0; i < mQuantBias.size(); ++i) { + biasFloat[i] = mQuantBias[i] * scaleData[i]; + } mScaleData = scaleData; const auto IC4 = UP_DIV(input->channel(), pack); ConvolutionTiledExecutor::setIm2ColParameter(mIm2ColParamter, mCommon, input, output, 0, 0, static_cast(backend())->functions(), core); @@ -71,7 +82,7 @@ ErrorCode GemmInt8Executor::onResize(const std::vector &inputs, const mIm2ColParamter.padX = 0; mIm2ColParamter.padY = 0; mIm2ColParamter.kernelCountUnit = UP_DIV(input->channel(), SRC_UNIT); - if (SRC_UNIT > pack) { + if (SRC_UNIT > UNIT___) { const auto srcCountUnit = UP_DIV(input->channel(), pack); mIm2ColParamter.ic = mIm2ColParamter.icDiv4 * pack; } else { @@ -131,22 +142,39 @@ ErrorCode 
GemmInt8Executor::onExecute(const std::vector &inputs, const QuanPostTreatParameters quanParam; quanParam.scale = mScaleData.data(); quanParam.maxValue = mMutableResource.mClampMax; - if (mResource->mRelu) { + if (mResourceInt8->mRelu) { quanParam.minValue = mMutableResource.mOutputZeroPoint; } else { quanParam.minValue = mMutableResource.mClampMin; } + auto postParameters = getPostParameters(); + std::vector fp32minmax = {postParameters[2], postParameters[3]}; + quanParam.fp32minmax = fp32minmax.data(); quanParam.useInt8 = 0; // Save result as float data type. - quanParam.bias = mQuantBias.data(); + quanParam.biasFloat = reinterpret_cast(mQuantBias.data()); + quanParam.weightQuanBias = mKernelSum.data(); + quanParam.extraScale = nullptr; + float dequantScale = mMutableResource.mResource->mInputScale; + + SumByAxisParams sumParams; + sumParams.DST_XUNIT = DST_XUNIT; + sumParams.SRC_UNIT = SRC_UNIT; + sumParams.blockNum = 1; + sumParams.kernelCountUnitDouble = mIm2ColParamter.kernelCountUnit; + sumParams.oneScale = 1; + sumParams.col_buffer_unit_size = mInputCol->stride(0); auto threadFunction = [&](int tId) { auto colAddr = im2colPtr + tId * mInputCol->stride(0); auto col_buffer_size = mInputCol->stride(0); - int32_t info[4]; + int32_t info[6]; info[1] = mIm2ColParamter.iw * mIm2ColParamter.ih * batch; info[2] = DST_XUNIT; info[3] = mIm2ColParamter.strideX; + info[5] = mIm2ColParamter.kernelCountUnit; + float paramsf[1]; + paramsf[0] = dequantScale; auto srcPtr = (int8_t const **)(mBlitInfo.ptr() + tId * mBlitInfoStride.first); auto el = (int32_t *)(srcPtr + mBlitInfoStride.second); @@ -165,9 +193,15 @@ ErrorCode GemmInt8Executor::onExecute(const std::vector &inputs, const #endif } info[0] = number; + info[4] = realDstCount; + std::vector xKernelSum(realDstCount); if (number > 0) { blitProc(colAddr, srcPtr, info, el); } + if (mResourceInt8->mWeightAsymmetricQuant) { + gcore->MNNSumByAxisLForMatmul_A(xKernelSum.data(), colAddr, &dequantScale, realDstCount, sumParams); + } + quanParam.srcKernelSum = xKernelSum.data(); auto outputInTilePtr = outputDataPtr + xIndexStart * PackUnit; mGemmKernel((int8_t*)outputInTilePtr, colAddr, weightDataPtr, src_depth_quad, dstZStep * sizeof(float), ocDiv4, &quanParam, realDstCount); } diff --git a/source/backend/cpu/compute/GemmInt8Executor.hpp b/source/backend/cpu/compute/GemmInt8Executor.hpp index 668d56308..0c1345f03 100644 --- a/source/backend/cpu/compute/GemmInt8Executor.hpp +++ b/source/backend/cpu/compute/GemmInt8Executor.hpp @@ -26,13 +26,15 @@ class GemmInt8Executor : public CPUConvolution { int mKernelY; std::shared_ptr mInputCol; std::vector mScaleData; + std::vector mKernelSum; std::vector mQuantBias; - std::shared_ptr mResource; + std::shared_ptr mResourceInt8; ConvolutionCommon::Im2ColParameter mIm2ColParamter; CPUConvolution::MutableResourceInt8 mMutableResource; decltype(CoreInt8Functions::Int8GemmKernel) mGemmKernel; MemChunk mBlitInfo; std::pair mBlitInfoStride; + std::shared_ptr mResource; }; } // namespace MNN #endif /* DeconvInt8Executor_hpp */ diff --git a/source/backend/cpu/compute/IdstConvolutionInt8.cpp b/source/backend/cpu/compute/IdstConvolutionInt8.cpp index 140d8dd21..bec8d7109 100644 --- a/source/backend/cpu/compute/IdstConvolutionInt8.cpp +++ b/source/backend/cpu/compute/IdstConvolutionInt8.cpp @@ -72,15 +72,18 @@ IdstConvolutionInt8::IdstConvolutionInt8(const Convolution2DCommon* convOp, Back shape = {UP_DIV(outputCount, UNIT), UP_DIV(srcCount, SRC_UNIT) * kernelCount, UNIT, SRC_UNIT}; } 
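// [Editor's illustrative sketch, not part of the original patch] The shape chosen above encodes the
// int8 GEMM weight tiling [UP_DIV(oc, UNIT)][UP_DIV(ic, SRC_UNIT) * kernelCount][UNIT][SRC_UNIT].
// reorderWeightSketch below is a hypothetical, self-contained helper showing one plausible index
// mapping for such a layout; the packing actually used here is ConvInt8TiledExecutor::reorderWeight,
// whose exact ordering may differ. It assumes src is laid out as [oc][kernelCount][ic] and that dst is
// zero-initialized with room for the padded tiles.
#include <cstdint>
static void reorderWeightSketch(int8_t* dst, const int8_t* src, int oc, int ic, int kernelCount,
                                int UNIT, int SRC_UNIT) {
    const int icDiv = (ic + SRC_UNIT - 1) / SRC_UNIT; // UP_DIV(ic, SRC_UNIT)
    for (int oz = 0; oz < oc; ++oz) {
        for (int k = 0; k < kernelCount; ++k) {
            for (int sz = 0; sz < ic; ++sz) {
                const int dz  = oz / UNIT, dzR = oz % UNIT;   // output-channel tile / lane
                const int yz  = k * icDiv + sz / SRC_UNIT;    // reduced (kernel * ic) tile
                const int szR = sz % SRC_UNIT;                // input-channel lane
                const int dstIndex = ((dz * kernelCount * icDiv + yz) * UNIT + dzR) * SRC_UNIT + szR;
                dst[dstIndex] = src[(oz * kernelCount + k) * ic + sz];
            }
        }
    }
}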
mWeight.reset(Tensor::createDevice(shape)); - mFakeBias.reset(Tensor::createDevice({(int)ROUND_UP(biasSize, PackUnit)})); + mFakeBias.reset(Tensor::createDevice({(int)ROUND_UP(biasSize, PackUnit)})); + mFakeWeightBias.reset(Tensor::createDevice({(int)ROUND_UP(biasSize, PackUnit)})); mValid = b->onAcquireBuffer(mWeight.get(), Backend::STATIC); mValid &= b->onAcquireBuffer(mFakeBias.get(), Backend::STATIC); + mValid &= b->onAcquireBuffer(mFakeWeightBias.get(), Backend::STATIC); if (!mValid) { MNN_ERROR("Memory not enough\n"); return; } ConvInt8TiledExecutor::reorderWeight(mWeight.get(), (uint8_t*)common->weight.get(), SRC_UNIT, UNIT, srcCount, outputCount, kernelCount); - ::memset(mFakeBias->host(), 0, mFakeBias->size()); + ::memset(mFakeBias->host(), 0, mFakeBias->size()); + ::memset(mFakeWeightBias->host(), 0, mFakeWeightBias->size()); #ifdef MNN_USE_SSE for (int oz = 0; oz < outputCount; ++oz) { auto srcZ = common->weight.get() + oz * kernelCount * srcCount; @@ -88,7 +91,7 @@ IdstConvolutionInt8::IdstConvolutionInt8(const Convolution2DCommon* convOp, Back for (int i = 0; i < kernelCount * srcCount; ++i) { offset += srcZ[i] * (-128); } - mFakeBias->host()[oz] = offset; + mFakeBias->host()[oz] = static_cast(offset) * 1.f; } #endif } @@ -149,7 +152,7 @@ ErrorCode IdstConvolutionInt8::onExecute(const std::vector& inputs, con int UNIT__, SRC_UNIT, DST_XUNIT; coreInt->MNNGetGemmUnit(&UNIT__, &SRC_UNIT, &DST_XUNIT); int PackUnit = static_cast(backend())->functions()->pack; - + auto gemmKernel = coreInt->Int8GemmKernel; // AUTOTIME; @@ -176,9 +179,14 @@ ErrorCode IdstConvolutionInt8::onExecute(const std::vector& inputs, con std::vector fakeScale(ocC4 * PackUnit, 1.0f); QuanPostTreatParameters quanParam; - quanParam.bias = mFakeBias->host(); + quanParam.biasFloat = mFakeBias->host(); quanParam.scale = fakeScale.data(); quanParam.useInt8 = 0; + float fp32minmax[2] = {-std::numeric_limits().max(), std::numeric_limits().max()}; + quanParam.fp32minmax = fp32minmax; + quanParam.weightQuanBias = mFakeWeightBias->host(); + std::vector fakeSrcKernleSum(DST_XUNIT, 0.f); + quanParam.srcKernelSum = fakeSrcKernleSum.data(); // MNN_PRINT("%s, %d, %d, %d,%d->%d,%d\n", layer->layer.layerId, layer->kernelSize[0], layer->kernelSize[1], // input->d1, input->d2, output->d1, output->d2); diff --git a/source/backend/cpu/compute/IdstConvolutionInt8.hpp b/source/backend/cpu/compute/IdstConvolutionInt8.hpp index 1a188c077..c66332512 100644 --- a/source/backend/cpu/compute/IdstConvolutionInt8.hpp +++ b/source/backend/cpu/compute/IdstConvolutionInt8.hpp @@ -40,6 +40,7 @@ class IdstConvolutionInt8 : public CPUConvolution { std::vector mPostParameters; // mFakeBias used by GemmKernel std::shared_ptr mFakeBias; + std::shared_ptr mFakeWeightBias; MemChunk mBlitInfo; std::pair mBlitInfoStride; }; diff --git a/source/backend/cpu/compute/Int8FunctionsOpt.cpp b/source/backend/cpu/compute/Int8FunctionsOpt.cpp index 2d046de25..50fad7e6a 100644 --- a/source/backend/cpu/compute/Int8FunctionsOpt.cpp +++ b/source/backend/cpu/compute/Int8FunctionsOpt.cpp @@ -22,6 +22,8 @@ void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int const QuanPostTreatParameters* post, size_t realCount); void MNNGemmInt8AddBiasScale_16x4_Unit_FAST(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realCount); +void MNNGemmInt8AddBiasScale_16x4_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, 
size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realCount); void MNNLineDepthWiseInt8AddBiasScaleUnit(int8_t* dst, const int8_t* src, const int8_t* weight, const QuanPostTreatParameters* parameters, size_t width, size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, int8_t* idxOrder=nullptr); void MNNMaxPoolInt8(int8_t* dst, int8_t* src, size_t outputWidth, size_t inputWidth, size_t kernelx, size_t kernely, size_t stridesx); @@ -35,6 +37,31 @@ void MNNGemmInt8AddBiasScale_ARMV86_Unit(int8_t* dst, const int8_t* src, const i const QuanPostTreatParameters* post, size_t realDstCount); void MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3(int8_t* dst, const int8_t* src, const int8_t* weight, const QuanPostTreatParameters* parameters, size_t width, size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, int8_t* idxOrder=nullptr); +#if defined(MNN_LOW_MEMORY) +// int4 weight gemmInt8 kernel +void MNNGemmInt8AddBiasScale_ARMV82_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +void MNNGemmInt8AddBiasScale_ARMV86_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +void MNNGemmInt8AddBiasScale_16x4_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +// Tools to dynamic-quant fp16-input data. +#ifdef MNN_USE_ARMV82 +void DynamicQuanInput_ARM82(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minValue, + ssize_t maxValue, ssize_t zeroPoint); +// int8 weight gemmInt8 kernel to return fp16-output data. +void MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +void MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +void MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +void MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); +void DynamicQuanInputAndReorder_ARM82(const float* src, int8_t* dst, size_t planeSize, const float* scale, ssize_t aMin, + ssize_t aMax, ssize_t zeroPoint, size_t ocQuad, size_t offset); +#endif +#endif #endif // __aarch64__ } #endif // MNN_USE_NEON @@ -1386,11 +1413,28 @@ static int8_t MNNInt32ToInt8(int data, int bias, float scale, float maxValue, fl static void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realCount) { const int bytes = ((post->useInt8 == 1) ? 
1 : 4); + float fp32min = 0, fp32max = 0; +// if (0 == post->useInt8) { +// fp32min = (post->fp32minmax)[0]; +// fp32max = (post->fp32minmax)[1]; +// } + auto blockNum = post->blockNum; + int weight_step_Z = (src_depth_quad * blockNum) * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + int weight_step_Y = (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + const auto srcSumPtr = post->srcKernelSum; + if (0 == post->useInt8 && post->fp32minmax) { + fp32min = (post->fp32minmax)[0]; + fp32max = (post->fp32minmax)[1]; + } + + float* biasPtr = (float*)post->biasFloat; + for (int dz = 0; dz < dst_depth_quad; ++dz) { - const auto weight_dz = weight + dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); - const auto bias_dz = post->bias + dz * GEMM_INT8_UNIT; + const auto weight_dz = weight + weight_step_Z * dz; + const auto bias_dz = biasPtr + dz * GEMM_INT8_UNIT; + const auto weight_zero = post->weightQuanBias + (dz * GEMM_INT8_UNIT); const float* scale_dz = nullptr; - scale_dz = post->scale + dz * GEMM_INT8_UNIT; + scale_dz = post->scale + (dz * GEMM_INT8_UNIT); auto dst_z = dst + dz * dst_step; for (int w = 0; w < realCount; ++w) { const auto src_x = src + w * GEMM_INT8_SRC_UNIT; @@ -1398,7 +1442,7 @@ static void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, co int32_t dstTemp[4] = {0, 0, 0, 0}; for (int sz = 0; sz < src_depth_quad; ++sz) { - const auto weight_sz = weight_dz + (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT) * sz; + const auto weight_sz = weight_dz + weight_step_Y * sz; const auto src_z = src_x + sz * GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT; for (int j = 0; j < GEMM_INT8_UNIT; ++j) { @@ -1410,34 +1454,125 @@ static void MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, co } for (int j = 0; j < GEMM_INT8_UNIT; ++j) { - if (!post->scale) { - ((float*)dst_x)[j] = (float)(dstTemp[j] + bias_dz[j]); - } else if (post->useInt8 == 1) { - dst_x[j] = MNNInt32ToInt8(dstTemp[j], bias_dz[j], scale_dz[j], post->maxValue, post->minValue); - } else { - float value = (float)(dstTemp[j] + bias_dz[j]) * scale_dz[j]; + float value = dstTemp[j] * scale_dz[j] + srcSumPtr[w] * weight_zero[j]; + if (post->extraScale) { + value = dstTemp[j] * scale_dz[j] * post->extraScale[w] + srcSumPtr[w] * weight_zero[j]; + } + if (post->useInt8 == 0) { + if (biasPtr) { + value += bias_dz[j]; + } else { + float dstv = ((float*)dst_x)[j]; + value += dstv; + } + if (post->fp32minmax) { + value = std::min(std::max(fp32min, value), fp32max); + } ((float*)dst_x)[j] = value; + } else { + value += bias_dz[j]; + value = ALIMAX(value, post->minValue); + value = ALIMIN(value, post->maxValue); + dst_x[j] = static_cast(roundf(value)); + } + } + } + } +} + +static void MNNGemmInt8AddBiasScale_16x4_w4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realCount) { + uint32_t c = 0xf; + const int bytes = 4; + float fp32min = 0, fp32max = 0; + int weight_step_Z = 0.5 * (post->blockNum * src_depth_quad) * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + int weight_step_Y = 0.5 * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + MNN_ASSERT(post->useInt8==0); + if (post->fp32minmax) { + fp32min = (post->fp32minmax)[0]; + fp32max = (post->fp32minmax)[1]; + } + + float* biasPtr = (float*)post->biasFloat; + int blockNum = post->blockNum; + + const auto srcSumPtr = post->srcKernelSum; + for (int dz = 0; dz < dst_depth_quad; ++dz) { + const auto weight_dz = weight + weight_step_Z * dz; + const auto bias_dz = biasPtr + dz * 
GEMM_INT8_UNIT; + const auto weight_zero = post->weightQuanBias + (dz * GEMM_INT8_UNIT); + const float* scale_dz = nullptr; + scale_dz = post->scale + (dz * GEMM_INT8_UNIT); + auto dst_z = dst + dz * dst_step; + for (int w = 0; w < realCount; ++w) { + const auto src_x = src + w * GEMM_INT8_SRC_UNIT; + auto dst_x = dst_z + w * GEMM_INT8_UNIT * bytes; + int32_t dstTemp[4] = {0, 0, 0, 0}; + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = (uint8_t*)weight_dz + weight_step_Y * sz; + const auto src_z = src_x + sz * GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT; + + int w8[64]; // 64=GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT + for (int k = 0; k < 32; ++k) { + w8[2 * k] = (weight_sz[k]>>4); + w8[2 * k + 1] = (weight_sz[k] & c); + } + + for (int j = 0; j < GEMM_INT8_UNIT; ++j) { + const auto weight_j = w8 + j * GEMM_INT8_SRC_UNIT; + for (int i = 0; i < GEMM_INT8_SRC_UNIT; ++i) { + dstTemp[j] += (int32_t)src_z[i] * (int32_t)weight_j[i]; + } } } + + for (int j = 0; j < GEMM_INT8_UNIT; ++j) { + float value = dstTemp[j] * scale_dz[j] + srcSumPtr[w] * weight_zero[j]; + if (post->extraScale) { + value = dstTemp[j] * scale_dz[j] * post->extraScale[w] + srcSumPtr[w] * weight_zero[j]; + } + + if (biasPtr) { + value += bias_dz[j]; + } else { + float dstv = ((float*)dst_x)[j]; + value += dstv; + } + if (post->fp32minmax) { + value = std::min(std::max(fp32min, value), fp32max); + } + ((float*)dst_x)[j] = value; + } } } } static void MNNReluWithSlopeChannelInt8(int8_t* dst, const int8_t* src, const float* slope, size_t planeNumber, size_t depthQuad, QuanPrePostParameters *params) { +#ifdef MNN_USE_SSE +float offset = 128.f; +uint8_t* srcPtr = (uint8_t*)src; +uint8_t* dstPtr = (uint8_t*)dst; +#else +float offset = 0.f; +const int8_t* srcPtr = src; +int8_t* dstPtr = dst; +#endif float mulVal = 0.f; float inputScale = params->inputScale[0]; float outputScale = params->outputScale[0]; - int32_t inputZero = static_cast(params->inputZeroPoint[0]); - int32_t outputZero = static_cast(params->outputZeroPoint[0]); + float inputZero = static_cast(params->inputZeroPoint[0]) + offset; + float outputZero = static_cast(params->outputZeroPoint[0]) + offset; + int32_t minval = params->minValue + offset; + int32_t maxval = params->maxValue + offset; for (int j = 0;j < depthQuad; ++j) { const float* slopeZ = slope + 4 * j; - const int8_t* srcZ = src + 4 * j * planeNumber; - int8_t* dstZ = dst + 4 * j * planeNumber; + const auto srcZ = srcPtr + 4 * j * planeNumber; + auto dstZ = dstPtr + 4 * j * planeNumber; for (int i = 0; i < planeNumber; ++i) { for (int c = 0; c < 4; ++c) { - if (srcZ[4 * i + c] < 0) { + if ((float)srcZ[4 * i + c] < inputZero) { mulVal = (srcZ[4 * i + c] - inputZero) * slopeZ[c]; - dstZ[4 * i + c] = ALIMIN(ALIMAX(static_cast(roundf(mulVal)) + outputZero, params->minValue), params->maxValue); + dstZ[4 * i + c] = ALIMIN(ALIMAX(static_cast(roundf(mulVal)) + outputZero, minval), maxval); } else { dstZ[4 * i + c] = srcZ[4 * i + c]; } @@ -1974,9 +2109,9 @@ static void MNNGetGemmUnitSdot(int* UNIT, int* SRC_UNIT, int* DST_XUNIT) { } static void MNNGetGemmUnitI8mm(int* UNIT, int* SRC_UNIT, int* DST_XUNIT) { - *UNIT = 4; + *UNIT = 8; *SRC_UNIT = 8; - *DST_XUNIT = 20; + *DST_XUNIT = 10; } template @@ -2055,6 +2190,9 @@ void MNNCoreInt8FunctionInit() { gCoreFunc->Int8GemmKernel = MNNGemmInt8AddBiasScale_16x4_Unit; gCoreFunc->Int8GemmKernelFast = MNNGemmInt8AddBiasScale_16x4_Unit_FAST; gCoreFunc->MNNGetGemmUnit = MNNGetGemmUnit; +#ifdef MNN_LOW_MEMORY + gCoreFunc->Int8GemmKernel_W4 = 
MNNGemmInt8AddBiasScale_16x4_w4_Unit; +#endif // Im2Col gCoreFunc->MNNPackC4Int8ForMatMul_A = _ArmBasicMNNPackC4ForMatMul_A; @@ -2088,15 +2226,31 @@ void MNNCoreInt8FunctionInit() { gCoreFunc->MNNPackC4Int8ForMatMul_A = _ArmBasicMNNPackC4ForMatMul_A_L4<12, 4>; // ConvDepthwise gCoreFunc->ConvDepthwise3x3LineInt8_ARM82 = MNNLineDepthWiseInt8AddBiasScale_ARMV82_Unit3X3; - +#if defined(MNN_LOW_MEMORY) + #ifdef MNN_USE_ARMV82 + gCoreFunc->DynamicQuanInput_ARM82 = DynamicQuanInput_ARM82; + gCoreFunc->MNNGemmInt8AddBiasScale_Unit_FP16 = MNNGemmInt8AddBiasScale_ARMV82_Unit_FP16; + gCoreFunc->MNNGemmInt8AddBiasScale_w4_Unit_FP16 = MNNGemmInt8AddBiasScale_ARMV82_w4_Unit_FP16; + gCoreFunc->DynamicQuanInputAndReorder_ARM82 = DynamicQuanInputAndReorder_ARM82; + #endif + gCoreFunc->Int8GemmKernel_W4 = MNNGemmInt8AddBiasScale_ARMV82_w4_Unit; +#endif } if (core->supportI8mm) { // MatMul gCoreFunc->Int8GemmKernel = MNNGemmInt8AddBiasScale_ARMV86_Unit; gCoreFunc->Int8GemmKernelFast = MNNGemmInt8AddBiasScale_ARMV86_Unit; gCoreFunc->MNNGetGemmUnit = MNNGetGemmUnitI8mm; +#if defined(MNN_LOW_MEMORY) + gCoreFunc->Int8GemmKernel_W4 = MNNGemmInt8AddBiasScale_ARMV86_w4_Unit; + #ifdef MNN_USE_ARMV82 + gCoreFunc->MNNGemmInt8AddBiasScale_Unit_FP16 = MNNGemmInt8AddBiasScale_ARMV86_Unit_FP16; + gCoreFunc->MNNGemmInt8AddBiasScale_w4_Unit_FP16 = MNNGemmInt8AddBiasScale_ARMV86_w4_Unit_FP16; + #endif +#endif // Im2Col - gCoreFunc->MNNPackC4Int8ForMatMul_A = _ArmBasicMNNPackC4ForMatMul_A<20, 8, 4>; + gCoreFunc->MNNPackC4Int8ForMatMul_A = _ArmBasicMNNPackC4ForMatMul_A<10, 8, 8>; + gCoreFunc->MNNPackC4Int8ForMatMul_A_ARM86FP16 = _ArmBasicMNNPackC4ForMatMul_A<10, 8, 8>; } #endif MNNInt8FunctionInit(); diff --git a/source/backend/cpu/compute/Int8FunctionsOpt.h b/source/backend/cpu/compute/Int8FunctionsOpt.h index eea714090..da974619c 100644 --- a/source/backend/cpu/compute/Int8FunctionsOpt.h +++ b/source/backend/cpu/compute/Int8FunctionsOpt.h @@ -38,13 +38,19 @@ extern "C" { struct QuanPostTreatParameters { const float* scale; - const int32_t* bias; + const float* biasFloat; int32_t maxValue; int32_t minValue; int32_t useInt8 = 1; // Save result as int8_t dataType; otherwise float32. 
float roundValuePos = 0.5f; float roundValueNeg = -0.5f; - + float* srcKernelSum; + float* weightQuanBias; + float* fp32minmax; + ssize_t blockNum = 1; + const int32_t* bias; + const float* extraScale = nullptr; + const float* extraBias = nullptr; }; struct QuanPrePostParameters{ float* inputScale; @@ -78,7 +84,13 @@ struct CoreInt8Functions { void(*Int8GemmKernelFast)(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realCount); void(*MNNGetGemmUnit)(int* UNIT, int* SRC_UNIT, int* DST_XUNIT); void(*MNNPackC4Int8ForMatMul_A)(int8_t* destOrigin, int8_t const** sourceGroup, const int32_t* info, const int32_t* el); - + void(*MNNPackC4Int8ForMatMul_A_ARM86FP16)(int8_t* destOrigin, int8_t const** sourceGroup, const int32_t* info, const int32_t* el) = nullptr; + void(*MNNGemmInt8AddBiasScale_Unit_FP16)(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); + void(*MNNGemmInt8AddBiasScale_w4_Unit_FP16)(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); + void(*Int8GemmKernel_W4)(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, + const QuanPostTreatParameters* post, size_t realDstCount); // sparse void(*MNNGetSparseQuantMatMulPackMode)(int* eP, int *lP, int* hP); void(*MNNPackForSparseQuantMatMul_B)(int8_t* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const int8_t* source, size_t h, size_t kernelCount, size_t icCount, const int eP); @@ -90,9 +102,9 @@ struct CoreInt8Functions { size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, int8_t* idxOrder); void(*ConvDepthwise3x3LineInt8_ARM82)(int8_t* dst, const int8_t* src, const int8_t* weight, const QuanPostTreatParameters* parameters, size_t width, size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, int8_t* idxOrder) = nullptr; - - void(*MNNFloat2Int8)(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minValue, - ssize_t maxValue, ssize_t zeroPoint); + void(*DynamicQuanInput_ARM82)(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minValue, ssize_t maxValue, ssize_t zeroPoint) = nullptr; + void (*DynamicQuanInputAndReorder_ARM82)(const float* src, int8_t* dst, size_t planeSize, const float* scale, ssize_t aMin, ssize_t aMax, ssize_t zeroPoint, size_t ocQuad, size_t offset) = nullptr; + void(*MNNFloat2Int8)(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minValue, ssize_t maxValue, ssize_t zeroPoint); void(*MNNInt8ScaleToFloat)(float* dst, const int8_t* src, const float* scale, size_t size, ssize_t zeroPoint); void(*MNNScaleAndAddBias)(float* dst, const float* src, const float* bias, const float* alpha, size_t planeNumber, size_t biasNumber); diff --git a/source/backend/cpu/compute/SparseConvInt8TiledExecutor.cpp b/source/backend/cpu/compute/SparseConvInt8TiledExecutor.cpp index 395fa3745..5c8fc0dca 100644 --- a/source/backend/cpu/compute/SparseConvInt8TiledExecutor.cpp +++ b/source/backend/cpu/compute/SparseConvInt8TiledExecutor.cpp @@ -64,12 +64,12 @@ bool SparseConvInt8TiledExecutor::reorderWeight(Backend* b, const Convolution2DC return true; } 
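// [Editor's illustrative sketch, not part of the original patch] The new QuanPostTreatParameters
// fields introduced by this patch (biasFloat, srcKernelSum, weightQuanBias, fp32minmax, extraScale)
// are consumed per output lane as in the reworked reference kernel MNNGemmInt8AddBiasScale_16x4_Unit
// above: dequantize the int32 accumulator with the per-channel scale, optionally apply the per-column
// dynamic-quant rescale, add the input-row-sum * weight-zero-point correction and the float bias, then
// clamp to fp32minmax. applyInt8PostTreatSketch is a hypothetical standalone summary of that math.
#include <algorithm>
#include <cstdint>
static inline float applyInt8PostTreatSketch(int32_t acc, float scale, float srcKernelSum,
                                             float weightQuanBias, float bias,
                                             const float* extraScale, int col,
                                             const float* fp32minmax) {
    float value = (float)acc * scale;
    if (extraScale) {                        // per-column rescale for dynamically quantized input
        value *= extraScale[col];
    }
    value += srcKernelSum * weightQuanBias;  // asymmetric weight-quantization correction
    value += bias;                           // biasFloat path (useInt8 == 0)
    if (fp32minmax) {                        // fused min/max (e.g. relu / relu6) clamp
        value = std::min(std::max(value, fp32minmax[0]), fp32minmax[1]);
    }
    return value;
}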
-SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res) : ConvInt8TiledExecutor(backend, convOp->common(), res) { +SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, std::shared_ptr res) : ConvInt8TiledExecutor(backend, convOp, res) { std::shared_ptr weightOrigin; - weightOrigin.swap(mResource->mWeightInt8); + weightOrigin.swap(mResourceInt8->mWeightInt8); const SparseCommon* sparseCommon = convOp->sparseParameter(); - mValid = reorderWeight(backend, convOp->common(), weightOrigin, mResource->mWeightInt8, sparseCommon); + mValid = reorderWeight(backend, convOp->common(), weightOrigin, mResourceInt8->mWeightInt8, sparseCommon); if(!mValid) { return; } @@ -81,9 +81,9 @@ SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const } -SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const Convolution2DCommon* common, +SparseConvInt8TiledExecutor::SparseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, const SparseConvInt8TiledExecutor& exe) - : ConvInt8TiledExecutor(backend, common, exe.mResource), + : ConvInt8TiledExecutor(backend, convOp, exe.mResourceInt8), mNNZMap(exe.mNNZMap), mDataOffsetMap(exe.mDataOffsetMap), mSparseBlockOC(exe.mSparseBlockOC), @@ -98,7 +98,7 @@ bool SparseConvInt8TiledExecutor::onClone(Backend* bn, const Op* op, Execution** if (nullptr == dst) { return true; } - auto exe = new SparseConvInt8TiledExecutor(bn, op->main_as_Convolution2D()->common(), *this); + auto exe = new SparseConvInt8TiledExecutor(bn, op->main_as_Convolution2D(), *this); if (!exe->valid()) { return false; } @@ -170,7 +170,7 @@ ErrorCode SparseConvInt8TiledExecutor::onExecute(const std::vector& inp const int ocDivPack = UP_DIV(output->channel(), PackUnit); const auto inputDataPtr = input->host(); - const auto weightDataPtr = mResource->mWeightInt8->host(); + const auto weightDataPtr = mResourceInt8->mWeightInt8->host(); const auto NNZMapPtr = mNNZMap->host(); const auto dataOffsetPtr = mDataOffsetMap->host(); auto im2colPtr = mTempIm2ColBuffer->host(); @@ -179,7 +179,7 @@ ErrorCode SparseConvInt8TiledExecutor::onExecute(const std::vector& inp quanParam.bias = mMutableResource.mBiasInt32->host(); quanParam.scale = mMutableResource.mScaleFloat->host(); quanParam.maxValue = mMutableResource.mClampMax; - if (mResource->mRelu) { + if (mResourceInt8->mRelu) { quanParam.minValue = mMutableResource.mOutputZeroPoint; } else { quanParam.minValue = mMutableResource.mClampMin; diff --git a/source/backend/cpu/compute/SparseConvInt8TiledExecutor.hpp b/source/backend/cpu/compute/SparseConvInt8TiledExecutor.hpp index b3982fab7..9bcb7ee61 100644 --- a/source/backend/cpu/compute/SparseConvInt8TiledExecutor.hpp +++ b/source/backend/cpu/compute/SparseConvInt8TiledExecutor.hpp @@ -50,7 +50,7 @@ class SparseConvInt8TiledExecutor : public ConvInt8TiledExecutor { } private: - SparseConvInt8TiledExecutor(Backend* backend, const Convolution2DCommon* common, const SparseConvInt8TiledExecutor& exe); + SparseConvInt8TiledExecutor(Backend* backend, const Convolution2D* convOp, const SparseConvInt8TiledExecutor& exe); SparseQuantMatMulParam mSparseQuantParam; decltype(CoreInt8Functions::MNNPackedSparseQuantMatMulEpx1) mSparseQuantMatMulKernel; diff --git a/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp b/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp index ab39bfd9b..06b4c1b11 100644 --- 
a/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp +++ b/source/backend/cpu/compute/SparseConvolutionTiledExecutor.cpp @@ -273,6 +273,10 @@ ErrorCode SparseConvolutionTiledImpl::onResize(const std::vector& input int bytes = core->bytes; int unit = core->pack; auto packA = core->MNNPackC4ForMatMul_A; + if (core->matmulBytes != 0) { + // Use origin packC4 + packA = MNNGetCoreFunctions()->MNNPackC4ForMatMul_A; + } int eP, lP, hP; getPackParameter(&eP, &lP, &hP, core); auto weightPtr = weight->host(); diff --git a/source/backend/cpu/x86_x64/AVX2Functions.cpp b/source/backend/cpu/x86_x64/AVX2Functions.cpp index 0f1db20d4..e48d00981 100644 --- a/source/backend/cpu/x86_x64/AVX2Functions.cpp +++ b/source/backend/cpu/x86_x64/AVX2Functions.cpp @@ -44,10 +44,7 @@ bool AVX2Functions::init(int cpuFlags) { coreFunction->MNNPackedMatMulRemain_int4 = _AVX_MNNPackedMatMulRemain_int4; coreFunction->MNNPackedMatMul_int8 = _AVX_MNNPackedMatMul_int8; coreFunction->MNNPackedMatMulRemain_int8 = _AVX_MNNPackedMatMulRemain_int8; - coreFunction->MNNGemmHybridInt4 = _AVX_MNNGemmHybridInt4; - coreFunction->MNNGemmHybridInt8 = _AVX_MNNGemmHybridInt8; coreFunction->MNNAbsMax = _AVX_MNNAbsMaxFP32; - coreFunction->MNNDynamicQuant = _AVX_MNNDynamicQuantFP32; #endif coreFunction->MNNPackC4ForMatMul_A = _AVX_MNNPackC4ForMatMul_A; coreFunction->MNNPackForMatMul_B = _AVX_MNNPackForMatMul_B; diff --git a/source/backend/cpu/x86_x64/FunctionDispatcher.cpp b/source/backend/cpu/x86_x64/FunctionDispatcher.cpp index f3dc97bdc..ca87c0464 100644 --- a/source/backend/cpu/x86_x64/FunctionDispatcher.cpp +++ b/source/backend/cpu/x86_x64/FunctionDispatcher.cpp @@ -55,10 +55,7 @@ void MNNFunctionInit() { coreFunction->MNNPackedMatMulRemain_int4 = _SSE_MNNPackedMatMulRemain_int4; coreFunction->MNNPackedMatMul_int8 = _SSE_MNNPackedMatMul_int8; coreFunction->MNNPackedMatMulRemain_int8 = _SSE_MNNPackedMatMulRemain_int8; - coreFunction->MNNGemmHybridInt4 = _SSE_MNNGemmHybridInt4; - coreFunction->MNNGemmHybridInt8 = _SSE_MNNGemmHybridInt8; coreFunction->MNNAbsMax = _SSE_MNNAbsMaxFP32; - coreFunction->MNNDynamicQuant = _SSE_MNNDynamicQuantFP32; #endif coreFunction->MNNPackC4ForMatMul_A = _SSE_MNNPackC4ForMatMul_A; coreFunction->MNNPackForMatMul_B = _SSE_MNNPackForMatMul_B; @@ -137,6 +134,9 @@ void MNNInt8FunctionInit() { core->Int8GemmKernel = _SSE_MNNGemmInt8AddBiasScale_16x4_Unit; core->Int8GemmKernelFast = _SSE_MNNGemmInt8AddBiasScale_16x4_Unit; core->ConvDepthwiseLineInt8 = _SSE_MNNLineDepthWiseInt8AddBiasScaleUnit; +#ifdef MNN_LOW_MEMORY + core->Int8GemmKernel_W4 = _SSE_MNNGemmInt8AddBiasScale_16x4_w4; +#endif } } diff --git a/source/backend/cpu/x86_x64/avx/FunctionSummary.hpp b/source/backend/cpu/x86_x64/avx/FunctionSummary.hpp index e6056c907..214010c6f 100644 --- a/source/backend/cpu/x86_x64/avx/FunctionSummary.hpp +++ b/source/backend/cpu/x86_x64/avx/FunctionSummary.hpp @@ -46,12 +46,7 @@ void _AVX_MNNPackedMatMul_int8(float* C, const float* A, const float* B, const s const float* postParameters, const float* bias, const float* k, const float* b); void _AVX_MNNPackedMatMulRemain_int8(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); -void _AVX_MNNGemmHybridInt4(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, - size_t dst_depth_quad, size_t realSize, const float** param); -void _AVX_MNNGemmHybridInt8(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t 
dst_step, - size_t dst_depth_quad, size_t realSize, const float** param); void _AVX_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack); -void _AVX_MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack); #endif void _AVX_MNNPackC4ForMatMul_A(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el); diff --git a/source/backend/cpu/x86_x64/avx/GemmAVX2.cpp b/source/backend/cpu/x86_x64/avx/GemmAVX2.cpp index 2214e1688..d19863b14 100644 --- a/source/backend/cpu/x86_x64/avx/GemmAVX2.cpp +++ b/source/backend/cpu/x86_x64/avx/GemmAVX2.cpp @@ -71,146 +71,6 @@ static __m128i _load_int4_to_int8(const uint8_t* src) { return int8_tx16; } -void _AVX_MNNGemmHybridInt4(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, - size_t dst_depth_quad, size_t realSize, const float** param) { - int pack = 8; - size_t weight_step = src_depth_quad * pack * pack * 0.5; - size_t weight_stride = pack * pack * 0.5; - const float* alpha_ptr = param[0]; - const float* zero_ptr = param[1]; - const float* bias_ptr = param[2]; - const float* sums_ptr = param[3]; - const float* scale_ptr = param[4]; - auto one_int16 = _mm256_set1_epi16(1); - auto offset_int8 = _mm256_set1_epi8(128); - for (int ci = 0; ci < dst_depth_quad; ++ci) { - float* dstZ = C + ci * pack * realSize; - const int8_t* weight = B + ci * weight_step; - auto alpha = alpha_ptr + ci * pack; - auto zero = zero_ptr + ci * pack; - auto bias = bias_ptr + ci * pack; - __m256 alphaValue = _mm256_loadu_ps(alpha); - for (int j = 0; j < realSize; ++j) { - const float* sums = sums_ptr + j; - const float* scale = scale_ptr + j; - float* dstX = dstZ + j * pack; - __m256 scaleValue = _mm256_set1_ps(scale[0]); - auto sum_val = _mm256_set1_ps(sums[0]); - __m256 biasValue = _mm256_add_ps(_mm256_loadu_ps(bias), _mm256_mul_ps(_mm256_loadu_ps(zero), sum_val)); - const int8_t* srcBatch = A + j * pack; - auto oc0123_int16 = _mm256_set1_epi16(0); - auto oc4567_int16 = _mm256_set1_epi16(0); - auto oc0123_int32 = _mm256_set1_epi32(0); - auto oc4567_int32 = _mm256_set1_epi32(0); - const __m256i mask = _mm256_set1_epi8(0xf); - // auto extra = _mm256_set1_epi32(0); - for (int k = 0; k < src_depth_quad; ++k) { - auto srcZ = srcBatch + k * pack * realSize; - const uint8_t* weightZ = (uint8_t*)weight + k * weight_stride; - auto s0 = _mm256_castpd_si256(_mm256_broadcast_sd((double*)srcZ)); - auto wi4 = _mm256_castps_si256(_mm256_loadu_ps((const float*)weightZ)); - auto w0_ = _mm256_and_si256(mask, _mm256_srli_epi16(wi4, 4)); - auto w1_ = _mm256_and_si256(mask, wi4); - auto w0 = _mm256_permute2x128_si256(w0_, w1_, 0x20); - auto w1 = _mm256_permute2x128_si256(w0_, w1_, 0x31); - oc0123_int16 = _mm256_maddubs_epi16(w0, s0); // int16_t sum - oc4567_int16 = _mm256_maddubs_epi16(w1, s0); // int16_t sum - oc0123_int32 = _mm256_add_epi32(_mm256_madd_epi16(oc0123_int16, one_int16), oc0123_int32); - oc4567_int32 = _mm256_add_epi32(_mm256_madd_epi16(oc4567_int16, one_int16), oc4567_int32); - } - - auto oc0426_int32 = _mm256_unpacklo_epi32(oc0123_int32, oc4567_int32); - auto oc1537_int32 = _mm256_unpackhi_epi32(oc0123_int32, oc4567_int32); - auto tmp0 = _mm256_unpacklo_epi32(oc0426_int32, oc1537_int32); // 01452367 - auto tmp1 = _mm256_unpackhi_epi32(oc0426_int32, oc1537_int32); // 01452367 - auto tmp2 = _mm256_add_epi32(tmp0, tmp1); // 01452367 - auto oc0145 = _mm256_extractf128_si256(tmp2, 0); - auto oc2367 = 
_mm256_extractf128_si256(tmp2, 1); - auto oc0123 = _mm_unpacklo_epi64(oc0145, oc2367); - auto oc4567 = _mm_unpackhi_epi64(oc0145, oc2367); - - auto sum8 = _mm256_set_m128i(oc4567, oc0123); - - __m256 f0 = _mm256_cvtepi32_ps(sum8); - __m256 fs = _mm256_mul_ps(_mm256_mul_ps(f0, scaleValue), alphaValue); - fs = _mm256_add_ps(biasValue, fs); - _mm256_storeu_ps(dstX, fs); - - } - } -} -void _AVX_MNNGemmHybridInt8(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, - size_t dst_depth_quad, size_t realSize, const float** param) { - int pack = 8; - size_t weight_step = src_depth_quad * pack * pack; - const float* alpha_ptr = param[0]; - const float* zero_ptr = param[1]; - const float* bias_ptr = param[2]; - const float* sums_ptr = param[3]; - const float* scale_ptr = param[4]; - for (int ci = 0; ci < dst_depth_quad; ++ci) { - float* dstZ = C + ci * pack * realSize; - const int8_t* weight = B + ci * weight_step; - auto alpha = alpha_ptr + ci * pack; - auto zero = zero_ptr + ci * pack; - auto bias = bias_ptr + ci * pack; - __m256 alphaValue = _mm256_load_ps(alpha); - for (int j = 0; j < realSize; ++j) { - const float* sums = sums_ptr + j; - const float* scale = scale_ptr + j; - float* dstX = dstZ + j * pack; - __m256 scaleValue = _mm256_set1_ps(scale[0]); - __m256 biasValue = _mm256_add_ps(_mm256_load_ps(bias), _mm256_mul_ps(_mm256_load_ps(zero), _mm256_set1_ps(sums[0]))); - const int8_t* srcBatch = A + j * pack; - auto oc0_and_1 = _mm256_set1_epi32(0); - auto oc2_and_3 = _mm256_set1_epi32(0); - auto oc4_and_5 = _mm256_set1_epi32(0); - auto oc6_and_7 = _mm256_set1_epi32(0); - for (int k = 0; k < src_depth_quad; ++k) { - const int8_t* srcZ = srcBatch + k * pack * realSize; - const int8_t* weightZ = weight + k * pack * pack; - auto w0 = _mm_loadu_si128((__m128i const*)weightZ); // w0-1 - auto w1 = _mm_loadu_si128((__m128i const*)(weightZ + 16)); - auto w2 = _mm_loadu_si128((__m128i const*)(weightZ + 16 * 2)); - auto w3 = _mm_loadu_si128((__m128i const*)(weightZ + 16 * 3)); - auto w0_16= _mm256_cvtepi8_epi16(w0); //16xint16_t - auto w1_16= _mm256_cvtepi8_epi16(w1); - auto w2_16= _mm256_cvtepi8_epi16(w2); - auto w3_16= _mm256_cvtepi8_epi16(w3); - auto s0 = _mm_castps_si128(_mm_broadcast_ss((float*)srcZ + 0)); - auto s1 = _mm_castps_si128(_mm_broadcast_ss((float*)srcZ + 1)); - auto s0_16 = _mm256_cvtepi8_epi16(s0); - auto s1_16 = _mm256_cvtepi8_epi16(s1); - auto S_int16 = _mm256_unpacklo_epi64(s0_16, s1_16); - oc0_and_1 = _mm256_add_epi32(oc0_and_1, _mm256_madd_epi16(S_int16, w0_16)); - oc2_and_3 = _mm256_add_epi32(oc2_and_3, _mm256_madd_epi16(S_int16, w1_16)); - oc4_and_5 = _mm256_add_epi32(oc4_and_5, _mm256_madd_epi16(S_int16, w2_16)); - oc6_and_7 = _mm256_add_epi32(oc6_and_7, _mm256_madd_epi16(S_int16, w3_16)); - } - auto oc_02021313_lo = _mm256_unpacklo_epi32(oc0_and_1, oc2_and_3); - auto oc_02021313_hi = _mm256_unpackhi_epi32(oc0_and_1, oc2_and_3); - auto oc_46465757_lo = _mm256_unpacklo_epi32(oc4_and_5, oc6_and_7); - auto oc_46465757_hi = _mm256_unpackhi_epi32(oc4_and_5, oc6_and_7); - auto oc_02021313 = _mm256_add_epi32(oc_02021313_lo, oc_02021313_hi); - auto oc_46465757 = _mm256_add_epi32(oc_46465757_lo, oc_46465757_hi); - auto oc_04261537_lo = _mm256_unpacklo_epi32(oc_02021313, oc_46465757); - auto oc_04261537_hi = _mm256_unpackhi_epi32(oc_02021313, oc_46465757); - auto oc_04261537 = _mm256_add_epi32(oc_04261537_lo, oc_04261537_hi); - auto oc_0426 = _mm256_extractf128_si256(oc_04261537, 0); - auto oc_1537 = _mm256_extractf128_si256(oc_04261537, 1); - auto oc_0145 = 
_mm_unpacklo_epi32(oc_0426, oc_1537); - auto oc_2367 = _mm_unpackhi_epi32(oc_0426, oc_1537); - auto oc_0123 = _mm_unpacklo_epi64(oc_0145, oc_2367); - auto oc_4567 = _mm_unpackhi_epi64(oc_0145, oc_2367); - auto sum8 = _mm256_set_m128i(oc_4567, oc_0123); - __m256 f0 = _mm256_cvtepi32_ps(sum8); - __m256 fs = _mm256_mul_ps(_mm256_mul_ps(f0, scaleValue), alphaValue); - fs = _mm256_add_ps(biasValue, fs); - _mm256_storeu_ps(dstX, fs); - } - } -} - void _AVX_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack) { // source: (ic/8, N, 8) auto srcStep = pack * realSize; @@ -236,40 +96,6 @@ void _AVX_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_qua } } -void _AVX_MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack) { - // AVX: pack=8 - __m256 zero = _mm256_setzero_ps(); - __m256 plus = _mm256_set1_ps(0.5f); - __m256 minus = _mm256_set1_ps(-0.5f); - auto offset = _mm256_set1_epi32(128); - uint8_t* dstPtr = reinterpret_cast(dst); - float temp[8]; - for (int i = 0; i < realSize; ++i) { - __m256 scaleVal = _mm256_set1_ps(scale[i]); - __m256 acc = _mm256_setzero_ps(); - for (int c = 0; c < src_depth_quad; ++c) { - auto srcZ = src + c * pack * realSize + i * pack; - auto dstZ = dstPtr + c * pack * realSize + i * pack; - __m256 f0 = _mm256_loadu_ps(srcZ); - __m256 m0 = _mm256_mul_ps(f0, scaleVal); - __m256 mask = _mm256_cmp_ps(m0, zero, 1); - __m256 d0 = _mm256_blendv_ps(plus, minus, mask); - d0 = _mm256_add_ps(d0, m0); - __m256 round0 = _mm256_round_ps(d0, 3); - auto d0_epi32 = _mm256_cvtps_epi32(round0); // int32x8 - auto d0_epi16 = _mm256_packs_epi32(d0_epi32, _mm256_castps_si256(_mm256_permute2f128_ps(_mm256_castsi256_ps(d0_epi32), _mm256_castsi256_ps(d0_epi32), 1))); - // d0_epi32 = _mm256_packs_epi32(d0_epi32, d0_epi32); // int16x8 - d0_epi32 = _mm256_packs_epi16(d0_epi16, d0_epi16); // int8x8 - auto D0 = _mm_castsi128_ps(_mm256_extracti128_si256(d0_epi32, 0)); - _mm_storeu_ps(temp, D0); - ::memcpy(dstZ, temp, pack * sizeof(int8_t)); - acc = _mm256_add_ps(acc, round0); - } - _mm256_storeu_ps(temp, acc); - int sumVal = static_cast(temp[0] + temp[1] + temp[2] + temp[3] + temp[4] + temp[5] + temp[6] + temp[7]); - ((int32_t*)sum)[i] = sumVal; - } -} #endif void _AVX_MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId) { diff --git a/source/backend/cpu/x86_x64/avx/GemmCommon.cpp b/source/backend/cpu/x86_x64/avx/GemmCommon.cpp index 0753e7f8d..ed944f4c6 100644 --- a/source/backend/cpu/x86_x64/avx/GemmCommon.cpp +++ b/source/backend/cpu/x86_x64/avx/GemmCommon.cpp @@ -420,11 +420,11 @@ void _AVX_MNNPackedSparseMatMul(float* C, const float* A, const float* B, unsign void _AVX_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_t size) { int pack = 8; - int sizeDiv8 = UP_DIV(size, pack); - __m256 minVal = _mm256_loadu_ps(source); + int sizeDiv8 = size / pack; + __m256 minVal = _mm256_set1_ps(source[0]); __m256 maxVal = minVal; float maxArr[8], minArr[8]; - for (int i = 1; i < sizeDiv8; ++i) { + for (int i = 0; i < sizeDiv8; ++i) { auto src0 = source + pack * i; __m256 vecA = _mm256_loadu_ps(src0); __m256 maskMax = _mm256_cmp_ps(vecA, maxVal, 14); @@ -432,7 +432,6 @@ void _AVX_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_ maxVal = _mm256_blendv_ps(maxVal, vecA, maskMax); minVal = _mm256_blendv_ps(minVal, vecA, maskMin); } - _mm256_storeu_ps(maxArr, 
maxVal); _mm256_storeu_ps(minArr, minVal); float max_ = maxArr[0], min_ = minArr[0]; @@ -444,12 +443,11 @@ void _AVX_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_ min_ = minArr[k]; } } + for (int i = pack * sizeDiv8; i < size; ++i) { + max_ = std::max(max_, source[i]); + min_ = std::min(min_, source[i]); + } min[0] = min_; max[0] = max_; - // float range = max_ - min_; - // MNN_ASSERT(range != 0); - // *quantScale = 255.0f / range; - // *dequantScale = range / 255.0f; - // *zeroPoint = std::min(255.f, std::max(roundf(-(min_ * 255.f) / range), 0.f)) - 128.f; } diff --git a/source/backend/cpu/x86_x64/avx/GemmInt8.cpp b/source/backend/cpu/x86_x64/avx/GemmInt8.cpp index 18c4422d1..1a6b60746 100644 --- a/source/backend/cpu/x86_x64/avx/GemmInt8.cpp +++ b/source/backend/cpu/x86_x64/avx/GemmInt8.cpp @@ -51,6 +51,450 @@ auto d##i = _mm_add_epi32(d##i##0, d##i##1); #define COMPUTE(u, v)\ D##u##v = _mm256_add_epi32(D##u##v, _mm256_madd_epi16(W##u, S##v)); +#define LOAD_INT4_TO_INT8 \ +auto w_int4 = _mm_loadu_si128((__m128i const*)weight_sz);\ +auto w_int4_high = _mm_and_si128(mask, _mm_srli_epi16(w_int4, 4));\ +auto w_int4_low = _mm_and_si128(mask, w_int4);\ +auto w_0 = _mm_unpacklo_epi8(w_int4_high, w_int4_low);\ +auto w_1 = _mm_unpackhi_epi8(w_int4_high, w_int4_low); + +void _AVX_MNNGemmInt8AddBiasScale_16x4_w4(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { + MNN_ASSERT(post->useInt8==0); + const auto dst_step_tmp = dst_step / sizeof(int8_t); + auto zero128 = _mm256_set1_ps(0.0f); + auto minValue = _mm256_set1_ps(post->minValue); + auto maxValue = _mm256_set1_ps(post->maxValue); + auto offset = _mm256_set1_epi32(128); + __m256 fp32min, fp32max; + if (post->fp32minmax) { + fp32min = _mm256_set1_ps((post->fp32minmax)[0]); + fp32max = _mm256_set1_ps((post->fp32minmax)[1]); + } + int blockNum = post->blockNum; + const float* biasPtr = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + + int weight_step_Z = 0.5 * blockNum * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); + int weight_step_Y = 0.5 * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); + const __m128i mask = _mm_set1_epi8(0xf); + + auto srcKernelSumPtr = post->srcKernelSum; + __m256 kernelSum0 = _mm256_setzero_ps(); + __m256 kernelSum1 = _mm256_setzero_ps(); + __m256 kernelSum2 = _mm256_setzero_ps(); + __m256 kernelSum3 = _mm256_setzero_ps(); + if (GEMMINT8_AVX2_E == realDst) { + kernelSum0 = _mm256_set1_ps(post->srcKernelSum[0]); + kernelSum1 = _mm256_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm256_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm256_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm256_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm256_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm256_set1_ps(post->srcKernelSum[2]); + } + } + auto f128 = _mm256_set1_ps(128.f); + __m256 extrascale0 = _mm256_setzero_ps(); + __m256 extrascale1 = _mm256_setzero_ps(); + __m256 extrascale2 = _mm256_setzero_ps(); + __m256 extrascale3 = _mm256_setzero_ps(); + if (post->extraScale) { + if (GEMMINT8_AVX2_E == realDst) { + extrascale0 = _mm256_set1_ps(post->extraScale[0]); + extrascale1 = _mm256_set1_ps(post->extraScale[1]); + extrascale2 = _mm256_set1_ps(post->extraScale[2]); + extrascale3 = _mm256_set1_ps(post->extraScale[3]); + } else { + extrascale0 = _mm256_set1_ps(post->extraScale[0]); + if (realDst > 1) { + 
extrascale1 = _mm256_set1_ps(post->extraScale[1]); + } + if (realDst > 2) { + extrascale2 = _mm256_set1_ps(post->extraScale[2]); + } + } + } + //printf("e=%d, sz=%d, dz=%d\n", realDst, src_depth_quad, dst_depth_quad); + if (GEMMINT8_AVX2_E == realDst) { + for (int dz = 0; dz < dst_depth_quad; ++dz) { + const auto weight_dz = weight + dz * weight_step_Z; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; + const float* scale_dz = post->scale + dz * AVX2_PACKINT8; + auto dst_z = dst + dz * dst_step_tmp; + const auto src_x = src; + auto dst_x = dst_z; + __m256i D00 = _mm256_set1_epi32(0); + __m256i D01 = _mm256_set1_epi32(0); + __m256i D02 = _mm256_set1_epi32(0); + __m256i D03 = _mm256_set1_epi32(0); + __m256i D10 = _mm256_set1_epi32(0); + __m256i D11 = _mm256_set1_epi32(0); + __m256i D12 = _mm256_set1_epi32(0); + __m256i D13 = _mm256_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + sz * weight_step_Y; + const auto src_z = src_x + sz * GEMMINT8_AVX2_L * GEMMINT8_AVX2_E; + LOAD_INT4_TO_INT8; + auto W0 = _mm256_cvtepi8_epi16(w_0); + auto W1 = _mm256_cvtepi8_epi16(w_1); + + auto s0 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 0)); + auto s1 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 1)); + auto s2 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 2)); + auto s3 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 3)); + auto S0 = _mm256_cvtepu8_epi16(s0); + auto S1 = _mm256_cvtepu8_epi16(s1); + auto S2 = _mm256_cvtepu8_epi16(s2); + auto S3 = _mm256_cvtepu8_epi16(s3); + + COMPUTE(0, 0); + COMPUTE(1, 0); + COMPUTE(0, 1); + COMPUTE(1, 1); + COMPUTE(0, 2); + COMPUTE(1, 2); + COMPUTE(0, 3); + COMPUTE(1, 3); + } + auto D0 = NORMAL_HADD(D00, D10); + auto D1 = NORMAL_HADD(D01, D11); + auto D2 = NORMAL_HADD(D02, D12); + auto D3 = NORMAL_HADD(D03, D13); + auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + + auto f0 = _mm256_cvtepi32_ps(D0); + auto f1 = _mm256_cvtepi32_ps(D1); + auto f2 = _mm256_cvtepi32_ps(D2); + auto f3 = _mm256_cvtepi32_ps(D3); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm256_mul_ps(kernelSum2, weightBiasValue); // .. 
third + auto xy0_3 = _mm256_mul_ps(kernelSum3, weightBiasValue); // ..fourth + f0 = _mm256_mul_ps(f0, scaleValue); + f1 = _mm256_mul_ps(f1, scaleValue); + f2 = _mm256_mul_ps(f2, scaleValue); + f3 = _mm256_mul_ps(f3, scaleValue); + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + f1 = _mm256_mul_ps(f1, extrascale1); + f2 = _mm256_mul_ps(f2, extrascale2); + f3 = _mm256_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm256_mul_ps(extrabias, extrascale3); + f0 = _mm256_sub_ps(f0, extrabias0); + f1 = _mm256_sub_ps(f1, extrabias1); + f2 = _mm256_sub_ps(f2, extrabias2); + f3 = _mm256_sub_ps(f3, extrabias3); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + f2 = _mm256_add_ps(f2, xy0_2); + f3 = _mm256_add_ps(f3, xy0_3); + + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + f2 = _mm256_add_ps(f2, biasValue); + f3 = _mm256_add_ps(f3, biasValue); + } else { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + auto dstv1 = _mm256_loadu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8); + auto dstv2 = _mm256_loadu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8); + auto dstv3 = _mm256_loadu_ps(((float*)dst_x) + 3 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv0); + f1 = _mm256_add_ps(f1, dstv1); + f2 = _mm256_add_ps(f2, dstv2); + f3 = _mm256_add_ps(f3, dstv3); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f2 = _mm256_min_ps(f2, fp32max); + f3 = _mm256_min_ps(f3, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + f2 = _mm256_max_ps(f2, fp32min); + f3 = _mm256_max_ps(f3, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); + _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); + _mm256_storeu_ps(((float*)dst_x) + 3 * AVX2_PACKINT8, f3); + + } + return; + } + if (3 == realDst) { + for (int dz = 0; dz < dst_depth_quad; ++dz) { + const auto weight_dz = weight + dz * weight_step_Z; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; + const float* scale_dz = post->scale + dz * AVX2_PACKINT8; + auto dst_z = dst + dz * dst_step_tmp; + const auto src_x = src; + auto dst_x = dst_z; + __m256i D00 = _mm256_set1_epi32(0); + __m256i D01 = _mm256_set1_epi32(0); + __m256i D02 = _mm256_set1_epi32(0); + + __m256i D10 = _mm256_set1_epi32(0); + __m256i D11 = _mm256_set1_epi32(0); + __m256i D12 = _mm256_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + sz * weight_step_Y; + const auto src_z = src_x + sz * GEMMINT8_AVX2_L * GEMMINT8_AVX2_E; + LOAD_INT4_TO_INT8; + + auto W0 = _mm256_cvtepi8_epi16(w_0); + auto W1 = _mm256_cvtepi8_epi16(w_1); + + auto s0 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 0)); + auto s1 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 1)); + auto s2 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 2)); + auto S0 = _mm256_cvtepu8_epi16(s0); + auto S1 = _mm256_cvtepu8_epi16(s1); + auto S2 = 
_mm256_cvtepu8_epi16(s2); + + COMPUTE(0, 0); + COMPUTE(1, 0); + COMPUTE(0, 1); + COMPUTE(1, 1); + COMPUTE(0, 2); + COMPUTE(1, 2); + } + auto D0 = NORMAL_HADD(D00, D10); + auto D1 = NORMAL_HADD(D01, D11); + auto D2 = NORMAL_HADD(D02, D12); + auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + + auto f0 = _mm256_cvtepi32_ps(D0); + auto f1 = _mm256_cvtepi32_ps(D1); + auto f2 = _mm256_cvtepi32_ps(D2); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm256_mul_ps(kernelSum2, weightBiasValue); // .. third + f0 = _mm256_mul_ps(f0, scaleValue); + f1 = _mm256_mul_ps(f1, scaleValue); + f2 = _mm256_mul_ps(f2, scaleValue); + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + f1 = _mm256_mul_ps(f1, extrascale1); + f2 = _mm256_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + f0 = _mm256_sub_ps(f0, extrabias0); + f1 = _mm256_sub_ps(f1, extrabias1); + f2 = _mm256_sub_ps(f2, extrabias2); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + f2 = _mm256_add_ps(f2, xy0_2); + + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + f2 = _mm256_add_ps(f2, biasValue); + } else { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + auto dstv1 = _mm256_loadu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8); + auto dstv2 = _mm256_loadu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv0); + f1 = _mm256_add_ps(f1, dstv1); + f2 = _mm256_add_ps(f2, dstv2); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f2 = _mm256_min_ps(f2, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + f2 = _mm256_max_ps(f2, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); + _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); + + } + return; + } + if (2 == realDst) { + for (int dz = 0; dz < dst_depth_quad; ++dz) { + const auto weight_dz = weight + dz * weight_step_Z; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; + const float* scale_dz = post->scale + dz * AVX2_PACKINT8; + auto dst_z = dst + dz * dst_step_tmp; + const auto src_x = src; + auto dst_x = dst_z; + __m256i D00 = _mm256_set1_epi32(0); + __m256i D01 = _mm256_set1_epi32(0); + + __m256i D10 = _mm256_set1_epi32(0); + __m256i D11 = _mm256_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + sz * weight_step_Y; + const auto src_z = src_x + sz * GEMMINT8_AVX2_L * GEMMINT8_AVX2_E; + LOAD_INT4_TO_INT8; + auto W0 = _mm256_cvtepi8_epi16(w_0); + auto W1 = _mm256_cvtepi8_epi16(w_1); + + auto s0 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 0)); + auto s1 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 1)); + auto S0 = _mm256_cvtepu8_epi16(s0); + auto S1 = _mm256_cvtepu8_epi16(s1); + + 
COMPUTE(0, 0); + COMPUTE(1, 0); + COMPUTE(0, 1); + COMPUTE(1, 1); + } + auto D0 = NORMAL_HADD(D00, D10); + auto D1 = NORMAL_HADD(D01, D11); + auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + + auto f0 = _mm256_cvtepi32_ps(D0); + auto f1 = _mm256_cvtepi32_ps(D1); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + f0 = _mm256_mul_ps(f0, scaleValue); + f1 = _mm256_mul_ps(f1, scaleValue); + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + f1 = _mm256_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + f0 = _mm256_sub_ps(f0, extrabias0); + f1 = _mm256_sub_ps(f1, extrabias1); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + } else { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + auto dstv1 = _mm256_loadu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv0); + f1 = _mm256_add_ps(f1, dstv1); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); + + } + return; + } + if (1 == realDst) { + for (int dz = 0; dz < dst_depth_quad; ++dz) { + const auto weight_dz = weight + dz * weight_step_Z; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; + const float* scale_dz = post->scale + dz * AVX2_PACKINT8; + auto dst_z = dst + dz * dst_step_tmp; + const auto src_x = src; + auto dst_x = dst_z; + __m256i D00 = _mm256_set1_epi32(0); + __m256i D10 = _mm256_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + sz * weight_step_Y; + const auto src_z = src_x + sz * GEMMINT8_AVX2_L * GEMMINT8_AVX2_E; + LOAD_INT4_TO_INT8; + auto W0 = _mm256_cvtepi8_epi16(w_0); + auto W1 = _mm256_cvtepi8_epi16(w_1); + + auto s0 = _mm_castps_si128(_mm_broadcast_ss((float*)src_z + 0)); + auto S0 = _mm256_cvtepu8_epi16(s0); + + COMPUTE(0, 0); + COMPUTE(1, 0); + } + auto D0 = NORMAL_HADD(D00, D10); + auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + + auto f0 = _mm256_cvtepi32_ps(D0); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + f0 = _mm256_mul_ps(f0, scaleValue); + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + f0 = 
_mm256_sub_ps(f0, extrabias0); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + } else { + auto dstv = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + } + + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + + } + return; + } + +} + void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { const auto dst_step_tmp = dst_step / sizeof(int8_t); auto zero128 = _mm256_set1_ps(0.0f); @@ -59,11 +503,61 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, cons auto plus = _mm256_set1_ps(0.5f); auto minus = _mm256_set1_ps(-0.5f); auto offset = _mm256_set1_epi32(128); + __m256 fp32min, fp32max; + if (0 == post->useInt8 && post->fp32minmax) { + fp32min = _mm256_set1_ps((post->fp32minmax)[0]); + fp32max = _mm256_set1_ps((post->fp32minmax)[1]); + } + int blockNum = post->blockNum; + const float* biasPtr = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + auto srcKernelSumPtr = post->srcKernelSum; + __m256 kernelSum0 = _mm256_setzero_ps(); + __m256 kernelSum1 = _mm256_setzero_ps(); + __m256 kernelSum2 = _mm256_setzero_ps(); + __m256 kernelSum3 = _mm256_setzero_ps(); + if (GEMMINT8_AVX2_E == realDst) { + kernelSum0 = _mm256_set1_ps(post->srcKernelSum[0]); + kernelSum1 = _mm256_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm256_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm256_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm256_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm256_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm256_set1_ps(post->srcKernelSum[2]); + } + } + auto f128 = _mm256_set1_ps(128.f); + __m256 extrascale0 = _mm256_setzero_ps(); + __m256 extrascale1 = _mm256_setzero_ps(); + __m256 extrascale2 = _mm256_setzero_ps(); + __m256 extrascale3 = _mm256_setzero_ps(); + if (post->extraScale) { + if (GEMMINT8_AVX2_E == realDst) { + extrascale0 = _mm256_set1_ps(post->extraScale[0]); + extrascale1 = _mm256_set1_ps(post->extraScale[1]); + extrascale2 = _mm256_set1_ps(post->extraScale[2]); + extrascale3 = _mm256_set1_ps(post->extraScale[3]); + } else { + extrascale0 = _mm256_set1_ps(post->extraScale[0]); + if (realDst > 1) { + extrascale1 = _mm256_set1_ps(post->extraScale[1]); + } + if (realDst > 2) { + extrascale2 = _mm256_set1_ps(post->extraScale[2]); + } + } + } //printf("e=%d, sz=%d, dz=%d\n", realDst, src_depth_quad, dst_depth_quad); if (GEMMINT8_AVX2_E == realDst) { for (int dz = 0; dz < dst_depth_quad; ++dz) { - const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto weight_dz = weight + dz * blockNum * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -107,40 +601,92 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, cons auto D1 = NORMAL_HADD(D01, D11); auto D2 = NORMAL_HADD(D02, D12); auto D3 = 
NORMAL_HADD(D03, D13); - - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); - D1 = _mm256_add_epi32(D1, biasValue0); - D2 = _mm256_add_epi32(D2, biasValue0); - D3 = _mm256_add_epi32(D3, biasValue0); - auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + auto f0 = _mm256_cvtepi32_ps(D0); auto f1 = _mm256_cvtepi32_ps(D1); auto f2 = _mm256_cvtepi32_ps(D2); auto f3 = _mm256_cvtepi32_ps(D3); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm256_mul_ps(kernelSum2, weightBiasValue); // .. third + auto xy0_3 = _mm256_mul_ps(kernelSum3, weightBiasValue); // ..fourth f0 = _mm256_mul_ps(f0, scaleValue); f1 = _mm256_mul_ps(f1, scaleValue); f2 = _mm256_mul_ps(f2, scaleValue); f3 = _mm256_mul_ps(f3, scaleValue); - if (post->useInt8 == 0) { - _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); - _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); - _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); - _mm256_storeu_ps(((float*)dst_x) + 3 * AVX2_PACKINT8, f3); - } else { + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + f1 = _mm256_mul_ps(f1, extrascale1); + f2 = _mm256_mul_ps(f2, extrascale2); + f3 = _mm256_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm256_mul_ps(extrabias, extrascale3); + f0 = _mm256_sub_ps(f0, extrabias0); + f1 = _mm256_sub_ps(f1, extrabias1); + f2 = _mm256_sub_ps(f2, extrabias2); + f3 = _mm256_sub_ps(f3, extrabias3); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + f2 = _mm256_add_ps(f2, xy0_2); + f3 = _mm256_add_ps(f3, xy0_3); + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + f2 = _mm256_add_ps(f2, biasValue); + f3 = _mm256_add_ps(f3, biasValue); + } + if (post->useInt8 == 1) { POSTTREAT(0); POSTTREAT(1); POSTTREAT(2); POSTTREAT(3); + } else { + if (nullptr == biasPtr) { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + auto dstv1 = _mm256_loadu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8); + auto dstv2 = _mm256_loadu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8); + auto dstv3 = _mm256_loadu_ps(((float*)dst_x) + 3 * AVX2_PACKINT8); + + f0 = _mm256_add_ps(f0, dstv0); + f1 = _mm256_add_ps(f1, dstv1); + f2 = _mm256_add_ps(f2, dstv2); + f3 = _mm256_add_ps(f3, dstv3); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f2 = _mm256_min_ps(f2, fp32max); + f3 = _mm256_min_ps(f3, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + f2 = _mm256_max_ps(f2, fp32min); + f3 = _mm256_max_ps(f3, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); + _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); + _mm256_storeu_ps(((float*)dst_x) + 3 * AVX2_PACKINT8, f3); } } return; } if (3 == realDst) { 
for (int dz = 0; dz < dst_depth_quad; ++dz) { - const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto weight_dz = weight + dz * blockNum * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -178,35 +724,77 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, cons auto D0 = NORMAL_HADD(D00, D10); auto D1 = NORMAL_HADD(D01, D11); auto D2 = NORMAL_HADD(D02, D12); - - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); - D1 = _mm256_add_epi32(D1, biasValue0); - D2 = _mm256_add_epi32(D2, biasValue0); - auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + auto f0 = _mm256_cvtepi32_ps(D0); auto f1 = _mm256_cvtepi32_ps(D1); auto f2 = _mm256_cvtepi32_ps(D2); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm256_mul_ps(kernelSum2, weightBiasValue); // .. third f0 = _mm256_mul_ps(f0, scaleValue); f1 = _mm256_mul_ps(f1, scaleValue); f2 = _mm256_mul_ps(f2, scaleValue); - if (post->useInt8 == 0) { - _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); - _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); - _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); - } else { + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + f1 = _mm256_mul_ps(f1, extrascale1); + f2 = _mm256_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + f0 = _mm256_sub_ps(f0, extrabias0); + f1 = _mm256_sub_ps(f1, extrabias1); + f2 = _mm256_sub_ps(f2, extrabias2); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + f2 = _mm256_add_ps(f2, xy0_2); + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + f2 = _mm256_add_ps(f2, biasValue); + } + if (post->useInt8 == 1) { POSTTREAT(0); POSTTREAT(1); POSTTREAT(2); + } else { + if (nullptr == biasPtr) { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + auto dstv1 = _mm256_loadu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8); + auto dstv2 = _mm256_loadu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv0); + f1 = _mm256_add_ps(f1, dstv1); + f2 = _mm256_add_ps(f2, dstv2); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f2 = _mm256_min_ps(f2, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + f2 = _mm256_max_ps(f2, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); + _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); } } return; } if (2 == realDst) { for (int dz = 0; dz < 
dst_depth_quad; ++dz) { - const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto weight_dz = weight + dz * blockNum * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -237,30 +825,64 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, cons } auto D0 = NORMAL_HADD(D00, D10); auto D1 = NORMAL_HADD(D01, D11); - - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); - D1 = _mm256_add_epi32(D1, biasValue0); - auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + auto f0 = _mm256_cvtepi32_ps(D0); auto f1 = _mm256_cvtepi32_ps(D1); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second f0 = _mm256_mul_ps(f0, scaleValue); f1 = _mm256_mul_ps(f1, scaleValue); - if (post->useInt8 == 0) { - _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); - _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); - } else { + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + f1 = _mm256_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + f0 = _mm256_sub_ps(f0, extrabias0); + f1 = _mm256_sub_ps(f1, extrabias1); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + } + if (post->useInt8 == 1) { POSTTREAT(0); POSTTREAT(1); + } else { + if (nullptr == biasPtr) { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + auto dstv1 = _mm256_loadu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv0); + f1 = _mm256_add_ps(f1, dstv1); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); + _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); } } return; } if (1 == realDst) { for (int dz = 0; dz < dst_depth_quad; ++dz) { - const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto weight_dz = weight + dz * blockNum * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -283,17 +905,43 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, cons COMPUTE(1, 0); } auto D0 = NORMAL_HADD(D00, D10); - - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = 
_mm256_add_epi32(D0, biasValue0); - auto scaleValue = _mm256_loadu_ps(scale_dz); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + auto f0 = _mm256_cvtepi32_ps(D0); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first f0 = _mm256_mul_ps(f0, scaleValue); - if (post->useInt8 == 0) { - _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); - } else { + if (post->extraScale) { + f0 = _mm256_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * AVX2_PACKINT8; + auto extrabias = _mm256_loadu_ps(extraB); + extrabias = _mm256_mul_ps(f128, extrabias); + auto extrabias0 = _mm256_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm256_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm256_mul_ps(extrabias, extrascale2); + f0 = _mm256_sub_ps(f0, extrabias0); + } + } + f0 = _mm256_add_ps(f0, xy0_0); + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * AVX2_PACKINT8; + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + } + if (post->useInt8 == 1) { POSTTREAT(0); + } else { + if (nullptr == biasPtr) { + auto dstv0 = _mm256_loadu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8); + f0 = _mm256_add_ps(f0, dstv0); + } + if (post->fp32minmax) { + f0 = _mm256_min_ps(f0, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + } + _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); } } return; @@ -309,11 +957,36 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, auto minus = _mm256_set1_ps(-0.5f); auto oneValue = _mm256_set1_epi16(1); auto offset = _mm256_set1_epi32(128); + __m256 fp32min, fp32max; + if (0 == post->useInt8) { + fp32min = _mm256_set1_ps((post->fp32minmax)[0]); + fp32max = _mm256_set1_ps((post->fp32minmax)[1]); + } + auto srcKernelSumPtr = post->srcKernelSum; + __m256 kernelSum0 = _mm256_setzero_ps(); + __m256 kernelSum1 = _mm256_setzero_ps(); + __m256 kernelSum2 = _mm256_setzero_ps(); + __m256 kernelSum3 = _mm256_setzero_ps(); + if (GEMMINT8_AVX2_E == realDst) { + kernelSum0 = _mm256_set1_ps(post->srcKernelSum[0]); + kernelSum1 = _mm256_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm256_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm256_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm256_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm256_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm256_set1_ps(post->srcKernelSum[2]); + } + } //printf("e=%d, sz=%d, dz=%d\n", realDst, src_depth_quad, dst_depth_quad); if (GEMMINT8_AVX2_E == realDst) { for (int dz = 0; dz < dst_depth_quad; ++dz) { const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto bias_dz = post->biasFloat + dz * AVX2_PACKINT8; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -344,22 +1017,45 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, auto D2 = D02; auto D3 = D03; - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); - D1 = _mm256_add_epi32(D1, biasValue0); - D2 = _mm256_add_epi32(D2, biasValue0); - D3 = _mm256_add_epi32(D3, biasValue0); + // auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); + auto weightBiasValue = 
_mm256_loadu_ps((float*)weightBias_dz); + // D0 = _mm256_add_epi32(D0, biasValue0); + // D1 = _mm256_add_epi32(D1, biasValue0); + // D2 = _mm256_add_epi32(D2, biasValue0); + // D3 = _mm256_add_epi32(D3, biasValue0); auto scaleValue = _mm256_loadu_ps(scale_dz); auto f0 = _mm256_cvtepi32_ps(D0); auto f1 = _mm256_cvtepi32_ps(D1); auto f2 = _mm256_cvtepi32_ps(D2); auto f3 = _mm256_cvtepi32_ps(D3); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm256_mul_ps(kernelSum2, weightBiasValue); // .. third + auto xy0_3 = _mm256_mul_ps(kernelSum3, weightBiasValue); // ..fourth f0 = _mm256_mul_ps(f0, scaleValue); f1 = _mm256_mul_ps(f1, scaleValue); f2 = _mm256_mul_ps(f2, scaleValue); f3 = _mm256_mul_ps(f3, scaleValue); + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + f2 = _mm256_add_ps(f2, xy0_2); + f3 = _mm256_add_ps(f3, xy0_3); + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + f2 = _mm256_add_ps(f2, biasValue); + f3 = _mm256_add_ps(f3, biasValue); if (post->useInt8 == 0) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f2 = _mm256_min_ps(f2, fp32max); + f3 = _mm256_min_ps(f3, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + f2 = _mm256_max_ps(f2, fp32min); + f3 = _mm256_max_ps(f3, fp32min); _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); @@ -376,7 +1072,8 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, if (3 == realDst) { for (int dz = 0; dz < dst_depth_quad; ++dz) { const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto bias_dz = post->biasFloat + dz * AVX2_PACKINT8; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -402,19 +1099,38 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, auto D1 = D01; auto D2 = D02; - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); - D1 = _mm256_add_epi32(D1, biasValue0); - D2 = _mm256_add_epi32(D2, biasValue0); + // auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); + // D0 = _mm256_add_epi32(D0, biasValue0); + // D1 = _mm256_add_epi32(D1, biasValue0); + // D2 = _mm256_add_epi32(D2, biasValue0); auto scaleValue = _mm256_loadu_ps(scale_dz); + auto f0 = _mm256_cvtepi32_ps(D0); auto f1 = _mm256_cvtepi32_ps(D1); auto f2 = _mm256_cvtepi32_ps(D2); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm256_mul_ps(kernelSum2, weightBiasValue); // .. 
third f0 = _mm256_mul_ps(f0, scaleValue); f1 = _mm256_mul_ps(f1, scaleValue); f2 = _mm256_mul_ps(f2, scaleValue); + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + f2 = _mm256_add_ps(f2, xy0_2); + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); + f2 = _mm256_add_ps(f2, biasValue); if (post->useInt8 == 0) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f2 = _mm256_min_ps(f2, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); + f2 = _mm256_max_ps(f2, fp32min); _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); _mm256_storeu_ps(((float*)dst_x) + 2 * AVX2_PACKINT8, f2); @@ -429,7 +1145,8 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, if (2 == realDst) { for (int dz = 0; dz < dst_depth_quad; ++dz) { const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto bias_dz = post->biasFloat + dz * AVX2_PACKINT8; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -451,16 +1168,26 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, auto D0 = D00; auto D1 = D01; - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); - D1 = _mm256_add_epi32(D1, biasValue0); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); auto scaleValue = _mm256_loadu_ps(scale_dz); auto f0 = _mm256_cvtepi32_ps(D0); auto f1 = _mm256_cvtepi32_ps(D1); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm256_mul_ps(kernelSum1, weightBiasValue); // ..second f0 = _mm256_mul_ps(f0, scaleValue); f1 = _mm256_mul_ps(f1, scaleValue); + f0 = _mm256_add_ps(f0, xy0_0); + f1 = _mm256_add_ps(f1, xy0_1); + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); + f1 = _mm256_add_ps(f1, biasValue); if (post->useInt8 == 0) { + f0 = _mm256_min_ps(f0, fp32max); + f1 = _mm256_min_ps(f1, fp32max); + f0 = _mm256_max_ps(f0, fp32min); + f1 = _mm256_max_ps(f1, fp32min); _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); _mm256_storeu_ps(((float*)dst_x) + 1 * AVX2_PACKINT8, f1); } else { @@ -473,7 +1200,8 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, if (1 == realDst) { for (int dz = 0; dz < dst_depth_quad; ++dz) { const auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX2_L * GEMMINT8_AVX2_H); - const auto bias_dz = post->bias + dz * AVX2_PACKINT8; + const auto bias_dz = post->biasFloat + dz * AVX2_PACKINT8; + const auto weightBias_dz = post->weightQuanBias + dz * AVX2_PACKINT8; const float* scale_dz = post->scale + dz * AVX2_PACKINT8; auto dst_z = dst + dz * dst_step_tmp; const auto src_x = src; @@ -490,14 +1218,19 @@ void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, D00 = _mm256_add_epi32(D00, _mm256_madd_epi16(_mm256_maddubs_epi16(s0, w0), oneValue)); } auto D0 = D00; - - auto biasValue0 = _mm256_loadu_si256((__m256i*)(bias_dz)); - D0 = _mm256_add_epi32(D0, biasValue0); + auto weightBiasValue = _mm256_loadu_ps((float*)weightBias_dz); auto scaleValue = _mm256_loadu_ps(scale_dz); auto f0 = 
_mm256_cvtepi32_ps(D0); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm256_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first f0 = _mm256_mul_ps(f0, scaleValue); + f0 = _mm256_add_ps(f0, xy0_0); + auto biasValue = _mm256_loadu_ps(bias_dz); + f0 = _mm256_add_ps(f0, biasValue); if (post->useInt8 == 0) { + f0 = _mm256_min_ps(f0, fp32max); + f0 = _mm256_max_ps(f0, fp32min); _mm256_storeu_ps(((float*)dst_x) + 0 * AVX2_PACKINT8, f0); } else { POSTTREAT(0); @@ -747,6 +1480,9 @@ void _AVX_MNNInt8FunctionInit(void* functions) { gAVX2CoreInt8Functions->Int8GemmKernelFast = _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast; gAVX2CoreInt8Functions->MNNGetGemmUnit = _AVX2_MNNGetGemmUnit; gAVX2CoreInt8Functions->MNNPackC4Int8ForMatMul_A = _AVXMNNPackC4ForMatMul_A; +#ifdef MNN_LOW_MEMORY + gAVX2CoreInt8Functions->Int8GemmKernel_W4 = _AVX_MNNGemmInt8AddBiasScale_16x4_w4; +#endif // Int8 <-> Float gAVX2CoreInt8Functions->MNNFloat2Int8 = _AVX_MNNFloat2Int8; diff --git a/source/backend/cpu/x86_x64/avx512/GemmInt8.cpp b/source/backend/cpu/x86_x64/avx512/GemmInt8.cpp index 1dc73cbab..6eb8a5379 100644 --- a/source/backend/cpu/x86_x64/avx512/GemmInt8.cpp +++ b/source/backend/cpu/x86_x64/avx512/GemmInt8.cpp @@ -14,10 +14,12 @@ #ifdef MNN_AVX512_VNNI extern void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst); extern void _AVX512_MNNLineDepthWiseInt8AddBiasScaleUnit_VNNI(int8_t* dstO, const int8_t* srcO, const int8_t* weightO, const QuanPostTreatParameters* parameters, size_t width, size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, int8_t* idxOrder=nullptr); +extern void _AVX512_MNNGemmInt8AddBiasScale_16x4_w4_Unit_VNNI(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst); #endif // Define in GemmInt8_4_4_64.cpp extern void _AVX512_NO_VNNI_4_4_64(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst); +extern void _AVX512_NO_VNNI_4_4_64_w4(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst); // Define in GemmInt8_4_4_64_7bit.cpp extern void _AVX512_NO_VNNI_4_4_64_7bit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst); @@ -123,7 +125,6 @@ static void _AVX512BasicMNNPackC4ForMatMul_A(int8_t* destOrigin, int8_t const** } } } - } @@ -201,32 +202,53 @@ void _AVX512_MNNLineDepthWiseInt8AddBiasScaleUnit(int8_t* dstO, const int8_t* sr } } void _AVX512_MNNFloat2Int8(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minV, ssize_t maxV, ssize_t zeroPoint) { - auto zero = _mm512_setzero_ps(); - auto minValue = _mm512_set1_ps(minV); - auto maxValue = _mm512_set1_ps(maxV); - auto zeroPointValue = _mm512_set1_ps(zeroPoint); - auto offset = _mm512_set1_ps(128.f); - auto plus = _mm512_set1_ps(0.5f); - auto minus = _mm512_set1_ps(-0.5f); - auto scaleValue0 = _mm512_loadu_ps(scalep); + auto zero = _mm256_set1_epi32(0); + auto minValue = _mm256_set1_ps(minV); + auto maxValue = _mm256_set1_ps(maxV); + auto zeroPointValue = 
_mm256_set1_ps(zeroPoint); + auto offset = _mm256_set1_epi32(128); + auto plus = _mm256_set1_ps(0.5f); + auto minus = _mm256_set1_ps(-0.5f); + auto scaleValue0 = _mm256_loadu_ps(scalep); + auto scaleValue1 = _mm256_loadu_ps(scalep + 8); for (int i = 0; i < sizeQuad; ++i) { - auto f0 = _mm512_loadu_ps(src + PACK_UNIT * i); - f0 = _mm512_mul_ps(f0, scaleValue0); - f0 = _mm512_add_ps(f0, zeroPointValue); - f0 = _mm512_min_ps(f0, maxValue); - f0 = _mm512_max_ps(f0, minValue); - auto m0 = _mm512_cmp_ps_mask(f0, zero, 1); - auto r0 = _mm512_mask_blend_ps(m0, plus, minus); - f0 = _mm512_add_ps(f0, r0); - __m512 round0 = _mm512_roundscale_ps(f0, 3); - round0 = _mm512_add_ps(round0, offset); - auto i0_int32 = _mm512_cvtps_epi32(round0); - auto i0_int16 = _mm512_cvtsepi32_epi16(i0_int32); - auto h0_int16 = _mm256_extracti128_si256(i0_int16, 0); - auto h1_int16 = _mm256_extracti128_si256(i0_int16, 1); - h0_int16 = _mm_packus_epi16(h0_int16, h1_int16); - _mm_storeu_si128((__m128i*)(dst + i * PACK_UNIT), h0_int16); + auto f0 = _mm256_loadu_ps(src + PACK_UNIT * i); + auto f1 = _mm256_loadu_ps(src + PACK_UNIT * i + 8); + f0 = _mm256_mul_ps(f0, scaleValue0); + f1 = _mm256_mul_ps(f1, scaleValue1); + f0 = _mm256_add_ps(f0, zeroPointValue); + f1 = _mm256_add_ps(f1, zeroPointValue); + f0 = _mm256_min_ps(f0, maxValue); + f1 = _mm256_min_ps(f1, maxValue); + f0 = _mm256_max_ps(f0, minValue); + f1 = _mm256_max_ps(f1, minValue); + auto m0 = _mm256_cmp_ps(f0, _mm256_castsi256_ps(zero), 1); + auto m1 = _mm256_cmp_ps(f1, _mm256_castsi256_ps(zero), 1); + m0 = _mm256_blendv_ps(plus, minus, m0); + m1 = _mm256_blendv_ps(plus, minus, m1); + f0 = _mm256_add_ps(f0, m0); + f1 = _mm256_add_ps(f1, m1); + // 3: _MM_FROUND_TO_ZERO + auto d0 = _mm256_cvtps_epi32(_mm256_round_ps(f0, 3)); + auto d1 = _mm256_cvtps_epi32(_mm256_round_ps(f1, 3)); + d0 = _mm256_add_epi32(d0, offset); + d1 = _mm256_add_epi32(d1, offset); + d0 = _mm256_packs_epi32(d0, _mm256_setzero_si256()); + d1 = _mm256_packs_epi32(d1, _mm256_setzero_si256()); + d0 = _mm256_permute4x64_epi64(d0, 0xD8); + d1 = _mm256_permute4x64_epi64(d1, 0xD8); +#if defined(_MSC_VER) + __m256i x = static_cast<__m256i>(_mm256_packus_epi16(d0, _mm256_setzero_si256())); + __m256i y = static_cast<__m256i>(_mm256_packus_epi16(d1, _mm256_setzero_si256())); + *((int64_t*)dst + 2 * i + 0) = x.m256i_i64[0]; + *((int64_t*)dst + 2 * i + 1) = y.m256i_i64[0]; +#else + __v4di x = static_cast<__v4di>(_mm256_packus_epi16(d0, _mm256_setzero_si256())); + __v4di y = static_cast<__v4di>(_mm256_packus_epi16(d1, _mm256_setzero_si256())); + *((int64_t*)dst + 2 * i + 0) = x[0]; + *((int64_t*)dst + 2 * i + 1) = y[0]; +#endif } } @@ -296,17 +318,22 @@ void _AVX512_MNNInt8FunctionInit(void* functions, bool supportVNNI) { if (supportVNNI) { gAVX2CoreInt8Functions->Int8GemmKernel = _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI; gAVX2CoreInt8Functions->Int8GemmKernelFast = _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI; + gAVX2CoreInt8Functions->Int8GemmKernel_W4 = _AVX512_MNNGemmInt8AddBiasScale_16x4_w4_Unit_VNNI; // conv depthwise gAVX2CoreInt8Functions->ConvDepthwiseLineInt8 = _AVX512_MNNLineDepthWiseInt8AddBiasScaleUnit_VNNI; // MatMul gAVX2CoreInt8Functions->MNNGetGemmUnit = _AVX512_MNNGetGemmUnit_VNNI; // Im2Col gAVX2CoreInt8Functions->MNNPackC4Int8ForMatMul_A = _AVX512BasicMNNPackC4ForMatMul_A; + + + } else #endif { gAVX2CoreInt8Functions->Int8GemmKernel = _AVX512_NO_VNNI_4_4_64; gAVX2CoreInt8Functions->Int8GemmKernelFast = _AVX512_NO_VNNI_4_4_64_7bit; + gAVX2CoreInt8Functions->Int8GemmKernel_W4 = 
_AVX512_NO_VNNI_4_4_64_w4;
         // conv depthwise
         gAVX2CoreInt8Functions->ConvDepthwiseLineInt8 = _AVX512_MNNLineDepthWiseInt8AddBiasScaleUnit;
         // MatMul
diff --git a/source/backend/cpu/x86_x64/avx512/GemmInt8_4_4_64_NOVNNI.cpp b/source/backend/cpu/x86_x64/avx512/GemmInt8_4_4_64_NOVNNI.cpp
index 0df2809d6..7273eab05 100644
--- a/source/backend/cpu/x86_x64/avx512/GemmInt8_4_4_64_NOVNNI.cpp
+++ b/source/backend/cpu/x86_x64/avx512/GemmInt8_4_4_64_NOVNNI.cpp
@@ -16,4 +16,5 @@ static inline __m512i mnn_mm512_dpbusds_epi32_replace(__m512i dst, __m512i src,
 }
 
 #define MATMULCOREFUNC_NAME _AVX512_NO_VNNI_4_4_64
+#define MATMULCOREFUNC_NAME_W4 _AVX512_NO_VNNI_4_4_64_w4
 #include "Matmul_4_4_64.inl"
\ No newline at end of file
diff --git a/source/backend/cpu/x86_x64/avx512/GemmInt8_VNNI.cpp b/source/backend/cpu/x86_x64/avx512/GemmInt8_VNNI.cpp
index f97480b68..31335e2cf 100644
--- a/source/backend/cpu/x86_x64/avx512/GemmInt8_VNNI.cpp
+++ b/source/backend/cpu/x86_x64/avx512/GemmInt8_VNNI.cpp
@@ -13,10 +13,13 @@
 #define GEMMINT8_AVX512_H GEMMINT8_AVX512_H_VNNI
 #define _MM256_SET_M128I(__H, __L) _mm256_insertf128_si256(_mm256_castsi128_si256(__L), __H, 1) // for compile compatibility
 #define AVX512_BROADCAST_INT32(src) _mm512_castps_si512(_mm512_broadcastss_ps(_mm_load_ss(src)))
+
+#define DEQUANT_VALUE(N) \
+    auto f##N = _mm512_cvtepi32_ps(D##N);\
+    f##N = _mm512_mul_ps(f##N, scaleValue);
+
 #define SCALE_BIAS_VEC(N) \
-    auto d##N = _mm512_add_epi32(D##N, biasValue);\
-    auto f##N = _mm512_cvtepi32_ps(d##N);\
-    f##N = _mm512_mul_ps(f##N, scaleValue);
+    f##N = _mm512_add_ps(f##N, biasValue);
 
 #define POSTTREAT(N, O) \
     f##N = _mm512_min_ps(f##N, maxValue);\
@@ -24,13 +27,76 @@
     auto m##N = _mm512_cmp_ps_mask(f##N, zero512, 1);\
     auto b##N = _mm512_mask_blend_ps(m##N, plus, minus);\
     f##N = _mm512_add_ps(f##N, b##N);\
-    d##N = _mm512_cvtps_epi32(_mm512_roundscale_ps(f##N, 3));\
+    auto d##N = _mm512_cvtps_epi32(_mm512_roundscale_ps(f##N, 3));\
     auto hd##N = _mm512_cvtsepi32_epi16(d##N); hd##N = _mm256_add_epi16(hd##N, offset);\
     auto h0##N = _mm256_extracti128_si256(hd##N, 0);\
     auto h1##N = _mm256_extracti128_si256(hd##N, 1);\
     h0##N = _mm_packus_epi16(h0##N, h1##N);\
     _mm_storeu_si128((__m128i*)dst_x + O, h0##N);
 
+#define POST_TREAT_FLOAT(N,M,K,V) \
+    f##N = _mm512_min_ps(f##N, fp32max);\
+    f##N = _mm512_max_ps(f##N, fp32min);\
+    f##M = _mm512_min_ps(f##M, fp32max);\
+    f##M = _mm512_max_ps(f##M, fp32min);\
+    f##K = _mm512_min_ps(f##K, fp32max);\
+    f##K = _mm512_max_ps(f##K, fp32min);\
+    f##V = _mm512_min_ps(f##V, fp32max);\
+    f##V = _mm512_max_ps(f##V, fp32min);
+
+#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS \
+    xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue);\
+    xy0_1 = _mm512_mul_ps(kernelSum1, weightBiasValue);\
+    xy0_2 = _mm512_mul_ps(kernelSum2, weightBiasValue);\
+    xy0_3 = _mm512_mul_ps(kernelSum3, weightBiasValue);
+
+#define PLUS_TERM(N,M,K,V) \
+    f##N = _mm512_add_ps(f##N, xy0_0);\
+    f##M = _mm512_add_ps(f##M, xy0_1);\
+    f##K = _mm512_add_ps(f##K, xy0_2);\
+    f##V = _mm512_add_ps(f##V, xy0_3);
+
+#define POST_TREAT_FLOAT_3(N,M,K) \
+    f##N = _mm512_min_ps(f##N, fp32max);\
+    f##N = _mm512_max_ps(f##N, fp32min);\
+    f##M = _mm512_min_ps(f##M, fp32max);\
+    f##M = _mm512_max_ps(f##M, fp32min);\
+    f##K = _mm512_min_ps(f##K, fp32max);\
+    f##K = _mm512_max_ps(f##K, fp32min);
+
+#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3 \
+    xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue);\
+    xy0_1 = _mm512_mul_ps(kernelSum1, weightBiasValue);\
+    xy0_2 = _mm512_mul_ps(kernelSum2, weightBiasValue);
+
+#define PLUS_TERM_3(N,M,K) \
+    f##N = 
_mm512_add_ps(f##N, xy0_0);\ + f##M = _mm512_add_ps(f##M, xy0_1);\ + f##K = _mm512_add_ps(f##K, xy0_2); + +#define POST_TREAT_FLOAT_2(N,M) \ + f##N = _mm512_min_ps(f##N, fp32max);\ + f##N = _mm512_max_ps(f##N, fp32min);\ + f##M = _mm512_min_ps(f##M, fp32max);\ + f##M = _mm512_max_ps(f##M, fp32min); + +#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2 \ + xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue);\ + xy0_1 = _mm512_mul_ps(kernelSum1, weightBiasValue); + +#define PLUS_TERM_2(N,M) \ + f##N = _mm512_add_ps(f##N, xy0_0);\ + f##M = _mm512_add_ps(f##M, xy0_1); + +#define POST_TREAT_FLOAT_1(N) \ + f##N = _mm512_min_ps(f##N, fp32max);\ + f##N = _mm512_max_ps(f##N, fp32min); + +#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1 \ + xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue); + +#define PLUS_TERM_1(N) \ + f##N = _mm512_add_ps(f##N, xy0_0); // GemmInt8 with VNNI void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { @@ -44,10 +110,69 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s int dzUnit = GEMMINT8_AVX512_H / PACK_UNIT; int dzU = dst_depth_quad / dzUnit; int dzR = dst_depth_quad % dzUnit; + __m512 fp32min, fp32max; + if (0 == post->useInt8 && post->fp32minmax) { + fp32min = _mm512_set1_ps((post->fp32minmax)[0]); + fp32max = _mm512_set1_ps((post->fp32minmax)[1]); + } + auto blockNum = post->blockNum; + const float* biasPtr = nullptr; + const float* bias_dz = nullptr; + const float* extraB_dz = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + auto srcKernelSumPtr = post->srcKernelSum; + __m512 kernelSum0 = _mm512_setzero_ps(); + __m512 kernelSum1 = _mm512_setzero_ps(); + __m512 kernelSum2 = _mm512_setzero_ps(); + __m512 kernelSum3 = _mm512_setzero_ps(); + if (GEMMINT8_AVX512_E == realDst) { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm512_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + } + } + auto f128 = _mm512_set1_ps(128.f); + __m512 extrascale0 = _mm512_setzero_ps(); + __m512 extrascale1 = _mm512_setzero_ps(); + __m512 extrascale2 = _mm512_setzero_ps(); + __m512 extrascale3 = _mm512_setzero_ps(); + if (post->extraScale) { + if (GEMMINT8_AVX512_E == realDst) { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + extrascale3 = _mm512_set1_ps(post->extraScale[3]); + } else { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + if (realDst > 1) { + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + } + if (realDst > 2) { + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + } + } + } + int weightZStride = blockNum * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); if (realDst == GEMMINT8_AVX512_E) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { 
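+                // Added descriptive comment (not a behavior change): extraScale is indexed per output
+                // pixel (the realDst/e dimension) and extraBias per output channel. Further below the
+                // dequantized accumulators are multiplied by extraScale, and 128.f * extraBias * extraScale
+                // is subtracted, which appears to cancel the +128 offset applied to the int8 activations
+                // before _mm512_dpbusds_epi32.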
+ extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; @@ -77,9 +202,9 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); auto s1 = AVX512_BROADCAST_INT32(src_z + 1); @@ -106,37 +231,185 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D14 = _mm512_dpbusds_epi32(D14, s2, w3); D15 = _mm512_dpbusds_epi32(D15, s3, w3); } - - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); - SCALE_BIAS_VEC(3); + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); - SCALE_BIAS_VEC(5); - SCALE_BIAS_VEC(6); - SCALE_BIAS_VEC(7); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + DEQUANT_VALUE(7); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + f7 = _mm512_mul_ps(f7, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, 
extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + f7 = _mm512_sub_ps(f7, extrabias3); + } + } + + PLUS_TERM(4,5,6,7); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + SCALE_BIAS_VEC(7); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); - SCALE_BIAS_VEC(9); - SCALE_BIAS_VEC(10); - SCALE_BIAS_VEC(11); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + DEQUANT_VALUE(11); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + f11 = _mm512_mul_ps(f11, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + f11 = _mm512_sub_ps(f11, extrabias3); + } + } + + PLUS_TERM(8,9,10,11); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + SCALE_BIAS_VEC(11); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); - SCALE_BIAS_VEC(13); - SCALE_BIAS_VEC(14); - SCALE_BIAS_VEC(15); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + DEQUANT_VALUE(15); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + f15 = _mm512_mul_ps(f15, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + f15 = _mm512_sub_ps(f15, extrabias3); + } + } + + PLUS_TERM(12,13,14,15); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + SCALE_BIAS_VEC(15); + } if (post->useInt8 == 0) { + if (biasPtr == nullptr) { + auto destTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)destTmp), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f2); + f3 = 
_mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f3); + destTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f6); + f7 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f7); + destTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f10); + f11 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f11); + destTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f14); + f15 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f15); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + POST_TREAT_FLOAT(4,5,6,7); + POST_TREAT_FLOAT(8,9,10,11); + POST_TREAT_FLOAT(12,13,14,15); + } + _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -181,9 +454,15 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s POSTTREAT(15, 3); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = biasPtr + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -210,15 +489,54 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D3 = _mm512_dpbusds_epi32(D3, s3, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); - SCALE_BIAS_VEC(3); + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + f0 = 
_mm512_add_ps(_mm512_loadu_ps((float*)dst_x), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 3), f3); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -231,17 +549,28 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } // e = 3 if (realDst == 3) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; auto dst_x = dst_z; @@ -266,9 +595,9 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); auto s1 = AVX512_BROADCAST_INT32(src_z + 1); @@ -291,32 +620,160 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D14 = _mm512_dpbusds_epi32(D14, s2, w3); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); + PLUS_TERM_3(0,1,2); + if (nullptr != biasPtr) { + auto biasValue = 
_mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); - SCALE_BIAS_VEC(5); - SCALE_BIAS_VEC(6); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + } + } + + PLUS_TERM_3(4,5,6); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); - SCALE_BIAS_VEC(9); - SCALE_BIAS_VEC(10); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + } + } + + PLUS_TERM_3(8,9,10); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); - SCALE_BIAS_VEC(13); - SCALE_BIAS_VEC(14); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + } + } + + PLUS_TERM_3(12,13,14); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + 
SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + } if (post->useInt8 == 0) { + if (biasPtr == nullptr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f2); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f6); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f10); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f14); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + POST_TREAT_FLOAT_3(4,5,6); + POST_TREAT_FLOAT_3(8,9,10); + POST_TREAT_FLOAT_3(12,13,14); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -353,9 +810,15 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s POSTTREAT(14, 2); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -379,14 +842,49 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D2 = _mm512_dpbusds_epi32(D2, s2, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); - + PLUS_TERM_3(0,1,2); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } + if (post->useInt8 == 0) { + if (biasPtr == nullptr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + } 
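+                // Added descriptive comment (not a behavior change): at this point each accumulator has
+                // been dequantized as
+                //     f = float(D) * scale + srcKernelSum * weightQuanBias (+ bias)
+                // with the optional extraScale/extraBias batch-quant terms applied above; when no bias is
+                // given, the previous fp32 content of dst is accumulated instead, and fp32minmax (if set)
+                // clamps next. Scalar sketch of the same per-lane math (hypothetical helper, illustration
+                // only, not used by this kernel):
+                //     inline float dequantLane(int32_t acc, float scale, float kSum, float wQuanZero, float bias) {
+                //         return (float)acc * scale + kSum * wQuanZero + bias;
+                //     }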
+ if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -397,17 +895,28 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } // e = 2 if (realDst == 2) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; auto dst_x = dst_z; @@ -428,9 +937,9 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); auto s1 = AVX512_BROADCAST_INT32(src_z + 1); @@ -448,28 +957,135 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D13 = _mm512_dpbusds_epi32(D13, s1, w3); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); - SCALE_BIAS_VEC(5); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + + if (post->extraScale) { // Batch quant + f4 = 
_mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + } + } + + PLUS_TERM_2(4,5); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); - SCALE_BIAS_VEC(9); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + } + } + + PLUS_TERM_2(8,9); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); - SCALE_BIAS_VEC(13); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + } + } + + PLUS_TERM_2(12,13); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_2(0,1); + POST_TREAT_FLOAT_2(4,5); + POST_TREAT_FLOAT_2(8,9); + POST_TREAT_FLOAT_2(12,13); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); dst_x += dst_step_tmp; @@ -498,9 +1114,15 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s POSTTREAT(13, 1); } } - auto weight_dz = weight + 
dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -521,13 +1143,40 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D1 = _mm512_dpbusds_epi32(D1, s1, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + } + POST_TREAT_FLOAT_2(0,1); _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); } else { @@ -536,16 +1185,27 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } if (realDst == 1) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; auto dst_x = dst_z; @@ -561,9 +1221,9 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * 
PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); @@ -576,24 +1236,113 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D12 = _mm512_dpbusds_epi32(D12, s0, w3); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(0); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } - SCALE_BIAS_VEC(0); + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(4); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f4 = _mm512_sub_ps(f4, extrabias0); + } + } + + PLUS_TERM_1(4); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(8); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f8 = _mm512_sub_ps(f8, extrabias0); + } + } + + PLUS_TERM_1(8); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(12); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f12 = _mm512_sub_ps(f12, extrabias0); + } + } + + PLUS_TERM_1(12); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + auto dstTemp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp)), f0); + dstTemp += dst_step_tmp; + f4 = 
_mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f4); + dstTemp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f8); + dstTemp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f12); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + POST_TREAT_FLOAT_1(4); + POST_TREAT_FLOAT_1(8); + POST_TREAT_FLOAT_1(12); + } _mm512_storeu_ps(((float*)dst_x), f0); dst_x += dst_step_tmp; _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); @@ -614,9 +1363,15 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s POSTTREAT(12, 0); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -634,20 +1389,1324 @@ void _AVX512_MNNGemmInt8AddBiasScale_16x4_Unit_VNNI(int8_t* dst, const int8_t* s D0 = _mm512_dpbusds_epi32(D0, s0, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); - SCALE_BIAS_VEC(0); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(0); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } + + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + } _mm512_storeu_ps(((float*)dst_x), f0); } else { POSTTREAT(0, 0); } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } +} + +// GemmInt8 with VNNI int4-weight fp32-output +void _AVX512_MNNGemmInt8AddBiasScale_16x4_w4_Unit_VNNI(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { + MNN_ASSERT(post->useInt8 == 0); + const auto dst_step_tmp = dst_step / sizeof(int8_t); + auto zero512 = _mm512_set1_ps(0.0f); + int dzUnit = GEMMINT8_AVX512_H / PACK_UNIT; + int dzU = dst_depth_quad / dzUnit; + int dzR = dst_depth_quad % dzUnit; + const __m512i mask = _mm512_set1_epi8(0xf); + __m512 fp32min, fp32max; + if (post->fp32minmax) { + fp32min = _mm512_set1_ps((post->fp32minmax)[0]); + fp32max = _mm512_set1_ps((post->fp32minmax)[1]); + } + auto blockNum = post->blockNum; + const float* biasPtr = nullptr; + const float* bias_dz = nullptr; + const float* extraB_dz = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + + auto srcKernelSumPtr = 
post->srcKernelSum; + __m512 kernelSum0 = _mm512_setzero_ps(); + __m512 kernelSum1 = _mm512_setzero_ps(); + __m512 kernelSum2 = _mm512_setzero_ps(); + __m512 kernelSum3 = _mm512_setzero_ps(); + if (GEMMINT8_AVX512_E == realDst) { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm512_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + } + } + auto f128 = _mm512_set1_ps(128.f); + __m512 extrascale0 = _mm512_setzero_ps(); + __m512 extrascale1 = _mm512_setzero_ps(); + __m512 extrascale2 = _mm512_setzero_ps(); + __m512 extrascale3 = _mm512_setzero_ps(); + if (post->extraScale) { + if (GEMMINT8_AVX512_E == realDst) { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + extrascale3 = _mm512_set1_ps(post->extraScale[3]); + } else { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + if (realDst > 1) { + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + } + if (realDst > 2) { + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + } + } + } + int weight_step_Z = static_cast(blockNum * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) / 2); // sizeof(int4_t) + int weight_step_Y = static_cast(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H / 2); // sizeof(int4_t) + + if (realDst == GEMMINT8_AVX512_E) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (post->biasFloat) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + __m512i D1 = _mm512_set1_epi32(0); + __m512i D2 = _mm512_set1_epi32(0); + __m512i D3 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + __m512i D5 = _mm512_set1_epi32(0); + __m512i D6 = _mm512_set1_epi32(0); + __m512i D7 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + __m512i D9 = _mm512_set1_epi32(0); + __m512i D10 = _mm512_set1_epi32(0); + __m512i D11 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + __m512i D13 = _mm512_set1_epi32(0); + __m512i D14 = _mm512_set1_epi32(0); + __m512i D15 = _mm512_set1_epi32(0); + + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = 
AVX512_BROADCAST_INT32(src_z + 1); + auto s2 = AVX512_BROADCAST_INT32(src_z + 2); + auto s3 = AVX512_BROADCAST_INT32(src_z + 3); + + D0 = _mm512_dpbusds_epi32(D0, s0, w0); + D1 = _mm512_dpbusds_epi32(D1, s1, w0); + D2 = _mm512_dpbusds_epi32(D2, s2, w0); + D3 = _mm512_dpbusds_epi32(D3, s3, w0); + + D4 = _mm512_dpbusds_epi32(D4, s0, w1); + D5 = _mm512_dpbusds_epi32(D5, s1, w1); + D6 = _mm512_dpbusds_epi32(D6, s2, w1); + D7 = _mm512_dpbusds_epi32(D7, s3, w1); + + D8 = _mm512_dpbusds_epi32(D8, s0, w2); + D9 = _mm512_dpbusds_epi32(D9, s1, w2); + D10 = _mm512_dpbusds_epi32(D10, s2, w2); + D11 = _mm512_dpbusds_epi32(D11, s3, w2); + + D12 = _mm512_dpbusds_epi32(D12, s0, w3); + D13 = _mm512_dpbusds_epi32(D13, s1, w3); + D14 = _mm512_dpbusds_epi32(D14, s2, w3); + D15 = _mm512_dpbusds_epi32(D15, s3, w3); + } + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } + + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + DEQUANT_VALUE(7); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + f7 = _mm512_mul_ps(f7, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + f7 = _mm512_sub_ps(f7, extrabias3); + } + } + + PLUS_TERM(4,5,6,7); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + SCALE_BIAS_VEC(7); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + DEQUANT_VALUE(11); + + if (post->extraScale) { // Batch 
quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + f11 = _mm512_mul_ps(f11, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + f11 = _mm512_sub_ps(f11, extrabias3); + } + } + + PLUS_TERM(8,9,10,11); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + SCALE_BIAS_VEC(11); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + DEQUANT_VALUE(15); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + f15 = _mm512_mul_ps(f15, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + f15 = _mm512_sub_ps(f15, extrabias3); + } + } + + PLUS_TERM(12,13,14,15); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + SCALE_BIAS_VEC(15); + } + if (biasPtr == nullptr) { + auto destTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)destTmp), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f3); + destTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f6); + f7 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f7); + destTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f10); + f11 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f11); + destTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f14); + f15 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f15); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + POST_TREAT_FLOAT(4,5,6,7); + 
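// Editor's note (hedged summary, not part of the patch): per output channel the new
// macros implement, in scalar form, roughly
//   float f = (float)acc * scale[oc];                    // DEQUANT_VALUE
//   f += srcKernelSum[e] * weightQuanBias[oc];           // SRCKERNELSUM_MUL_WEIGHTQUANBIAS + PLUS_TERM
//   if (post->extraScale) f *= extraScale[e];            // "Batch quant" branch
//   if (biasPtr) f += biasFloat[oc];                     // SCALE_BIAS_VEC, now a float add
//   f = std::min(std::max(f, fp32min), fp32max);         // POST_TREAT_FLOAT when fp32minmax is set
// where `acc`, `oc` (output channel) and `e` (column index within realDst) are
// illustrative names introduced here for explanation only.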
POST_TREAT_FLOAT(8,9,10,11); + POST_TREAT_FLOAT(12,13,14,15); + } + + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f3); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f5); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f6); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f7); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f9); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f10); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f11); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f13); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f14); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f15); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = biasPtr + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; i256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + auto s2 = AVX512_BROADCAST_INT32(src_z + 2); + auto s3 = AVX512_BROADCAST_INT32(src_z + 3); + + D0 = _mm512_dpbusds_epi32(D0, s0, w0); + D1 = _mm512_dpbusds_epi32(D1, s1, w0); + D2 = _mm512_dpbusds_epi32(D2, s2, w0); + D3 = _mm512_dpbusds_epi32(D3, s3, w0); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } + + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } + + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)dst_x), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 3), f3); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 
* 2, f2); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f3); + + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } + // e = 3 + if (realDst == 3) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (biasPtr) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + __m512i D1 = _mm512_set1_epi32(0); + __m512i D2 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + __m512i D5 = _mm512_set1_epi32(0); + __m512i D6 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + __m512i D9 = _mm512_set1_epi32(0); + __m512i D10 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + __m512i D13 = _mm512_set1_epi32(0); + __m512i D14 = _mm512_set1_epi32(0); + + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + auto s2 = AVX512_BROADCAST_INT32(src_z + 2); + + D0 = _mm512_dpbusds_epi32(D0, s0, w0); + D1 = _mm512_dpbusds_epi32(D1, s1, w0); + D2 = _mm512_dpbusds_epi32(D2, s2, w0); + + D4 = _mm512_dpbusds_epi32(D4, s0, w1); + D5 = _mm512_dpbusds_epi32(D5, s1, w1); + D6 = _mm512_dpbusds_epi32(D6, s2, w1); + + D8 = _mm512_dpbusds_epi32(D8, s0, w2); + D9 = _mm512_dpbusds_epi32(D9, s1, w2); + D10 = _mm512_dpbusds_epi32(D10, s2, w2); + + D12 = _mm512_dpbusds_epi32(D12, s0, w3); + D13 = _mm512_dpbusds_epi32(D13, s1, w3); + D14 = _mm512_dpbusds_epi32(D14, s2, w3); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } + + PLUS_TERM_3(0,1,2); + if (nullptr != biasPtr) { + auto 
biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + } + } + + PLUS_TERM_3(4,5,6); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + } + } + + PLUS_TERM_3(8,9,10); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + } + } + + PLUS_TERM_3(12,13,14); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + } + + if (biasPtr == nullptr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f2); + dstTmp += dst_step_tmp; + f4 = 
_mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f6); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f10); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f14); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + POST_TREAT_FLOAT_3(4,5,6); + POST_TREAT_FLOAT_3(8,9,10); + POST_TREAT_FLOAT_3(12,13,14); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f5); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f6); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f9); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f10); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f13); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f14); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; iextraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } + + PLUS_TERM_3(0,1,2); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } + + if (biasPtr == nullptr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } + // e = 2 + if (realDst == 2) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + 
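// Editor's note: a hedged reading of the "Batch quant" branches above. extraScale is a
// per-column (per e) dynamic input scale, and extraBias appears to carry the per-channel
// weight-sum term needed to undo the +128 offset that makes the activations unsigned for
// _mm512_dpbusds_epi32. In scalar form, roughly:
//   f = f * extraScale[e];
//   if (post->extraBias && biasPtr) f -= 128.f * extraBias[oc] * extraScale[e];
// `e` and `oc` are illustrative indices; the interpretation of extraBias is an assumption.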
} + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + __m512i D1 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + __m512i D5 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + __m512i D9 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + __m512i D13 = _mm512_set1_epi32(0); + + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + + D0 = _mm512_dpbusds_epi32(D0, s0, w0); + D1 = _mm512_dpbusds_epi32(D1, s1, w0); + + D4 = _mm512_dpbusds_epi32(D4, s0, w1); + D5 = _mm512_dpbusds_epi32(D5, s1, w1); + + D8 = _mm512_dpbusds_epi32(D8, s0, w2); + D9 = _mm512_dpbusds_epi32(D9, s1, w2); + + D12 = _mm512_dpbusds_epi32(D12, s0, w3); + D13 = _mm512_dpbusds_epi32(D13, s1, w3); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } + + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + } + } + + PLUS_TERM_2(4,5); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * 
PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + } + } + + PLUS_TERM_2(8,9); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + } + } + + PLUS_TERM_2(12,13); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + } + + if (nullptr == biasPtr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_2(0,1); + POST_TREAT_FLOAT_2(4,5); + POST_TREAT_FLOAT_2(8,9); + POST_TREAT_FLOAT_2(12,13); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f5); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f9); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f13); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; iextraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + 
auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } + + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } + + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + } + POST_TREAT_FLOAT_2(0,1); + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } + if (realDst == 1) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + + D0 = _mm512_dpbusds_epi32(D0, s0, w0); + + D4 = _mm512_dpbusds_epi32(D4, s0, w1); + + D8 = _mm512_dpbusds_epi32(D8, s0, w2); + + D12 = _mm512_dpbusds_epi32(D12, s0, w3); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(0); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } + + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(4); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = 
_mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f4 = _mm512_sub_ps(f4, extrabias0); + } + } + + PLUS_TERM_1(4); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(8); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f8 = _mm512_sub_ps(f8, extrabias0); + } + } + + PLUS_TERM_1(8); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(12); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f12 = _mm512_sub_ps(f12, extrabias0); + } + } + + PLUS_TERM_1(12); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + } + + if (nullptr == biasPtr) { + auto dstTemp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp)), f0); + dstTemp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f4); + dstTemp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f8); + dstTemp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f12); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + POST_TREAT_FLOAT_1(4); + POST_TREAT_FLOAT_1(8); + POST_TREAT_FLOAT_1(12); + } + _mm512_storeu_ps(((float*)dst_x), f0); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; iextraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } + + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } + + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + } 
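// Editor's note: sketch of the int4 -> int8 unpack used throughout this kernel, assuming
// `mask` holds 0x0F in every byte lane (the shift is done on 16-bit lanes, so the mask
// also clears bits shifted in from the neighbouring byte). Per packed byte:
//   uint8_t packed = weight_sz[i];
//   int hi = (packed >> 4) & 0x0F;   // feeds w0 / w1
//   int lo =  packed       & 0x0F;   // feeds w2 / w3
// so one 64-byte _mm512_loadu_si512 yields 128 int4 weights, i.e. two __m512i of int8;
// the weight zero point is compensated later through weightQuanBias.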
+ _mm512_storeu_ps(((float*)dst_x), f0); + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } diff --git a/source/backend/cpu/x86_x64/avx512/Matmul_4_4_64.inl b/source/backend/cpu/x86_x64/avx512/Matmul_4_4_64.inl index aae677b09..5addec946 100644 --- a/source/backend/cpu/x86_x64/avx512/Matmul_4_4_64.inl +++ b/source/backend/cpu/x86_x64/avx512/Matmul_4_4_64.inl @@ -1,10 +1,13 @@ #define GEMMINT8_AVX512_H GEMMINT8_AVX512_H_NOVNNI #define AVX512_BROADCAST_INT32(src) _mm512_castps_si512(_mm512_broadcastss_ps(_mm_load_ss(src))) + +#define DEQUANT_VALUE(N) \ + auto f##N = _mm512_cvtepi32_ps(D##N);\ + f##N = _mm512_mul_ps(f##N, scaleValue); + #define SCALE_BIAS_VEC(N) \ - auto d##N = _mm512_add_epi32(D##N, biasValue);\ - auto f##N = _mm512_cvtepi32_ps(d##N);\ - f##N = _mm512_mul_ps(f##N, scaleValue); + f##N = _mm512_add_ps(f##N, biasValue); #define POSTTREAT(N, O) \ f##N = _mm512_min_ps(f##N, maxValue);\ @@ -12,13 +15,77 @@ auto m##N = _mm512_cmp_ps_mask(f##N, zero512, 1);\ auto b##N = _mm512_mask_blend_ps(m##N, plus, minus);\ f##N = _mm512_add_ps(f##N, b##N);\ - d##N = _mm512_cvtps_epi32(_mm512_roundscale_ps(f##N, 3));\ + auto d##N = _mm512_cvtps_epi32(_mm512_roundscale_ps(f##N, 3));\ auto hd##N = _mm512_cvtsepi32_epi16(d##N); hd##N = _mm256_add_epi16(hd##N, offset);\ auto h0##N = _mm256_extracti128_si256(hd##N, 0);\ auto h1##N = _mm256_extracti128_si256(hd##N, 1);\ h0##N = _mm_packus_epi16(h0##N, h1##N);\ _mm_storeu_si128((__m128i*)dst_x + O, h0##N); +#define POST_TREAT_FLOAT(N,M,K,V) \ + f##N = _mm512_min_ps(f##N, fp32max);\ + f##N = _mm512_max_ps(f##N, fp32min);\ + f##M = _mm512_min_ps(f##M, fp32max);\ + f##M = _mm512_max_ps(f##M, fp32min);\ + f##K = _mm512_min_ps(f##K, fp32max);\ + f##K = _mm512_max_ps(f##K, fp32min);\ + f##V = _mm512_min_ps(f##V, fp32max);\ + f##V = _mm512_max_ps(f##V, fp32min); + +#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS \ + xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue);\ + xy0_1 = _mm512_mul_ps(kernelSum1, weightBiasValue);\ + xy0_2 = _mm512_mul_ps(kernelSum2, weightBiasValue);\ + xy0_3 = _mm512_mul_ps(kernelSum3, weightBiasValue); + +#define PLUS_TERM(N,M,K,V) \ + f##N = _mm512_add_ps(f##N, xy0_0);\ + f##M = _mm512_add_ps(f##M, xy0_1);\ + f##K = _mm512_add_ps(f##K, xy0_2);\ + f##V = _mm512_add_ps(f##V, xy0_3); + +#define POST_TREAT_FLOAT_3(N,M,K) \ + f##N = _mm512_min_ps(f##N, fp32max);\ + f##N = _mm512_max_ps(f##N, fp32min);\ + f##M = _mm512_min_ps(f##M, fp32max);\ + f##M = _mm512_max_ps(f##M, fp32min);\ + f##K = _mm512_min_ps(f##K, fp32max);\ + f##K = _mm512_max_ps(f##K, fp32min); + +#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3 \ + xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue);\ + xy0_1 = _mm512_mul_ps(kernelSum1, weightBiasValue);\ + xy0_2 = _mm512_mul_ps(kernelSum2, weightBiasValue); + +#define PLUS_TERM_3(N,M,K) \ + f##N = _mm512_add_ps(f##N, xy0_0);\ + f##M = _mm512_add_ps(f##M, xy0_1);\ + f##K = _mm512_add_ps(f##K, xy0_2); + +#define POST_TREAT_FLOAT_2(N,M) \ + f##N = _mm512_min_ps(f##N, fp32max);\ + f##N = _mm512_max_ps(f##N, fp32min);\ + f##M = _mm512_min_ps(f##M, fp32max);\ + f##M = _mm512_max_ps(f##M, fp32min); + +#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2 \ + xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue);\ + xy0_1 = _mm512_mul_ps(kernelSum1, weightBiasValue); + +#define PLUS_TERM_2(N,M) \ + f##N = _mm512_add_ps(f##N, xy0_0);\ + f##M = _mm512_add_ps(f##M, xy0_1); + +#define POST_TREAT_FLOAT_1(N) \ 
+ f##N = _mm512_min_ps(f##N, fp32max);\ + f##N = _mm512_max_ps(f##N, fp32min); + +#define SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1 \ + xy0_0 = _mm512_mul_ps(kernelSum0, weightBiasValue); + +#define PLUS_TERM_1(N) \ + f##N = _mm512_add_ps(f##N, xy0_0); + // GemmInt8 with NO VNNI void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { @@ -33,10 +100,71 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s int dzU = dst_depth_quad / dzUnit; int dzR = dst_depth_quad % dzUnit; auto one = _mm512_set1_epi16(1); + __m512 fp32min, fp32max; + if (0 == post->useInt8 && post->fp32minmax) { + fp32min = _mm512_set1_ps((post->fp32minmax)[0]); + fp32max = _mm512_set1_ps((post->fp32minmax)[1]); + } + auto blockNum = post->blockNum; + const float* biasPtr = nullptr; + const float* bias_dz = nullptr; + const float* extraB_dz = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + + int weightZStride = blockNum * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); + + auto srcKernelSumPtr = post->srcKernelSum; + __m512 kernelSum0 = _mm512_setzero_ps(); + __m512 kernelSum1 = _mm512_setzero_ps(); + __m512 kernelSum2 = _mm512_setzero_ps(); + __m512 kernelSum3 = _mm512_setzero_ps(); + if (GEMMINT8_AVX512_E == realDst) { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm512_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + } + } + auto f128 = _mm512_set1_ps(128.f); + __m512 extrascale0 = _mm512_setzero_ps(); + __m512 extrascale1 = _mm512_setzero_ps(); + __m512 extrascale2 = _mm512_setzero_ps(); + __m512 extrascale3 = _mm512_setzero_ps(); + if (post->extraScale) { + if (GEMMINT8_AVX512_E == realDst) { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + extrascale3 = _mm512_set1_ps(post->extraScale[3]); + } else { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + if (realDst > 1) { + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + } + if (realDst > 2) { + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + } + } + } if (realDst == GEMMINT8_AVX512_E) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (post->biasFloat) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; @@ -66,9 +194,9 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = 
_mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); auto s1 = AVX512_BROADCAST_INT32(src_z + 1); @@ -95,37 +223,185 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D14 = mnn_mm512_dpbusds_epi32(D14, s2, w3); D15 = mnn_mm512_dpbusds_epi32(D15, s3, w3); } - - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); - SCALE_BIAS_VEC(3); + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); - SCALE_BIAS_VEC(5); - SCALE_BIAS_VEC(6); - SCALE_BIAS_VEC(7); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + DEQUANT_VALUE(7); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + f7 = _mm512_mul_ps(f7, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + f7 = _mm512_sub_ps(f7, extrabias3); + } + } + + PLUS_TERM(4,5,6,7); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + SCALE_BIAS_VEC(7); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * 
PACK_UNIT); - SCALE_BIAS_VEC(8); - SCALE_BIAS_VEC(9); - SCALE_BIAS_VEC(10); - SCALE_BIAS_VEC(11); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + DEQUANT_VALUE(11); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + f11 = _mm512_mul_ps(f11, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + f11 = _mm512_sub_ps(f11, extrabias3); + } + } + + PLUS_TERM(8,9,10,11); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + SCALE_BIAS_VEC(11); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); - SCALE_BIAS_VEC(13); - SCALE_BIAS_VEC(14); - SCALE_BIAS_VEC(15); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + DEQUANT_VALUE(15); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + f15 = _mm512_mul_ps(f15, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + f15 = _mm512_sub_ps(f15, extrabias3); + } + } + + PLUS_TERM(12,13,14,15); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + SCALE_BIAS_VEC(15); + } if (post->useInt8 == 0) { + if (biasPtr == nullptr) { + auto destTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)destTmp), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f3); + destTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f6); + f7 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f7); + destTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f9); + f10 = 
_mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f10); + f11 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f11); + destTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f14); + f15 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f15); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + POST_TREAT_FLOAT(4,5,6,7); + POST_TREAT_FLOAT(8,9,10,11); + POST_TREAT_FLOAT(12,13,14,15); + } + _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -170,9 +446,15 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s POSTTREAT(15, 3); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = biasPtr + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -199,15 +481,54 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D3 = mnn_mm512_dpbusds_epi32(D3, s3, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); - SCALE_BIAS_VEC(3); + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)dst_x), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 3), f3); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -220,17 +541,28 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s } dst_x += 
dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } // e = 3 if (realDst == 3) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; auto dst_x = dst_z; @@ -255,9 +587,9 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); auto s1 = AVX512_BROADCAST_INT32(src_z + 1); @@ -280,32 +612,160 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D14 = mnn_mm512_dpbusds_epi32(D14, s2, w3); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); + PLUS_TERM_3(0,1,2); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); - SCALE_BIAS_VEC(5); - SCALE_BIAS_VEC(6); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = 
_mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + } + } + + PLUS_TERM_3(4,5,6); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); - SCALE_BIAS_VEC(9); - SCALE_BIAS_VEC(10); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + } + } + + PLUS_TERM_3(8,9,10); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); - SCALE_BIAS_VEC(13); - SCALE_BIAS_VEC(14); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + } + } + + PLUS_TERM_3(12,13,14); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + } if (post->useInt8 == 0) { + if (biasPtr == nullptr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f2); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f6); + 
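// Editor's note: two related changes in this hunk, summarized as a hedged sketch. The
// bias is no longer added as int32 before the int32->float conversion; SCALE_BIAS_VEC now
// adds post->biasFloat after scaling. And when biasPtr is null the tile accumulates into
// the existing dst values, presumably so partial sums from the post->blockNum quantization
// blocks can be combined across successive calls:
//   float f = (float)acc * scale + kernelSum * weightQuanBias;
//   if (biasPtr) f += bias;        // first (or only) block
//   else         f += dst[idx];    // later blocks: add to the partial result already in dst
//   if (post->fp32minmax) f = std::min(std::max(f, fp32min), fp32max);
//   dst[idx] = f;
// `idx` is an illustrative flat index into the float output tile.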
dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f10); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f14); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + POST_TREAT_FLOAT_3(4,5,6); + POST_TREAT_FLOAT_3(8,9,10); + POST_TREAT_FLOAT_3(12,13,14); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -342,9 +802,15 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s POSTTREAT(14, 2); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -368,14 +834,49 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D2 = mnn_mm512_dpbusds_epi32(D2, s2, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); - SCALE_BIAS_VEC(2); - + PLUS_TERM_3(0,1,2); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } + if (post->useInt8 == 0) { + if (biasPtr == nullptr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); @@ -386,17 +887,28 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * 
GEMMINT8_AVX512_L; } return; } // e = 2 if (realDst == 2) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; auto dst_x = dst_z; @@ -417,9 +929,9 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); auto s1 = AVX512_BROADCAST_INT32(src_z + 1); @@ -437,28 +949,135 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D13 = mnn_mm512_dpbusds_epi32(D13, s1, w3); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); - SCALE_BIAS_VEC(5); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + } + } + + PLUS_TERM_2(4,5); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + 
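// per-channel bias for the second PACK_UNIT of output channels +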
SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); - SCALE_BIAS_VEC(9); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + } + } + + PLUS_TERM_2(8,9); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); - SCALE_BIAS_VEC(13); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + } + } + + PLUS_TERM_2(12,13); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_2(0,1); + POST_TREAT_FLOAT_2(4,5); + POST_TREAT_FLOAT_2(8,9); + POST_TREAT_FLOAT_2(12,13); + } _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); dst_x += dst_step_tmp; @@ -487,9 +1106,15 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s POSTTREAT(13, 1); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * 
dzUnit; const auto src_x = src; @@ -510,13 +1135,40 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D1 = mnn_mm512_dpbusds_epi32(D1, s1, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } - SCALE_BIAS_VEC(0); - SCALE_BIAS_VEC(1); + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + } + POST_TREAT_FLOAT_2(0,1); _mm512_storeu_ps(((float*)dst_x), f0); _mm512_storeu_ps(((float*)dst_x) + 16, f1); } else { @@ -525,16 +1177,27 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } if (realDst == 1) { for (int dz = 0; dz < dzU; ++dz) { - auto weight_dz = weight + dz * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dz * PACK_UNIT * dzUnit; + auto weight_dz = weight + dz * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; auto dst_z = dst + dz * dst_step_tmp * dzUnit; const auto src_x = src; auto dst_x = dst_z; @@ -550,9 +1213,9 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s const auto weight_sz = weight_dz + (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) * sz; const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); auto w0 = _mm512_loadu_si512(weight_sz); - auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_E); - auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_E); + auto w1 = _mm512_loadu_si512(weight_sz + 1 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w2 = _mm512_loadu_si512(weight_sz + 2 * PACK_UNIT * GEMMINT8_AVX512_L); + auto w3 = _mm512_loadu_si512(weight_sz + 3 * PACK_UNIT * GEMMINT8_AVX512_L); auto s0 = AVX512_BROADCAST_INT32(src_z + 0); @@ -565,24 +1228,113 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D12 = mnn_mm512_dpbusds_epi32(D12, s0, w3); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = 
_mm512_loadu_ps(weightBias_dz); + __m512 xy0_0; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(0); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } - SCALE_BIAS_VEC(0); + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } - biasValue = _mm512_loadu_si512(bias_dz + 1 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); - SCALE_BIAS_VEC(4); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(4); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f4 = _mm512_sub_ps(f4, extrabias0); + } + } + + PLUS_TERM_1(4); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + } - biasValue = _mm512_loadu_si512(bias_dz + 2 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); - SCALE_BIAS_VEC(8); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(8); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f8 = _mm512_sub_ps(f8, extrabias0); + } + } + + PLUS_TERM_1(8); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + } - biasValue = _mm512_loadu_si512(bias_dz + 3 * PACK_UNIT); scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); - SCALE_BIAS_VEC(12); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(12); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f12 = _mm512_sub_ps(f12, extrabias0); + } + } + + PLUS_TERM_1(12); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + auto dstTemp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp)), f0); + dstTemp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f4); + dstTemp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f8); + dstTemp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f12); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + POST_TREAT_FLOAT_1(4); + POST_TREAT_FLOAT_1(8); + POST_TREAT_FLOAT_1(12); + } _mm512_storeu_ps(((float*)dst_x), f0); dst_x += dst_step_tmp; _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); 
@@ -603,9 +1355,15 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s POSTTREAT(12, 0); } } - auto weight_dz = weight + dzU * src_depth_quad * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H); - auto bias_dz = (int32_t*)post->bias + dzU * PACK_UNIT * dzUnit; + auto weight_dz = weight + dzU * weightZStride; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; auto dst_z = dst + dzU * dst_step_tmp * dzUnit; const auto src_x = src; @@ -623,20 +1381,1325 @@ void MATMULCOREFUNC_NAME(int8_t* dst, const int8_t* src, const int8_t* weight, s D0 = mnn_mm512_dpbusds_epi32(D0, s0, w0); } - auto biasValue = _mm512_loadu_si512(bias_dz); auto scaleValue = _mm512_loadu_ps(scale_dz); - SCALE_BIAS_VEC(0); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(0); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } + + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } if (post->useInt8 == 0) { + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + } _mm512_storeu_ps(((float*)dst_x), f0); } else { POSTTREAT(0, 0); } dst_x += dst_step_tmp; scale_dz += PACK_UNIT; - bias_dz += PACK_UNIT; - weight_dz += PACK_UNIT * GEMMINT8_AVX512_E; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } +} + +void MATMULCOREFUNC_NAME_W4(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { + MNN_ASSERT(post->useInt8==0); + const auto dst_step_tmp = dst_step / sizeof(int8_t); + auto zero512 = _mm512_set1_ps(0.0f); + auto offset = _mm256_set1_epi16(128); + int dzUnit = GEMMINT8_AVX512_H / PACK_UNIT; + int dzU = dst_depth_quad / dzUnit; + int dzR = dst_depth_quad % dzUnit; + auto one = _mm512_set1_epi16(1); + __m512 fp32min, fp32max; + if (0 == post->useInt8 && post->fp32minmax) { + fp32min = _mm512_set1_ps((post->fp32minmax)[0]); + fp32max = _mm512_set1_ps((post->fp32minmax)[1]); + } + auto blockNum = post->blockNum; + const float* biasPtr = nullptr; + const float* bias_dz = nullptr; + const float* extraB_dz = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + + auto srcKernelSumPtr = post->srcKernelSum; + __m512 kernelSum0 = _mm512_setzero_ps(); + __m512 kernelSum1 = _mm512_setzero_ps(); + __m512 kernelSum2 = _mm512_setzero_ps(); + __m512 kernelSum3 = _mm512_setzero_ps(); + + int weight_step_Z = static_cast<int32_t>(src_depth_quad * blockNum * (GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) / 2); + int weight_step_Y = static_cast<int32_t>(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H / 2); + const __m512i mask = _mm512_set1_epi8(0xf); + if (GEMMINT8_AVX512_E == realDst) { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + kernelSum1 = 
_mm512_set1_ps(post->srcKernelSum[1]); + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + kernelSum3 = _mm512_set1_ps(post->srcKernelSum[3]); + } else { + kernelSum0 = _mm512_set1_ps(post->srcKernelSum[0]); + if (realDst > 1) { + kernelSum1 = _mm512_set1_ps(post->srcKernelSum[1]); + } + if (realDst > 2) { + kernelSum2 = _mm512_set1_ps(post->srcKernelSum[2]); + } + } + auto f128 = _mm512_set1_ps(128.f); + __m512 extrascale0 = _mm512_setzero_ps(); + __m512 extrascale1 = _mm512_setzero_ps(); + __m512 extrascale2 = _mm512_setzero_ps(); + __m512 extrascale3 = _mm512_setzero_ps(); + if (post->extraScale) { + if (GEMMINT8_AVX512_E == realDst) { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + extrascale3 = _mm512_set1_ps(post->extraScale[3]); + } else { + extrascale0 = _mm512_set1_ps(post->extraScale[0]); + if (realDst > 1) { + extrascale1 = _mm512_set1_ps(post->extraScale[1]); + } + if (realDst > 2) { + extrascale2 = _mm512_set1_ps(post->extraScale[2]); + } + } + } + + if (realDst == GEMMINT8_AVX512_E) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (post->biasFloat) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + __m512i D1 = _mm512_set1_epi32(0); + __m512i D2 = _mm512_set1_epi32(0); + __m512i D3 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + __m512i D5 = _mm512_set1_epi32(0); + __m512i D6 = _mm512_set1_epi32(0); + __m512i D7 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + __m512i D9 = _mm512_set1_epi32(0); + __m512i D10 = _mm512_set1_epi32(0); + __m512i D11 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + __m512i D13 = _mm512_set1_epi32(0); + __m512i D14 = _mm512_set1_epi32(0); + __m512i D15 = _mm512_set1_epi32(0); + + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + auto s2 = AVX512_BROADCAST_INT32(src_z + 2); + auto s3 = AVX512_BROADCAST_INT32(src_z + 3); + + D0 = mnn_mm512_dpbusds_epi32(D0, s0, w0); + D1 = mnn_mm512_dpbusds_epi32(D1, s1, w0); + D2 = mnn_mm512_dpbusds_epi32(D2, s2, w0); + D3 = mnn_mm512_dpbusds_epi32(D3, s3, w0); + + D4 = mnn_mm512_dpbusds_epi32(D4, s0, w1); + D5 = mnn_mm512_dpbusds_epi32(D5, s1, w1); + D6 = mnn_mm512_dpbusds_epi32(D6, s2, w1); + D7 = mnn_mm512_dpbusds_epi32(D7, s3, w1); + + D8 = mnn_mm512_dpbusds_epi32(D8, s0, w2); + D9 = 
mnn_mm512_dpbusds_epi32(D9, s1, w2); + D10 = mnn_mm512_dpbusds_epi32(D10, s2, w2); + D11 = mnn_mm512_dpbusds_epi32(D11, s3, w2); + + D12 = mnn_mm512_dpbusds_epi32(D12, s0, w3); + D13 = mnn_mm512_dpbusds_epi32(D13, s1, w3); + D14 = mnn_mm512_dpbusds_epi32(D14, s2, w3); + D15 = mnn_mm512_dpbusds_epi32(D15, s3, w3); + } + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2, xy0_3; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + DEQUANT_VALUE(3); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } + + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + DEQUANT_VALUE(7); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + f7 = _mm512_mul_ps(f7, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + f7 = _mm512_sub_ps(f7, extrabias3); + } + } + + PLUS_TERM(4,5,6,7); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + SCALE_BIAS_VEC(7); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + DEQUANT_VALUE(11); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + f11 = _mm512_mul_ps(f11, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, 
extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + f11 = _mm512_sub_ps(f11, extrabias3); + } + } + + PLUS_TERM(8,9,10,11); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + SCALE_BIAS_VEC(11); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + DEQUANT_VALUE(15); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + f15 = _mm512_mul_ps(f15, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + f15 = _mm512_sub_ps(f15, extrabias3); + } + } + + PLUS_TERM(12,13,14,15); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + SCALE_BIAS_VEC(15); + } + if (biasPtr == nullptr) { + auto destTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)destTmp), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f3); + destTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f6); + f7 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f7); + destTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f10); + f11 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f11); + destTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 2), f14); + f15 = _mm512_add_ps(_mm512_loadu_ps(((float*)destTmp) + 16 * 3), f15); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + POST_TREAT_FLOAT(4,5,6,7); + POST_TREAT_FLOAT(8,9,10,11); + POST_TREAT_FLOAT(12,13,14,15); + } + + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f3); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f5); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f6); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f7); + dst_x += 
dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f9); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f10); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f11); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f13); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f14); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f15); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = biasPtr + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; iextraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + f3 = _mm512_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm512_mul_ps(extrabias, extrascale3); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + f3 = _mm512_sub_ps(f3, extrabias3); + } + } + + PLUS_TERM(0,1,2,3); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + SCALE_BIAS_VEC(3); + } + + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps((float*)dst_x), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + f3 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 3), f3); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT(0,1,2,3); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + _mm512_storeu_ps(((float*)dst_x) + 16 * 3, f3); + + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } + // e = 3 + if (realDst == 3) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (biasPtr) { + bias_dz = biasPtr + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + __m512i D1 = _mm512_set1_epi32(0); + __m512i D2 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + __m512i D5 = _mm512_set1_epi32(0); + __m512i D6 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + __m512i D9 = _mm512_set1_epi32(0); + __m512i D10 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + __m512i D13 = _mm512_set1_epi32(0); + __m512i D14 = _mm512_set1_epi32(0); + + + 
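// reduce over src_depth_quad: each step loads two 64-byte vectors of packed int4 weights, expands them to int8 with the 0xf mask, and accumulates into the D* registers via dpbusds +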
for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + auto s2 = AVX512_BROADCAST_INT32(src_z + 2); + + D0 = mnn_mm512_dpbusds_epi32(D0, s0, w0); + D1 = mnn_mm512_dpbusds_epi32(D1, s1, w0); + D2 = mnn_mm512_dpbusds_epi32(D2, s2, w0); + + D4 = mnn_mm512_dpbusds_epi32(D4, s0, w1); + D5 = mnn_mm512_dpbusds_epi32(D5, s1, w1); + D6 = mnn_mm512_dpbusds_epi32(D6, s2, w1); + + D8 = mnn_mm512_dpbusds_epi32(D8, s0, w2); + D9 = mnn_mm512_dpbusds_epi32(D9, s1, w2); + D10 = mnn_mm512_dpbusds_epi32(D10, s2, w2); + + D12 = mnn_mm512_dpbusds_epi32(D12, s0, w3); + D13 = mnn_mm512_dpbusds_epi32(D13, s1, w3); + D14 = mnn_mm512_dpbusds_epi32(D14, s2, w3); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1, xy0_2; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + DEQUANT_VALUE(2); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } + + PLUS_TERM_3(0,1,2); + if (nullptr != biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + DEQUANT_VALUE(6); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + f6 = _mm512_mul_ps(f6, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + f6 = _mm512_sub_ps(f6, extrabias2); + } + } + + PLUS_TERM_3(4,5,6); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + SCALE_BIAS_VEC(6); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + 
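// same dequant sequence for the third PACK_UNIT of output channels (f8..f10) +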
weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + DEQUANT_VALUE(10); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + f10 = _mm512_mul_ps(f10, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + f10 = _mm512_sub_ps(f10, extrabias2); + } + } + + PLUS_TERM_3(8,9,10); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + SCALE_BIAS_VEC(10); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_3; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + DEQUANT_VALUE(14); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + f14 = _mm512_mul_ps(f14, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + f14 = _mm512_sub_ps(f14, extrabias2); + } + } + + PLUS_TERM_3(12,13,14); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + SCALE_BIAS_VEC(14); + } + + if (biasPtr == nullptr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f2); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + f6 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f6); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + f10 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f10); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + f14 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 2), f14); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + POST_TREAT_FLOAT_3(4,5,6); + POST_TREAT_FLOAT_3(8,9,10); + POST_TREAT_FLOAT_3(12,13,14); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f5); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f6); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + 
_mm512_storeu_ps(((float*)dst_x) + 16 * 1, f9); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f10); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f13); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f14); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; iextraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + f2 = _mm512_mul_ps(f2, extrascale2); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm512_mul_ps(extrabias, extrascale2); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + f2 = _mm512_sub_ps(f2, extrabias2); + } + } + + PLUS_TERM_3(0,1,2); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + SCALE_BIAS_VEC(2); + } + + if (biasPtr == nullptr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + f2 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16 * 2), f2); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_3(0,1,2); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + _mm512_storeu_ps(((float*)dst_x) + 16 * 2, f2); + + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } + // e = 2 + if (realDst == 2) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + __m512i D1 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + __m512i D5 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + __m512i D9 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + __m512i D13 = _mm512_set1_epi32(0); + + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, 
w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + + D0 = mnn_mm512_dpbusds_epi32(D0, s0, w0); + D1 = mnn_mm512_dpbusds_epi32(D1, s1, w0); + + D4 = mnn_mm512_dpbusds_epi32(D4, s0, w1); + D5 = mnn_mm512_dpbusds_epi32(D5, s1, w1); + + D8 = mnn_mm512_dpbusds_epi32(D8, s0, w2); + D9 = mnn_mm512_dpbusds_epi32(D9, s1, w2); + + D12 = mnn_mm512_dpbusds_epi32(D12, s0, w3); + D13 = mnn_mm512_dpbusds_epi32(D13, s1, w3); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } + + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(4); + DEQUANT_VALUE(5); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + f5 = _mm512_mul_ps(f5, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f4 = _mm512_sub_ps(f4, extrabias0); + f5 = _mm512_sub_ps(f5, extrabias1); + } + } + + PLUS_TERM_2(4,5); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + SCALE_BIAS_VEC(5); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(8); + DEQUANT_VALUE(9); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + f9 = _mm512_mul_ps(f9, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f8 = _mm512_sub_ps(f8, extrabias0); + f9 = _mm512_sub_ps(f9, extrabias1); + } + } + + PLUS_TERM_2(8,9); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + SCALE_BIAS_VEC(9); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(12); + DEQUANT_VALUE(13); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + f13 = _mm512_mul_ps(f13, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz 
+ 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f12 = _mm512_sub_ps(f12, extrabias0); + f13 = _mm512_sub_ps(f13, extrabias1); + } + } + + PLUS_TERM_2(12,13); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + SCALE_BIAS_VEC(13); + } + + if (nullptr == biasPtr) { + auto dstTmp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16), f1); + dstTmp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f4); + f5 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f5); + dstTmp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f8); + f9 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f9); + dstTmp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 0), f12); + f13 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTmp) + 16 * 1), f13); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_2(0,1); + POST_TREAT_FLOAT_2(4,5); + POST_TREAT_FLOAT_2(8,9); + POST_TREAT_FLOAT_2(12,13); + } + _mm512_storeu_ps(((float*)dst_x), f0); + _mm512_storeu_ps(((float*)dst_x) + 16, f1); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f5); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f9); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + _mm512_storeu_ps(((float*)dst_x) + 16 * 1, f13); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; i256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + auto s1 = AVX512_BROADCAST_INT32(src_z + 1); + + D0 = mnn_mm512_dpbusds_epi32(D0, s0, w0); + D1 = mnn_mm512_dpbusds_epi32(D1, s1, w0); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0, xy0_1; + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_2; + DEQUANT_VALUE(0); + DEQUANT_VALUE(1); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + f1 = _mm512_mul_ps(f1, extrascale1); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm512_mul_ps(extrabias, extrascale1); + f0 = _mm512_sub_ps(f0, extrabias0); + f1 = _mm512_sub_ps(f1, extrabias1); + } + } + + PLUS_TERM_2(0,1); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + SCALE_BIAS_VEC(1); + } + + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + f1 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x) + 16), f1); + } + POST_TREAT_FLOAT_2(0,1); + _mm512_storeu_ps(((float*)dst_x), f0); + 
_mm512_storeu_ps(((float*)dst_x) + 16, f1); + + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; + } + return; + } + if (realDst == 1) { + for (int dz = 0; dz < dzU; ++dz) { + auto weight_dz = weight + dz * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dz * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dz * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dz * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dz * PACK_UNIT * dzUnit; + auto dst_z = dst + dz * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + __m512i D0 = _mm512_set1_epi32(0); + + __m512i D4 = _mm512_set1_epi32(0); + + __m512i D8 = _mm512_set1_epi32(0); + + __m512i D12 = _mm512_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = (const float*)(src_x + sz * GEMMINT8_AVX512_E * GEMMINT8_AVX512_L); + // int4->int8: total count=4*64(GEMMINT8_AVX512_L * GEMMINT8_AVX512_H) + // Load 4*64 int4 weight + auto w0_int4_64 = _mm512_loadu_si512(weight_sz); // 128xint4_t=64 byte + auto w1_int4_64 = _mm512_loadu_si512(weight_sz + 64); // 128xint4_t + // 256xint4_t->256xint8_t + auto w0 = _mm512_and_si512(mask, _mm512_srli_epi16(w0_int4_64, 4)); // 64xint8_t + auto w2 = _mm512_and_si512(mask, w0_int4_64); // 64xint8_t + auto w1 = _mm512_and_si512(mask, _mm512_srli_epi16(w1_int4_64, 4)); + auto w3 = _mm512_and_si512(mask, w1_int4_64); + + auto s0 = AVX512_BROADCAST_INT32(src_z + 0); + + D0 = mnn_mm512_dpbusds_epi32(D0, s0, w0); + + D4 = mnn_mm512_dpbusds_epi32(D4, s0, w1); + + D8 = mnn_mm512_dpbusds_epi32(D8, s0, w2); + + D12 = mnn_mm512_dpbusds_epi32(D12, s0, w3); + } + + auto scaleValue = _mm512_loadu_ps(scale_dz); + auto weightBiasValue = _mm512_loadu_ps(weightBias_dz); + __m512 xy0_0; + + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(0); + + if (post->extraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } + + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 1 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 1 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(4); + + if (post->extraScale) { // Batch quant + f4 = _mm512_mul_ps(f4, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 1 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f4 = _mm512_sub_ps(f4, extrabias0); + } + } + + PLUS_TERM_1(4); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 1 * PACK_UNIT); + SCALE_BIAS_VEC(4); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 2 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 2 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(8); + + if (post->extraScale) { // Batch quant + f8 = _mm512_mul_ps(f8, extrascale0); + if (post->extraBias && nullptr != 
biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 2 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f8 = _mm512_sub_ps(f8, extrabias0); + } + } + + PLUS_TERM_1(8); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 2 * PACK_UNIT); + SCALE_BIAS_VEC(8); + } + + scaleValue = _mm512_loadu_ps(scale_dz + 3 * PACK_UNIT); + weightBiasValue = _mm512_loadu_ps(weightBias_dz + 3 * PACK_UNIT); + // x_kernelSum x w_quantZero + SRCKERNELSUM_MUL_WEIGHTQUANBIAS_1; + DEQUANT_VALUE(12); + + if (post->extraScale) { // Batch quant + f12 = _mm512_mul_ps(f12, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz + 3 * PACK_UNIT); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f12 = _mm512_sub_ps(f12, extrabias0); + } + } + + PLUS_TERM_1(12); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz + 3 * PACK_UNIT); + SCALE_BIAS_VEC(12); + } + + if (nullptr == biasPtr) { + auto dstTemp = dst_x; + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp)), f0); + dstTemp += dst_step_tmp; + f4 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f4); + dstTemp += dst_step_tmp; + f8 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f8); + dstTemp += dst_step_tmp; + f12 = _mm512_add_ps(_mm512_loadu_ps(((float*)dstTemp) + 16 * 0), f12); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + POST_TREAT_FLOAT_1(4); + POST_TREAT_FLOAT_1(8); + POST_TREAT_FLOAT_1(12); + } + _mm512_storeu_ps(((float*)dst_x), f0); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f4); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f8); + dst_x += dst_step_tmp; + _mm512_storeu_ps(((float*)dst_x) + 16 * 0, f12); + + } + auto weight_dz = weight + dzU * weight_step_Z; + if (biasPtr) { + bias_dz = post->biasFloat + dzU * PACK_UNIT * dzUnit; + } + if (post->extraBias) { + extraB_dz = post->extraBias + dzU * PACK_UNIT * dzUnit; + } + float* scale_dz = (float*)post->scale + dzU * PACK_UNIT * dzUnit; + const auto weightBias_dz = post->weightQuanBias + dzU * PACK_UNIT * dzUnit; + + auto dst_z = dst + dzU * dst_step_tmp * dzUnit; + const auto src_x = src; + auto dst_x = dst_z; + for (int i=0; iextraScale) { // Batch quant + f0 = _mm512_mul_ps(f0, extrascale0); + if (post->extraBias && nullptr != biasPtr) { + auto extrabias = _mm512_loadu_ps(extraB_dz); + extrabias = _mm512_mul_ps(f128, extrabias); + auto extrabias0 = _mm512_mul_ps(extrabias, extrascale0); + f0 = _mm512_sub_ps(f0, extrabias0); + } + } + + PLUS_TERM_1(0); + if (biasPtr) { + auto biasValue = _mm512_loadu_ps(bias_dz); + SCALE_BIAS_VEC(0); + } + + if (nullptr == biasPtr) { + f0 = _mm512_add_ps(_mm512_loadu_ps(((float*)dst_x)), f0); + } + if (post->fp32minmax) { + POST_TREAT_FLOAT_1(0); + } + _mm512_storeu_ps(((float*)dst_x), f0); + dst_x += dst_step_tmp; + scale_dz += PACK_UNIT; + if (biasPtr) { + bias_dz += PACK_UNIT; + } + if (post->extraBias) { + extraB_dz += PACK_UNIT; + } + weight_dz += PACK_UNIT * GEMMINT8_AVX512_L; } return; } diff --git a/source/backend/cpu/x86_x64/avx512/PackedFunction.cpp b/source/backend/cpu/x86_x64/avx512/PackedFunction.cpp index 6dbc438d7..047c3dc7a 100644 --- a/source/backend/cpu/x86_x64/avx512/PackedFunction.cpp +++ b/source/backend/cpu/x86_x64/avx512/PackedFunction.cpp @@ -39,11 +39,11 @@ void _AVX512_MNNAddC4WithStride(const float* source, float* dest, size_t srcStri void 
_AVX512_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_t size) { int pack = 16; - int sizeDiv16 = UP_DIV(size, pack); - __m512 minVal = _mm512_loadu_ps(source); + int sizeDiv16 = size / pack; + __m512 minVal = _mm512_set1_ps(source[0]); __m512 maxVal = minVal; float maxArr[16], minArr[16]; - for (int i = 1; i < sizeDiv16; ++i) { + for (int i = 0; i < sizeDiv16; ++i) { auto src0 = source + pack * i; __m512 vecA = _mm512_loadu_ps(src0); auto maskMax = _mm512_cmp_ps_mask(vecA, maxVal, 14); @@ -62,14 +62,27 @@ void _AVX512_MNNComputeScaleZeroScalar(float* source, float* min, float* max, si min_ = minArr[k]; } } + for (int i = pack * sizeDiv16; i < size; ++i) { + min_ = ALIMIN(min_, source[i]); + max_ = ALIMAX(max_, source[i]); + } min[0] = min_; max[0] = max_; - // float range = max_ - min_; - // MNN_ASSERT(range != 0); - // *quantScale = 255.0f / range; - // *dequantScale = range / 255.0f; - // *zeroPoint = std::min(255.f, std::max(roundf(-(min_ * 255.f) / range), 0.f)) - 128.f; +} +void _AVX512_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack) { + // source: (ic/4, N, 4) + auto srcStep = pack * realSize; + for (int i = 0; i < realSize; ++i) { + float absmaxVal = 0.f; // absmaxVal>=0 + for (int c = 0; c < src_depth_quad; ++c) { + auto src = source + c * srcStep + i * pack; + for (int k = 0; k < pack; ++k) { + absmaxVal = std::max(absmaxVal, std::abs(src[k])); + } + } + absmax[i] = absmaxVal; + } } void _AVX512_MNNReluWithSlopeChannel(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad) { @@ -737,6 +750,7 @@ void _AVX512_ExtraInit(void* functions) { coreFunction->MNNMatrixAdd = _AVX512_MNNMatrixAdd; coreFunction->MNNMatrixSub = _AVX512_MNNMatrixSub; coreFunction->MNNCountMaxMinValue = _AVX512_MNNComputeScaleZeroScalar; + coreFunction->MNNAbsMax = _AVX512_MNNAbsMaxFP32; coreFunction->MNNConvRunForUnitDepthWise = _AVX512_MNNConvRunForUnitDepthWise; coreFunction->MNNConvRunForLineDepthwise = _AVX512_MNNConvRunForLineDepthwise; diff --git a/source/backend/cpu/x86_x64/sse/FunctionSummary.hpp b/source/backend/cpu/x86_x64/sse/FunctionSummary.hpp index d0a2fed31..4f1525087 100644 --- a/source/backend/cpu/x86_x64/sse/FunctionSummary.hpp +++ b/source/backend/cpu/x86_x64/sse/FunctionSummary.hpp @@ -59,12 +59,9 @@ void _SSE_MNNPackedMatMul_int8(float* C, const float* A, const float* B, const s const float* postParameters, const float* bias, const float* k, const float* b); void _SSE_MNNPackedMatMulRemain_int8(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b); -void _SSE_MNNGemmHybridInt4(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, - size_t dst_depth_quad, size_t realSize, const float** param); -void _SSE_MNNGemmHybridInt8(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, - size_t dst_depth_quad, size_t realSize, const float** param); void _SSE_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack); -void _SSE_MNNDynamicQuantFP32(const float* src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack); +void _SSE_MNNGemmInt8AddBiasScale_16x4_w4(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, + size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst); #endif void 
_SSE_MNNPackC4ForMatMul_A(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el); void _SSE_MNNConvRunForLineDepthwise(float* dst, const float* src, const float* weight, size_t width, size_t src_w_setup, diff --git a/source/backend/cpu/x86_x64/sse/GemmCommon.cpp b/source/backend/cpu/x86_x64/sse/GemmCommon.cpp index 53e21f0c7..40a372601 100644 --- a/source/backend/cpu/x86_x64/sse/GemmCommon.cpp +++ b/source/backend/cpu/x86_x64/sse/GemmCommon.cpp @@ -185,11 +185,11 @@ void _SSE_MNNPackedSparseMatMul(float* C, const float* A, const float* B, unsign void _SSE_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_t size) { int pack = 4; - int sizeDiv4 = UP_DIV(size, pack); - __m128 minVal = _mm_loadu_ps(source); + int sizeDiv4 = size / pack; + __m128 minVal = _mm_set1_ps(source[0]); __m128 maxVal = minVal; float maxArr[4], minArr[4]; - for (int i = 1; i < sizeDiv4; ++i) { + for (int i = 0; i < sizeDiv4; ++i) { auto src0 = source + pack * i; __m128 vecA = _mm_loadu_ps(src0); __m128 maskMax = _mm_cmpgt_ps(maxVal, vecA); @@ -200,7 +200,7 @@ void _SSE_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_ _mm_storeu_ps(maxArr, maxVal); _mm_storeu_ps(minArr, minVal); float max_ = maxArr[0], min_ = minArr[0]; - for (int k = 1; k < 4; ++k) { + for (int k = 1; k < pack; ++k) { if (max_ < maxArr[k]) { max_ = maxArr[k]; } @@ -208,13 +208,11 @@ void _SSE_MNNComputeScaleZeroScalar(float* source, float* min, float* max, size_ min_ = minArr[k]; } } + for (int i = pack * sizeDiv4; i < size; ++i) { + max_ = std::max(max_, source[i]); + min_ = std::min(min_, source[i]); + } min[0] = min_; max[0] = max_; - // float range = max_ - min_; - // MNN_ASSERT(range != 0); - // *quantScale = 255.0f / range; - // *dequantScale = range / 255.0f; - // *zeroPoint = std::min(255.f, std::max(roundf(-(min_ * 255.f) / range), 0.f)) - 128.0f; - } diff --git a/source/backend/cpu/x86_x64/sse/GemmFunction.hpp b/source/backend/cpu/x86_x64/sse/GemmFunction.hpp index e0272c184..89841f389 100644 --- a/source/backend/cpu/x86_x64/sse/GemmFunction.hpp +++ b/source/backend/cpu/x86_x64/sse/GemmFunction.hpp @@ -729,162 +729,4 @@ static void _SSE_MNNPackednMatMulRemainCommon_int8(float* C, const float* A, con } } } -// int4 -> int8 -static inline __m128i _load_int4_to_int8(const uint8_t* src) { - uint8_t c = 0xf; - int32_t data[4]; - int8_t temp[16]; - for (int i = 0; i < 8; ++i) { - temp[2 * i] = (src[i] >> 4); - temp[2 * i +1] = (src[i] & c); - } - auto int8_tx16 = _mm_loadu_si128((const __m128i*)temp); - return int8_tx16; -} -static void _SSE_MNNGemmHybrid_int4(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param) { - // C:(oc/4,N,4) A:(ic/4,N,4) B:(oc/4,ic/4,4,4) - int pack = 4; - __m128i zero_128i = _mm_set1_epi32(0); - size_t weight_step = src_depth_quad * pack * pack * 0.5; - size_t weight_stride = pack * pack * 0.5; - const float* alpha_ptr = param[0]; - const float* zero_ptr = param[1]; - const float* bias_ptr = param[2]; - const float* sums_ptr = param[3]; - const float* scale_ptr = param[4]; - std::vector tmpsrc(16, 0); - - for (int ci = 0; ci < dst_depth_quad; ++ci) { - float* dstZ = C + ci * pack * realSize; - const int8_t* weight = B + ci * weight_step; - auto alpha = alpha_ptr + ci * pack; - auto zero = zero_ptr + ci * pack; - auto bias = bias_ptr + ci * pack; - __m128 alphaValue = _mm_load_ps(alpha); - //const float* sums = param[2]; - for (int j = 0; j < realSize; ++j) { - 
const float* sums = sums_ptr + j; - const float* scale = scale_ptr + j; - float* dstX = dstZ + j * pack; - __m128i sum4 = _mm_set1_epi32(0); - __m128 scaleValue = _mm_set1_ps(scale[0]); - __m128 biasValue = _mm_add_ps(_mm_load_ps(bias), _mm_mul_ps(_mm_load_ps(zero), _mm_set1_ps(sums[0]))); - const int8_t* srcBatch = A + j * pack; - for (int k = 0; k < src_depth_quad; ++k) { - const int8_t* srcZ = srcBatch + k * pack * realSize; - const uint8_t* weightZ = (uint8_t*)weight + k * weight_stride; - auto w0 = _load_int4_to_int8(weightZ); - - ::memcpy(tmpsrc.data(), srcZ, 4 * sizeof(int8_t)); - auto s0 = _mm_loadu_si128((const __m128i*)tmpsrc.data()); - // src,weight: int8->int16 - auto s0_16 = _mm_srai_epi16(_mm_unpacklo_epi8(zero_128i, s0), 8); - auto w0_16 = _mm_srai_epi16(_mm_unpacklo_epi8(zero_128i, w0), 8); - auto w1_16 = _mm_srai_epi16(_mm_unpackhi_epi8(zero_128i, w0), 8); - auto w2_16 = _mm_unpackhi_epi64(w0_16, zero_128i); - auto w3_16 = _mm_unpackhi_epi64(w1_16, zero_128i); - - auto oc0 = _mm_madd_epi16(s0_16, w0_16); - auto oc1 = _mm_madd_epi16(s0_16, w2_16); - auto oc2 = _mm_madd_epi16(s0_16, w1_16); - auto oc3 = _mm_madd_epi16(s0_16, w3_16); - - auto d0 = _mm_unpacklo_epi32(oc0, oc1); - auto d1 = _mm_unpackhi_epi32(oc0, oc1); - auto d2 = _mm_unpacklo_epi32(oc2, oc3); - auto d3 = _mm_unpackhi_epi32(oc2, oc3); - - auto e0 = _mm_unpacklo_epi64(d0, d2); - auto e1 = _mm_unpackhi_epi64(d0, d2); - auto e2 = _mm_unpacklo_epi64(d1, d3); - auto e3 = _mm_unpackhi_epi64(d1, d3); - - e0 = _mm_add_epi32(e0, e1); - e2 = _mm_add_epi32(e2, e3); - e0 = _mm_add_epi32(e0, e2); - - sum4 = _mm_add_epi32(e0, sum4); - - } - __m128 f0 = _mm_cvtepi32_ps(sum4); - __m128 fs = _mm_mul_ps(_mm_mul_ps(f0, scaleValue), alphaValue); - fs = _mm_add_ps(biasValue, fs); - _mm_storeu_ps(dstX, fs); - } - } -} -static void _SSE_MNNGemmHybrid_int8(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param) { - // C:(oc/4,N,4) A:(ic/4,N,4) B:(oc/4,ic/4,4,4) - int pack = 4; - __m128i zero_128i = _mm_set1_epi32(0); - size_t weight_step = src_depth_quad * pack * pack; - size_t weight_stride = pack * pack; - const float* alpha_ptr = param[0]; - const float* zero_ptr = param[1]; - const float* bias_ptr = param[2]; - const float* sums_ptr = param[3]; - const float* scale_ptr = param[4]; - std::vector tmpsrc(16, 0); - - for (int ci = 0; ci < dst_depth_quad; ++ci) { - float* dstZ = C + ci * pack * realSize; - const int8_t* weight = B + ci * weight_step; - auto alpha = alpha_ptr + ci * pack; - auto zero = zero_ptr + ci * pack; - auto bias = bias_ptr + ci * pack; - __m128 alphaValue = _mm_load_ps(alpha); - //const float* sums = param[2]; - for (int j = 0; j < realSize; ++j) { - const float* sums = sums_ptr + j; - const float* scale = scale_ptr + j; - float* dstX = dstZ + j * pack; - - __m128i sum4 = _mm_set1_epi32(0); - __m128 scaleValue = _mm_set1_ps(scale[0]); - __m128 biasValue = _mm_add_ps(_mm_load_ps(bias), _mm_mul_ps(_mm_load_ps(zero), _mm_set1_ps(sums[0]))); - const int8_t* srcBatch = A + j * pack; - for (int k = 0; k < src_depth_quad; ++k) { - const int8_t* srcZ = srcBatch + k * pack * realSize; - const int8_t* weightZ = weight + k * weight_stride; - auto w0 = _mm_loadu_si128((__m128i*)(weightZ)); // 16xint8_t weight - - ::memcpy(tmpsrc.data(), srcZ, 4 * sizeof(int8_t)); - auto s0 = _mm_loadu_si128((const __m128i*)tmpsrc.data()); - // src,weight: int8->int16 -// auto s0_16 = _mm_unpacklo_epi8(s0, zero_128i); - auto s0_16 = 
_mm_srai_epi16(_mm_unpacklo_epi8(zero_128i, s0), 8); - auto w0_16 = _mm_srai_epi16(_mm_unpacklo_epi8(zero_128i, w0), 8); - auto w1_16 = _mm_srai_epi16(_mm_unpackhi_epi8(zero_128i, w0), 8); - auto w2_16 = _mm_unpackhi_epi64(w0_16, zero_128i); - auto w3_16 = _mm_unpackhi_epi64(w1_16, zero_128i); - - auto oc0 = _mm_madd_epi16(s0_16, w0_16); - auto oc1 = _mm_madd_epi16(s0_16, w2_16); - auto oc2 = _mm_madd_epi16(s0_16, w1_16); - auto oc3 = _mm_madd_epi16(s0_16, w3_16); - - auto d0 = _mm_unpacklo_epi32(oc0, oc1); - auto d1 = _mm_unpackhi_epi32(oc0, oc1); - auto d2 = _mm_unpacklo_epi32(oc2, oc3); - auto d3 = _mm_unpackhi_epi32(oc2, oc3); - - auto e0 = _mm_unpacklo_epi64(d0, d2); - auto e1 = _mm_unpackhi_epi64(d0, d2); - auto e2 = _mm_unpacklo_epi64(d1, d3); - auto e3 = _mm_unpackhi_epi64(d1, d3); - - e0 = _mm_add_epi32(e0, e1); - e2 = _mm_add_epi32(e2, e3); - e0 = _mm_add_epi32(e0, e2); - - sum4 = _mm_add_epi32(e0, sum4); - - } - __m128 f0 = _mm_cvtepi32_ps(sum4); - __m128 fs = _mm_mul_ps(_mm_mul_ps(f0, scaleValue), alphaValue); - fs = _mm_add_ps(biasValue, fs); - _mm_storeu_ps(dstX, fs); - } - } -} #endif diff --git a/source/backend/cpu/x86_x64/sse/GemmInt8.cpp b/source/backend/cpu/x86_x64/sse/GemmInt8.cpp index 2afb7144e..77702c2d4 100644 --- a/source/backend/cpu/x86_x64/sse/GemmInt8.cpp +++ b/source/backend/cpu/x86_x64/sse/GemmInt8.cpp @@ -22,11 +22,61 @@ void _SSE_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, cons __m128 maxValue = _mm_set1_ps(post->maxValue); __m128 plus = _mm_set1_ps(0.5f); __m128 minus = _mm_set1_ps(-0.5f); + __m128 fp32min, fp32max; + if (0 == post->useInt8 && post->fp32minmax) { + fp32min = _mm_set1_ps((post->fp32minmax)[0]); + fp32max = _mm_set1_ps((post->fp32minmax)[1]); + } auto oneValue = _mm_set1_epi16(1); auto offset = _mm_set1_epi32(128); + auto f128 = _mm_set1_ps(128.f); + auto srcKernelSumPtr = post->srcKernelSum; + __m128 kernelSum0 = _mm_setzero_ps(); + __m128 kernelSum1 = _mm_setzero_ps(); + __m128 kernelSum2 = _mm_setzero_ps(); + __m128 kernelSum3 = _mm_setzero_ps(); + if (GEMM_INT8_DST_XUNIT == realDst) { + kernelSum0 = _mm_load_ps1(post->srcKernelSum); + kernelSum1 = _mm_load_ps1(post->srcKernelSum + 1); + kernelSum2 = _mm_load_ps1(post->srcKernelSum + 2); + kernelSum3 = _mm_load_ps1(post->srcKernelSum + 3); + } else { + kernelSum0 = _mm_load_ps1(post->srcKernelSum); + if (realDst > 1) { + kernelSum1 = _mm_load_ps1(post->srcKernelSum + 1); + } + if (realDst > 2) { + kernelSum2 = _mm_load_ps1(post->srcKernelSum + 2); + } + } + __m128 extrascale0 = _mm_setzero_ps(); + __m128 extrascale1 = _mm_setzero_ps(); + __m128 extrascale2 = _mm_setzero_ps(); + __m128 extrascale3 = _mm_setzero_ps(); + if (post->extraScale) { + if (GEMM_INT8_DST_XUNIT == realDst) { + extrascale0 = _mm_load_ps1(post->extraScale); + extrascale1 = _mm_load_ps1(post->extraScale + 1); + extrascale2 = _mm_load_ps1(post->extraScale + 2); + extrascale3 = _mm_load_ps1(post->extraScale + 3); + } else { + extrascale0 = _mm_load_ps1(post->extraScale); + if (realDst > 1) { + extrascale1 = _mm_load_ps1(post->extraScale + 1); + } + if (realDst > 2) { + extrascale2 = _mm_load_ps1(post->extraScale + 2); + } + } + } + const float* biasPtr = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + auto blockNum = post->blockNum; for (int dz = 0; dz < dst_depth_quad; ++dz) { - const auto weight_dz = weight + dz * src_depth_quad * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); - const auto bias_dz = post->bias + dz * GEMM_INT8_UNIT; + const auto weight_dz = weight + dz * 
(src_depth_quad * blockNum) * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + const auto weightBias_dz = post->weightQuanBias + dz * GEMM_INT8_UNIT; const float* scale_dz = nullptr; scale_dz = post->scale + dz * GEMM_INT8_UNIT; auto dst_z = dst + dz * dst_step_tmp; @@ -128,22 +178,60 @@ auto d##i##j = _mm_add_epi32(_mm_madd_epi16(S##i##j##0, W##i##j##0), _mm_madd_ep E0 = _mm_hadd_epi32(E0, E1); E1 = _mm_hadd_epi32(E2, E3); d3 = _mm_hadd_epi32(E0, E1); - - auto biasValue = _mm_loadu_si128((__m128i*)(bias_dz)); auto scaleValue = _mm_loadu_ps(scale_dz); - d0 = _mm_add_epi32(d0, biasValue); - d1 = _mm_add_epi32(d1, biasValue); - d2 = _mm_add_epi32(d2, biasValue); - d3 = _mm_add_epi32(d3, biasValue); + // auto biasValue = _mm_loadu_si128((__m128i*)(bias_dz)); + // d0 = _mm_add_epi32(d0, biasValue); + // d1 = _mm_add_epi32(d1, biasValue); + // d2 = _mm_add_epi32(d2, biasValue); + // d3 = _mm_add_epi32(d3, biasValue); + //auto biasValue = _mm_loadu_ps((float*)(bias_dz)); + auto weightBiasValue = _mm_loadu_ps((float*)weightBias_dz); __m128 f0 = _mm_cvtepi32_ps(d0); __m128 f1 = _mm_cvtepi32_ps(d1); __m128 f2 = _mm_cvtepi32_ps(d2); __m128 f3 = _mm_cvtepi32_ps(d3); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm_mul_ps(kernelSum2, weightBiasValue); // .. third + auto xy0_3 = _mm_mul_ps(kernelSum3, weightBiasValue); // ..fourth f0 = _mm_mul_ps(f0, scaleValue); f1 = _mm_mul_ps(f1, scaleValue); f2 = _mm_mul_ps(f2, scaleValue); f3 = _mm_mul_ps(f3, scaleValue); + if (post->extraScale) { + f0 = _mm_mul_ps(f0, extrascale0); + f1 = _mm_mul_ps(f1, extrascale1); + f2 = _mm_mul_ps(f2, extrascale2); + f3 = _mm_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * GEMM_INT8_UNIT; + auto extrabias = _mm_loadu_ps(extraB); + extrabias = _mm_mul_ps(f128, extrabias); + auto extrabias0 = _mm_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm_mul_ps(extrabias, extrascale3); + f0 = _mm_sub_ps(f0, extrabias0); + f1 = _mm_sub_ps(f1, extrabias1); + f2 = _mm_sub_ps(f2, extrabias2); + f3 = _mm_sub_ps(f3, extrabias3); + } + } + f0 = _mm_add_ps(f0, xy0_0); + f1 = _mm_add_ps(f1, xy0_1); + f2 = _mm_add_ps(f2, xy0_2); + f3 = _mm_add_ps(f3, xy0_3); + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * GEMM_INT8_UNIT; + auto biasValue = _mm_loadu_ps(bias_dz); + f0 = _mm_add_ps(f0, biasValue); + f1 = _mm_add_ps(f1, biasValue); + f2 = _mm_add_ps(f2, biasValue); + f3 = _mm_add_ps(f3, biasValue); + } if (post->useInt8 == 1) { + // for Relu Int8 activation f0 = _mm_min_ps(f0, maxValue); f1 = _mm_min_ps(f1, maxValue); f2 = _mm_min_ps(f2, maxValue); @@ -188,7 +276,24 @@ auto d##i##j = _mm_add_epi32(_mm_madd_epi16(S##i##j##0, W##i##j##0), _mm_madd_ep } } } else { // Store float values directly. + // for Relu float activation. 
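+                    // No bias pointer on this call: reload the values already stored in dst and add them
+                    // back in (presumably partial sums from earlier quant blocks) before the optional fp32 clamp.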
__m128 f[4] = {f0, f1, f2, f3}; + if (nullptr == biasPtr) { + for (int j = 0; j < realDst; ++j) { + auto dstv = _mm_loadu_ps(((float*)dst_x) + j * 4); + f[j] = _mm_add_ps(dstv, f[j]); + } + } + if (post->fp32minmax) { + f[0] = _mm_min_ps(f[0], fp32max); + f[1] = _mm_min_ps(f[1], fp32max); + f[2] = _mm_min_ps(f[2], fp32max); + f[3] = _mm_min_ps(f[3], fp32max); + f[0] = _mm_max_ps(f[0], fp32min); + f[1] = _mm_max_ps(f[1], fp32min); + f[2] = _mm_max_ps(f[2], fp32min); + f[3] = _mm_max_ps(f[3], fp32min); + } for (int j = 0; j < realDst; ++j) { _mm_storeu_ps(((float*)dst_x) + j * 4, f[j]); } @@ -196,6 +301,260 @@ auto d##i##j = _mm_add_epi32(_mm_madd_epi16(S##i##j##0, W##i##j##0), _mm_madd_ep } } +static inline void _load_int4_to_int8(const uint8_t* src, int8_t* dst) { + uint8_t c = 0xf; + for (int i = 0; i < 32; ++i) { + dst[2 * i] = (src[i] >> 4); + dst[2 * i +1] = (src[i] & c); + } +} +void _SSE_MNNGemmInt8AddBiasScale_16x4_w4(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, + size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst) { + MNN_ASSERT(post->useInt8 == 0); + const auto dst_step_tmp = dst_step / sizeof(int8_t); + __m128i zero = _mm_set1_epi32(0); + __m128 minValue = _mm_set1_ps(post->minValue); + __m128 maxValue = _mm_set1_ps(post->maxValue); + __m128 fp32min, fp32max; + if (post->fp32minmax) { + fp32min = _mm_set1_ps((post->fp32minmax)[0]); + fp32max = _mm_set1_ps((post->fp32minmax)[1]); + } + const float* biasPtr = nullptr; + if (post->biasFloat) { + biasPtr = post->biasFloat; + } + int blockNum = post->blockNum; + int weight_step_Z = 0.5 * (src_depth_quad * blockNum) * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + int weight_step_Y = 0.5 * (GEMM_INT8_UNIT * GEMM_INT8_SRC_UNIT); + + auto oneValue = _mm_set1_epi16(1); + auto offset = _mm_set1_epi32(128); + auto srcKernelSumPtr = post->srcKernelSum; + __m128 kernelSum0 = _mm_setzero_ps(); + __m128 kernelSum1 = _mm_setzero_ps(); + __m128 kernelSum2 = _mm_setzero_ps(); + __m128 kernelSum3 = _mm_setzero_ps(); + if (GEMM_INT8_DST_XUNIT == realDst) { + kernelSum0 = _mm_load_ps1(post->srcKernelSum); + kernelSum1 = _mm_load_ps1(post->srcKernelSum + 1); + kernelSum2 = _mm_load_ps1(post->srcKernelSum + 2); + kernelSum3 = _mm_load_ps1(post->srcKernelSum + 3); + } else { + kernelSum0 = _mm_load_ps1(post->srcKernelSum); + if (realDst > 1) { + kernelSum1 = _mm_load_ps1(post->srcKernelSum + 1); + } + if (realDst > 2) { + kernelSum2 = _mm_load_ps1(post->srcKernelSum + 2); + } + } + auto f128 = _mm_set1_ps(128.f); + __m128 extrascale0 = _mm_setzero_ps(); + __m128 extrascale1 = _mm_setzero_ps(); + __m128 extrascale2 = _mm_setzero_ps(); + __m128 extrascale3 = _mm_setzero_ps(); + if (post->extraScale) { + if (GEMM_INT8_DST_XUNIT == realDst) { + extrascale0 = _mm_load_ps1(post->extraScale); + extrascale1 = _mm_load_ps1(post->extraScale + 1); + extrascale2 = _mm_load_ps1(post->extraScale + 2); + extrascale3 = _mm_load_ps1(post->extraScale + 3); + } else { + extrascale0 = _mm_load_ps1(post->extraScale); + if (realDst > 1) { + extrascale1 = _mm_load_ps1(post->extraScale + 1); + } + if (realDst > 2) { + extrascale2 = _mm_load_ps1(post->extraScale + 2); + } + } + } + for (int dz = 0; dz < dst_depth_quad; ++dz) { + const auto weight_dz = weight + dz * weight_step_Z; + const auto weightBias_dz = post->weightQuanBias + dz * GEMM_INT8_UNIT; + const float* scale_dz = nullptr; + scale_dz = post->scale + dz * GEMM_INT8_UNIT; + auto dst_z = dst + dz * dst_step_tmp; + const auto src_x = src; + auto dst_x = 
dst_z; + __m128i d0 = _mm_set1_epi32(0); + __m128i d1 = _mm_set1_epi32(0); + __m128i d2 = _mm_set1_epi32(0); + __m128i d3 = _mm_set1_epi32(0); + + __m128i e0 = _mm_set1_epi32(0); + __m128i e1 = _mm_set1_epi32(0); + __m128i e2 = _mm_set1_epi32(0); + __m128i e3 = _mm_set1_epi32(0); + + __m128i D0 = _mm_set1_epi32(0); + __m128i D1 = _mm_set1_epi32(0); + __m128i D2 = _mm_set1_epi32(0); + __m128i D3 = _mm_set1_epi32(0); + + __m128i E0 = _mm_set1_epi32(0); + __m128i E1 = _mm_set1_epi32(0); + __m128i E2 = _mm_set1_epi32(0); + __m128i E3 = _mm_set1_epi32(0); + + for (int sz = 0; sz < src_depth_quad; ++sz) { + const auto weight_sz = weight_dz + weight_step_Y * sz; + const auto src_z = src_x + sz * GEMM_INT8_DST_XUNIT * GEMM_INT8_SRC_UNIT; + + int8_t tmp_w[64]; + _load_int4_to_int8((uint8_t*)weight_sz, tmp_w); + + auto w0 = _mm_loadu_si128((__m128i*)(tmp_w + GEMM_INT8_SRC_UNIT * 0)); + auto w1 = _mm_loadu_si128((__m128i*)(tmp_w + GEMM_INT8_SRC_UNIT * 1)); + auto w2 = _mm_loadu_si128((__m128i*)(tmp_w + GEMM_INT8_SRC_UNIT * 2)); + auto w3 = _mm_loadu_si128((__m128i*)(tmp_w + GEMM_INT8_SRC_UNIT * 3)); + + auto s0 = _mm_loadu_si128((__m128i*)(src_z + GEMM_INT8_SRC_UNIT * 0)); + auto s1 = _mm_loadu_si128((__m128i*)(src_z + GEMM_INT8_SRC_UNIT * 1)); + auto s2 = _mm_loadu_si128((__m128i*)(src_z + GEMM_INT8_SRC_UNIT * 2)); + auto s3 = _mm_loadu_si128((__m128i*)(src_z + GEMM_INT8_SRC_UNIT * 3)); + + +//#define COMPUTE(i, j)\ +//auto d##i##j = _mm_maddubs_epi16(s##i, w##j);\ +//d##i##j = _mm_madd_epi16(d##i##j, oneValue);\ + +#define COMPUTE(i, j)\ +auto W##i##j##0 = _mm_srai_epi16(_mm_unpacklo_epi8(zero, w##j), 8);\ +auto W##i##j##1 = _mm_srai_epi16(_mm_unpackhi_epi8(zero, w##j), 8);\ +auto S##i##j##0 = _mm_unpacklo_epi8(s##i, zero);\ +auto S##i##j##1 = _mm_unpackhi_epi8(s##i, zero);\ +auto d##i##j = _mm_add_epi32(_mm_madd_epi16(S##i##j##0, W##i##j##0), _mm_madd_epi16(S##i##j##1, W##i##j##1));\ + + COMPUTE(0, 0); + COMPUTE(0, 1); + COMPUTE(0, 2); + COMPUTE(0, 3); + COMPUTE(1, 0); + COMPUTE(1, 1); + COMPUTE(1, 2); + COMPUTE(1, 3); + COMPUTE(2, 0); + COMPUTE(2, 1); + COMPUTE(2, 2); + COMPUTE(2, 3); + COMPUTE(3, 0); + COMPUTE(3, 1); + COMPUTE(3, 2); + COMPUTE(3, 3); + + d0 = _mm_add_epi32(d0, d00); + d1 = _mm_add_epi32(d1, d01); + d2 = _mm_add_epi32(d2, d02); + d3 = _mm_add_epi32(d3, d03); + + e0 = _mm_add_epi32(e0, d10); + e1 = _mm_add_epi32(e1, d11); + e2 = _mm_add_epi32(e2, d12); + e3 = _mm_add_epi32(e3, d13); + + D0 = _mm_add_epi32(D0, d20); + D1 = _mm_add_epi32(D1, d21); + D2 = _mm_add_epi32(D2, d22); + D3 = _mm_add_epi32(D3, d23); + + E0 = _mm_add_epi32(E0, d30); + E1 = _mm_add_epi32(E1, d31); + E2 = _mm_add_epi32(E2, d32); + E3 = _mm_add_epi32(E3, d33); + } + d0 = _mm_hadd_epi32(d0, d1); + d1 = _mm_hadd_epi32(d2, d3); + d0 = _mm_hadd_epi32(d0, d1); + + e0 = _mm_hadd_epi32(e0, e1); + e1 = _mm_hadd_epi32(e2, e3); + d1 = _mm_hadd_epi32(e0, e1); + + D0 = _mm_hadd_epi32(D0, D1); + D1 = _mm_hadd_epi32(D2, D3); + d2 = _mm_hadd_epi32(D0, D1); + + E0 = _mm_hadd_epi32(E0, E1); + E1 = _mm_hadd_epi32(E2, E3); + d3 = _mm_hadd_epi32(E0, E1); + auto scaleValue = _mm_loadu_ps(scale_dz); + // auto biasValue = _mm_loadu_si128((__m128i*)(bias_dz)); + // d0 = _mm_add_epi32(d0, biasValue); + // d1 = _mm_add_epi32(d1, biasValue); + // d2 = _mm_add_epi32(d2, biasValue); + // d3 = _mm_add_epi32(d3, biasValue); + //auto biasValue = _mm_loadu_ps((float*)(bias_dz)); + auto weightBiasValue = _mm_loadu_ps((float*)weightBias_dz); + __m128 f0 = _mm_cvtepi32_ps(d0); + __m128 f1 = _mm_cvtepi32_ps(d1); + __m128 f2 = _mm_cvtepi32_ps(d2); 
+ __m128 f3 = _mm_cvtepi32_ps(d3); + // x_kernelSum x w_quantZero + auto xy0_0 = _mm_mul_ps(kernelSum0, weightBiasValue); // x dimemsion first + auto xy0_1 = _mm_mul_ps(kernelSum1, weightBiasValue); // ..second + auto xy0_2 = _mm_mul_ps(kernelSum2, weightBiasValue); // .. third + auto xy0_3 = _mm_mul_ps(kernelSum3, weightBiasValue); // ..fourth + f0 = _mm_mul_ps(f0, scaleValue); + f1 = _mm_mul_ps(f1, scaleValue); + f2 = _mm_mul_ps(f2, scaleValue); + f3 = _mm_mul_ps(f3, scaleValue); + if (post->extraScale) { + f0 = _mm_mul_ps(f0, extrascale0); + f1 = _mm_mul_ps(f1, extrascale1); + f2 = _mm_mul_ps(f2, extrascale2); + f3 = _mm_mul_ps(f3, extrascale3); + if (post->extraBias && nullptr != biasPtr) { + auto extraB = post->extraBias + dz * GEMM_INT8_UNIT; + auto extrabias = _mm_loadu_ps(extraB); + extrabias = _mm_mul_ps(f128, extrabias); + auto extrabias0 = _mm_mul_ps(extrabias, extrascale0); + auto extrabias1 = _mm_mul_ps(extrabias, extrascale1); + auto extrabias2 = _mm_mul_ps(extrabias, extrascale2); + auto extrabias3 = _mm_mul_ps(extrabias, extrascale3); + f0 = _mm_sub_ps(f0, extrabias0); + f1 = _mm_sub_ps(f1, extrabias1); + f2 = _mm_sub_ps(f2, extrabias2); + f3 = _mm_sub_ps(f3, extrabias3); + } + } + f0 = _mm_add_ps(f0, xy0_0); + f1 = _mm_add_ps(f1, xy0_1); + f2 = _mm_add_ps(f2, xy0_2); + f3 = _mm_add_ps(f3, xy0_3); + + if (nullptr != biasPtr) { + const auto bias_dz = biasPtr + dz * GEMM_INT8_UNIT; + auto biasValue = _mm_loadu_ps(bias_dz); + f0 = _mm_add_ps(f0, biasValue); + f1 = _mm_add_ps(f1, biasValue); + f2 = _mm_add_ps(f2, biasValue); + f3 = _mm_add_ps(f3, biasValue); + } + __m128 f[4] = {f0, f1, f2, f3}; + if (nullptr == biasPtr) { + for (int j = 0; j < realDst; ++j) { + auto dstv = _mm_loadu_ps(((float*)dst_x) + j * 4); + f[j] = _mm_add_ps(dstv, f[j]); + } + } + if (post->fp32minmax) { + f[0] = _mm_min_ps(f[0], fp32max); + f[1] = _mm_min_ps(f[1], fp32max); + f[2] = _mm_min_ps(f[2], fp32max); + f[3] = _mm_min_ps(f[3], fp32max); + f[0] = _mm_max_ps(f[0], fp32min); + f[1] = _mm_max_ps(f[1], fp32min); + f[2] = _mm_max_ps(f[2], fp32min); + f[3] = _mm_max_ps(f[3], fp32min); + } + for (int j = 0; j < realDst; ++j) { + _mm_storeu_ps(((float*)dst_x) + j * 4, f[j]); + } + } +} + void _SSE_MNNInt8ToInt16(int16_t* dest, const int8_t* sourceO, size_t count) { int countC16 = count / 16; int countR = count % 16; diff --git a/source/backend/cpu/x86_x64/sse/GemmSSE.cpp b/source/backend/cpu/x86_x64/sse/GemmSSE.cpp index 7d5699e96..8e5a32896 100644 --- a/source/backend/cpu/x86_x64/sse/GemmSSE.cpp +++ b/source/backend/cpu/x86_x64/sse/GemmSSE.cpp @@ -67,12 +67,6 @@ void _SSE_MNNPackedMatMulRemain_int8(float* C, const float* A, const float* B, s } } -void _SSE_MNNGemmHybridInt4(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param) { - _SSE_MNNGemmHybrid_int4(C, A, B, src_depth_quad, dst_step, dst_depth_quad, realSize, param); -} -void _SSE_MNNGemmHybridInt8(float* C, const int8_t* A, const int8_t* B, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, size_t realSize, const float** param) { - _SSE_MNNGemmHybrid_int8(C, A, B, src_depth_quad, dst_step, dst_depth_quad, realSize, param); -} // Dynamic quant void _SSE_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_quad, size_t realSize, int pack) { // source: (ic/4, N, 4) @@ -99,36 +93,4 @@ void _SSE_MNNAbsMaxFP32(const float* source, float* absmax, size_t src_depth_qua absmax[i] = absmaxVal; } } - -void _SSE_MNNDynamicQuantFP32(const float* 
src, int8_t* dst, const float* scale, float* sum, size_t src_depth_quad, size_t realSize, int pack) { - // SSE: pack=4 - __m128 zero = _mm_setzero_ps(); - __m128 plus = _mm_set1_ps(0.5f); - __m128 minus = _mm_set1_ps(-0.5f); - auto offset = _mm_set1_epi32(128); - uint8_t* dstPtr = reinterpret_cast(dst); - float temp[4]; - for (int i = 0; i < realSize; ++i) { - __m128 scaleVal = _mm_load_ps1(scale + i); - __m128 acc = _mm_setzero_ps(); - for (int c = 0; c < src_depth_quad; ++c) { - auto srcZ = src + c * pack * realSize + i * pack; - auto dstZ = dstPtr + c * pack * realSize + i * pack; - __m128 f0 = _mm_loadu_ps(srcZ); - __m128 m0 = _mm_mul_ps(f0, scaleVal); - __m128 mask = _mm_cmplt_ps(m0, zero); - __m128 d0 = _mm_blendv_ps(plus, minus, mask); - d0 = _mm_add_ps(d0, m0); - __m128 round0 = _mm_round_ps(d0, 3); - auto d0_epi32 = _mm_cvtps_epi32(round0); - d0_epi32 = _mm_packs_epi32(d0_epi32, d0_epi32); - d0_epi32 = _mm_packs_epi16(d0_epi32, d0_epi32); - *((int*)dstZ) = _mm_cvtsi128_si32(d0_epi32); - acc = _mm_add_ps(acc, round0); - } - _mm_storeu_ps(temp, acc); - int sumVal = static_cast(temp[0] + temp[1] + temp[2] + temp[3]); - ((int32_t*)sum)[i] = sumVal; - } -} #endif diff --git a/source/backend/cpu/x86_x64/sse/MathFunctions.cpp b/source/backend/cpu/x86_x64/sse/MathFunctions.cpp index f5c66ce5f..b9e857006 100644 --- a/source/backend/cpu/x86_x64/sse/MathFunctions.cpp +++ b/source/backend/cpu/x86_x64/sse/MathFunctions.cpp @@ -328,7 +328,7 @@ void _SSE_MNNReluWithSlopeChannelInt8(int8_t* dst, const int8_t* src, const floa d0 = _mm_add_epi32(d0, offset); d0 = _mm_packs_epi32(d0, d0); d0 = _mm_packus_epi16(d0, d0); - *((int*)dst + i) = _mm_cvtsi128_si32(d0); + *((int*)dstZ + i) = _mm_cvtsi128_si32(d0); } } } diff --git a/source/backend/metal/MetalBackend.hpp b/source/backend/metal/MetalBackend.hpp index 589dd5fff..e01913a38 100644 --- a/source/backend/metal/MetalBackend.hpp +++ b/source/backend/metal/MetalBackend.hpp @@ -138,7 +138,7 @@ class MetalBackend : public Backend { */ static void addCreator(OpType type, Creator *creator); static void setTensor(const MNN::Tensor* tensor, id encoder, int index); - static std::pair, int> getBuffer(MNN::Tensor* tensor); + static std::pair, int> getBuffer(const MNN::Tensor* tensor); size_t getTensorSizeInBytes(const Tensor* tensor) const; virtual bool onSelectDynamicAllocator(int index, int maxIndex) override; id getHostBuffer(size_t size) const; @@ -207,6 +207,12 @@ class MetalBackend : public Backend { bool useFp16InsteadFp32() const { return mUseFloatAsFp16; } + struct CopyPipeline { + id pipeline; + id shape; + MTLSize localSize; + MTLSize groupSize; + }; private: MetalRuntimeAllocator::MetalBufferAlloc mEmptyMem; id getCommandBufferForBufferCopy() const; @@ -234,6 +240,8 @@ class MetalBackend : public Backend { std::shared_ptr mStaticBufferPool; private: + CopyPipeline _makeCopyInfo(const Tensor *src, const Tensor *dst, id shape, int castType) const; + mutable id mHostBuffer = nullptr; // hostmask: 0: no host, 1: src is host, 2: dst is host void onCopyDeviceToDevice(const Tensor *src, const Tensor *dst, id encoder, id shape, int hostmask = 0) const; diff --git a/source/backend/metal/MetalBackend.mm b/source/backend/metal/MetalBackend.mm index 57d800910..6f73629bb 100644 --- a/source/backend/metal/MetalBackend.mm +++ b/source/backend/metal/MetalBackend.mm @@ -36,7 +36,7 @@ static void _MetalApplyTensor(uint8_t* host, size_t offset, Tensor* t) { des->extra.offset = offset; } static BufferAllocator* _createBufferAllocator(const Runtime* runtime, 
BufferAllocator* origin, bool secondResize) { - if (runtime->getAllocatorType() == Runtime::Allocator_Defer && secondResize) { + if (runtime->hint().memoryAllocatorType == Runtime::Allocator_Defer && secondResize) { return new DeferBufferAllocator(BufferAllocator::Allocator::createRecurse(origin), 1024, _MetalApplyTensor); } return new EagerBufferAllocator(BufferAllocator::Allocator::createRecurse(origin), 1024); @@ -315,6 +315,9 @@ MemChunk chunk() override { } id MetalBackend::getHostBuffer(size_t size) const { + if (size < METAL_CONST_BUFFER_LIMIT) { + size = METAL_CONST_BUFFER_LIMIT; + } // reuse if (nullptr != mHostBuffer && mHostBuffer.length >= size) { return mHostBuffer; @@ -568,17 +571,15 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff } return res; } -void MetalBackend::onCopyDeviceToDevice(const Tensor *src, const Tensor *dst, - id encoder, id shape, int castType) const { - auto ctx = (__bridge MNNMetalContext *)context(); - auto standalone = encoder == nil; - encoder = encoder ?: [getCommandBufferForBufferCopy() computeCommandEncoder]; +MetalBackend::CopyPipeline MetalBackend::_makeCopyInfo(const Tensor *src, const Tensor *dst, id shape, int castType) const { + auto ctx = (__bridge MNNMetalContext *)context(); + MetalBackend::CopyPipeline res; auto sfmt = TensorUtils::getDescribe(src)->dimensionFormat; auto dfmt = TensorUtils::getDescribe(dst)->dimensionFormat; if (shape == nil) { shape = getConstBuffer(8 * sizeof(int)); } - // copy + res.shape = shape; if (sfmt == dfmt || src->dimensions() <= 1) { auto srcType = _getType(src->getType(), MNN_DATA_FORMAT_NC4HW4, mUseFloatAsFp16 && castType != 1); auto dstType = _getType(dst->getType(), MNN_DATA_FORMAT_NC4HW4, mUseFloatAsFp16 && castType != 2); @@ -589,6 +590,7 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff srcType, dstType }; + ((uint32_t*)[shape contents])[0] = size; id pipeline = mRuntime->findPipeline(keys); if (nil == pipeline) { MTLCompileOptions *option = [[MTLCompileOptions alloc] init]; @@ -599,16 +601,14 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff pipeline = makeComputePipelineWithSourceOption(gCopy, "main0", option); mRuntime->insertPipeline(keys, pipeline); } - [encoder setComputePipelineState:pipeline]; - ((uint32_t*)[shape contents])[0] = size; - setTensor(src, encoder, 0); - setTensor(dst, encoder, 1); - [encoder setBuffer:shape offset:0 atIndex:2]; - [encoder dispatchThreadgroups:MTLSizeMake(UP_DIV(size, 256), 1, 1) threadsPerThreadgroup:MTLSizeMake(256, 1, 1)]; - } - else if (sfmt == MNN_DATA_FORMAT_NC4HW4 || dfmt == MNN_DATA_FORMAT_NC4HW4) { - auto srcType = _getType(src->getType(), sfmt, mUseFloatAsFp16 && castType != 1); - auto dstType = _getType(dst->getType(), dfmt, mUseFloatAsFp16 && castType != 2); + res.groupSize = MTLSizeMake(UP_DIV(size, 256), 1, 1); + res.localSize = MTLSizeMake(256, 1, 1); + res.pipeline = pipeline; + return res; + } + auto srcType = _getType(src->getType(), sfmt, mUseFloatAsFp16 && castType != 1); + auto dstType = _getType(dst->getType(), dfmt, mUseFloatAsFp16 && castType != 2); + if (sfmt == MNN_DATA_FORMAT_NC4HW4 || dfmt == MNN_DATA_FORMAT_NC4HW4) { auto normalTensor = dst; if (dfmt == MNN_DATA_FORMAT_NC4HW4) { normalTensor = src; @@ -635,52 +635,62 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff pipeline = makeComputePipelineWithSourceOption(gNC4HW4Convert, "main0", option); mRuntime->insertPipeline(keys, pipeline); } - [encoder 
setComputePipelineState:pipeline]; + res.pipeline = pipeline; auto size = getTensorShape(shape, normalTensor); - MetalBackend::setTensor(src, encoder, 0); - MetalBackend::setTensor(dst, encoder, 1); - [encoder setBuffer:shape offset:0 atIndex:2]; auto gl = [ctx computeBestGroupAndLocal:pipeline threads:size]; - [encoder dispatchThreadgroups:gl.first threadsPerThreadgroup:gl.second]; + res.groupSize = gl.first; + res.localSize = gl.second; + return res; + } + // NCHW <-> NHWC + std::vector keys = { + "transpose", + srcType, + dstType + }; + id pipeline = mRuntime->findPipeline(keys); + if (nil == pipeline) { + MTLCompileOptions *option = [[MTLCompileOptions alloc] init]; + auto dic = [NSMutableDictionary dictionaryWithCapacity:0]; + [dic setValue:@(keys[1].c_str()) forKey:@"IType"]; + [dic setValue:@(keys[2].c_str()) forKey:@"OType"]; + option.preprocessorMacros = dic; + pipeline = makeComputePipelineWithSourceOption(gTranspose, "main0", option); + mRuntime->insertPipeline(keys, pipeline); + } + res.pipeline = pipeline; + int n, c, plane; + _getNCPlane(dst, plane, c, n); + auto shapePtr = (uint32_t*)shape.contents; + shapePtr[0] = n; + shapePtr[3] = 1; + if (MNN_DATA_FORMAT_NHWC == dfmt) { + shapePtr[1] = plane; + shapePtr[2] = c; } else { - // NCHW <-> NHWC - auto srcType = _getType(src->getType(), sfmt, mUseFloatAsFp16 && castType != 1); - auto dstType = _getType(dst->getType(), dfmt, mUseFloatAsFp16 && castType != 2); - std::vector keys = { - "transpose", - srcType, - dstType - }; - id pipeline = mRuntime->findPipeline(keys); - if (nil == pipeline) { - MTLCompileOptions *option = [[MTLCompileOptions alloc] init]; - auto dic = [NSMutableDictionary dictionaryWithCapacity:0]; - [dic setValue:@(keys[1].c_str()) forKey:@"IType"]; - [dic setValue:@(keys[2].c_str()) forKey:@"OType"]; - option.preprocessorMacros = dic; - pipeline = makeComputePipelineWithSourceOption(gTranspose, "main0", option); - mRuntime->insertPipeline(keys, pipeline); - } - [encoder setComputePipelineState:pipeline]; - int n, c, plane; - _getNCPlane(dst, plane, c, n); - auto shapePtr = (uint32_t*)shape.contents; - shapePtr[0] = n; - shapePtr[3] = 1; - if (MNN_DATA_FORMAT_NHWC == dfmt) { - shapePtr[1] = plane; - shapePtr[2] = c; - } else { - shapePtr[1] = c; - shapePtr[2] = plane; - } - auto size = plane * n * c; - setTensor(src, encoder, 0); - setTensor(dst, encoder, 1); - [encoder setBuffer:shape offset:0 atIndex:2]; - [encoder dispatchThreadgroups:MTLSizeMake(UP_DIV(size, 256), 1, 1) threadsPerThreadgroup:MTLSizeMake(256, 1, 1)]; + shapePtr[1] = c; + shapePtr[2] = plane; } + auto size = plane * n * c; + res.localSize = MTLSizeMake(256, 1, 1); + res.groupSize = MTLSizeMake(UP_DIV(size, 256), 1, 1); + return res; +} +static void _execute(id encoder, const MetalBackend::CopyPipeline& info, std::pair, int> src, std::pair, int> dst) { + [encoder setComputePipelineState:info.pipeline]; + [encoder setBuffer:src.first offset:src.second atIndex:0]; + [encoder setBuffer:dst.first offset:dst.second atIndex:1]; + [encoder setBuffer:info.shape offset:0 atIndex:2]; + [encoder dispatchThreadgroups:info.groupSize threadsPerThreadgroup:info.localSize]; +} +void MetalBackend::onCopyDeviceToDevice(const Tensor *src, const Tensor *dst, + id encoder, id shape, int castType) const { + auto ctx = (__bridge MNNMetalContext *)context(); + auto info = _makeCopyInfo(src, dst, shape, castType); + auto standalone = encoder == nil; + encoder = encoder ?: [getCommandBufferForBufferCopy() computeCommandEncoder]; + _execute(encoder, info, 
MetalBackend::getBuffer(src), MetalBackend::getBuffer(dst)); if (standalone) { [encoder endEncoding]; MNN_PRINT_ENCODER(ctx, encoder); @@ -724,22 +734,21 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff if (needConvert) { auto tDst = const_cast(dst); auto tmpBuffer = getHostBuffer(dst->usize()); - MetalRuntimeAllocator::MetalBufferAlloc tmp(tmpBuffer); - TensorUtils::getDescribe(tDst)->extra.offset = 0; - tDst->buffer().device = (uint64_t)(&tmp); - onCopyDeviceToDevice(src, dst, nullptr, nullptr, 2); - tDst->buffer().device = 0; - devicePtr = (uint8_t*)tmpBuffer.contents; + auto info = _makeCopyInfo(src, dst, shape, 2); + auto standalone = encoder == nil; + encoder = encoder ?: [getCommandBufferForBufferCopy() computeCommandEncoder]; + _execute(encoder, info, MetalBackend::getBuffer(src), std::make_pair(tmpBuffer, 0)); + if (standalone) { + [encoder endEncoding]; + } commit(); + devicePtr = (uint8_t*)tmpBuffer.contents; } wait(); ::memcpy(dst->host(), devicePtr, dst->usize()); return; } if (src->buffer().host && !dst->buffer().host) { - auto device = (id)((MetalRuntimeAllocator::MetalBufferAlloc *)dst->deviceId())->getBuffer(); - auto devicePtr = (uint8_t*)device.contents + TensorUtils::getDescribe(dst)->extra.offset; - // For command queue from user, need user to make sure last frame's gpu work is ready bool needWait = !mRuntime->userSync(); if (needWait) { @@ -749,13 +758,17 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff if (needConvert) { auto tmpBuffer = getHostBuffer(srcSize); ::memcpy(tmpBuffer.contents, src->host(), srcSize); - MetalRuntimeAllocator::MetalBufferAlloc tmp(tmpBuffer); - auto tSrc = const_cast(src); - TensorUtils::getDescribe(tSrc)->extra.offset = 0; - tSrc->buffer().device = (uint64_t)(&tmp); - onCopyDeviceToDevice(tSrc, dst, nullptr, nullptr, 1); - tSrc->buffer().device = 0; + auto info = _makeCopyInfo(src, dst, shape, 1); + auto standalone = encoder == nil; + encoder = encoder ?: [getCommandBufferForBufferCopy() computeCommandEncoder]; + _execute(encoder, info, std::make_pair(tmpBuffer, 0), MetalBackend::getBuffer(dst)); + if (standalone) { + [encoder endEncoding]; + } + commit(); } else { + auto device = (id)((MetalRuntimeAllocator::MetalBufferAlloc *)dst->deviceId())->getBuffer(); + auto devicePtr = (uint8_t*)device.contents + TensorUtils::getDescribe(dst)->extra.offset; ::memcpy(devicePtr, src->host(), srcSize); } return; @@ -797,7 +810,7 @@ kernel void main0(const device IType *in [[buffer(0)]], device OType *out [[buff void MetalBackend::setTensor(const MNN::Tensor* tensor, id encoder, int index) { [encoder setBuffer:((MetalRuntimeAllocator::MetalBufferAlloc *)tensor->deviceId())->getBuffer() offset:TensorUtils::getDescribe(tensor)->extra.offset atIndex:index]; } -std::pair, int> MetalBackend::getBuffer(MNN::Tensor* tensor) { +std::pair, int> MetalBackend::getBuffer(const MNN::Tensor* tensor) { return std::make_pair(((MetalRuntimeAllocator::MetalBufferAlloc *)tensor->deviceId())->getBuffer(), TensorUtils::getDescribe(tensor)->extra.offset); } diff --git a/source/backend/opencl/core/OpenCLBackend.cpp b/source/backend/opencl/core/OpenCLBackend.cpp index 74f765912..d9bc65fab 100644 --- a/source/backend/opencl/core/OpenCLBackend.cpp +++ b/source/backend/opencl/core/OpenCLBackend.cpp @@ -338,7 +338,7 @@ Backend::MemObj* OpenCLBackend::onAcquire(const Tensor* nativeTensor, StorageTyp size = N * alignC * W * H; size = size + hR * W * 4 + wR * 4; } else { - size = nativeTensor->elementSize(); + 
size = N * H * W * C; size = ROUND_UP(size, 4); } if (mOpenCLRuntime->isSupportedIntelSubgroup()) { diff --git a/source/backend/opencl/core/OpenCLGemmTune.cpp b/source/backend/opencl/core/OpenCLGemmTune.cpp index 870bf7530..00cd3ed98 100644 --- a/source/backend/opencl/core/OpenCLGemmTune.cpp +++ b/source/backend/opencl/core/OpenCLGemmTune.cpp @@ -34,17 +34,10 @@ static void generateCombinations(const std::vector> &candi static bool isCandidateValid(uint32_t kwg, uint32_t kwi, uint32_t mwg, uint32_t mdimc, uint32_t vwm, uint32_t nwg, uint32_t ndimc, uint32_t vwn, uint32_t mdima, uint32_t ndimb, uint32_t sa, uint32_t sb, OpenCLRuntime *runtime, const std::vector& gemmSize) { // problem size align - if(gemmSize[0] % mwg != 0 || gemmSize[1] % nwg != 0 || gemmSize[2] % kwg != 0) { - return false; - } - // mwg nwg only for M N equal to 16 - if((gemmSize[0] > 16 && mwg == 16) || (gemmSize[1] > 16 && nwg == 16)) { - return false; - } - // params align - if(kwg % kwi != 0) { + if(gemmSize[0] % mwg != 0 || gemmSize[1] % nwg != 0) { return false; } + if(mwg % (mdimc * vwm) != 0 || mwg % (mdima * vwm) != 0) { return false; } @@ -53,9 +46,19 @@ static bool isCandidateValid(uint32_t kwg, uint32_t kwi, uint32_t mwg, uint32_t } uint32_t kdima = (mdimc * ndimc) / mdima; uint32_t kdimb = (mdimc * ndimc) / ndimb; - if(kwg % kdima != 0 || kwg % kdimb != 0) { - return false; + if(sa == 1 || sb == 1) { + // params align + if(kwg % kwi != 0) { + return false; + } + if(kwg % kdima != 0 || kwg % kdimb != 0) { + return false; + } + if(gemmSize[2] % kwg != 0) { + return false; + } } + if(mdimc != mdima || ndimc != ndimb) { return false; } @@ -63,6 +66,11 @@ static bool isCandidateValid(uint32_t kwg, uint32_t kwi, uint32_t mwg, uint32_t return false; } + // no local memory no need tune kwg + if(sa == 0 && sb == 0 && kwg == 32) { + return false; + } + // local memory limit uint32_t local_mem_size = 0; if(sa) { @@ -89,27 +97,31 @@ static bool isCandidateValid(uint32_t kwg, uint32_t kwi, uint32_t mwg, uint32_t if(mdimc != mdima || ndimc != ndimb) { return false; } + + bool totalLarge = 1.0 * gemmSize[0] / 1024 * gemmSize[1] / 1024 * gemmSize[2] / 1024 >= 0.5; bool dimLarge = gemmSize[0] > 128 && gemmSize[1] > 128 && gemmSize[2] > 128; - if(totalLarge && dimLarge) { - if(mwg * nwg < 128 * 64) { - return false; - } - if(mdimc * ndimc < 16 * 8) { - return false; - } - if(vwm * vwn < 4 * 4) { - return false; - } - } else { - if(mwg * nwg > 128 * 64) { - return false; - } - if(mdimc * ndimc > 16 * 8) { - return false; - } - if(vwm * vwn > 4 * 4) { - return false; + if(gemmSize[4] == 1) { + if(totalLarge && dimLarge) { + if(mwg * nwg < 128 * 64) { + return false; + } + if(mdimc * ndimc < 16 * 8) { + return false; + } + if(vwm * vwn < 4 * 4) { + return false; + } + } else { + if(mwg * nwg > 128 * 64) { + return false; + } + if(mdimc * ndimc > 16 * 8) { + return false; + } + if(vwm * vwn > 4 * 4) { + return false; + } } } @@ -118,14 +130,13 @@ static bool isCandidateValid(uint32_t kwg, uint32_t kwi, uint32_t mwg, uint32_t std::vector getGemmParams(const std::vector &gemmSize, const std::vector tensorMemory, OpenCLRuntime *runtime) { - MNN_ASSERT(gemmSize.size() == 5); // M, N, K, Layout, B + MNN_ASSERT(gemmSize.size() == 6); // M, N, K, Layout, Batch, Bias MNN_ASSERT(gemmSize[0] % 16 == 0); MNN_ASSERT(gemmSize[1] % 16 == 0); - MNN_ASSERT(gemmSize[2] % 16 == 0); + MNN_ASSERT(gemmSize[2] % 4 == 0); - MNN_ASSERT(tensorMemory.size() == 3); + MNN_ASSERT((gemmSize[5] == 0 && tensorMemory.size() == 3) || (gemmSize[5] == 1 && 
tensorMemory.size() == 4)); auto& tunedGemmParams = runtime->tunedGemmParamsMap(); - std::vector info(gemmSize); uint32_t isFp16 = runtime->isSupportedFP16(); @@ -153,123 +164,111 @@ std::vector getGemmParams(const std::vector &gemmSize, const if(gemmSize[0] >= 256 && gemmSize[1] >= 256 && gemmSize[2] >= 256) { if(multiNum > 8.0) { if(maxDivsorM >= 128 && maxDivsorN >= 64) { - return {0, 1, 16, 2, 16, 16, 128, 8, 8, 64, 0, 0, 0, 1, 8, 8}; + return {16, 2, 16, 16, 128, 8, 8, 64, 0, 0, 0, 1, 8, 8}; } } if(maxDivsorM >= 64 && maxDivsorN >= 64) { - return {0, 1, 16, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 1, 8, 8}; + return {16, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 1, 8, 8}; } } } else {// BatchGemm if(maxDivsorM >= 64 && maxDivsorN >= 128) { - return {0, 1, 16, 2, 16, 16, 64, 8, 8, 128, 0, 0, 1, 0, 2, 8}; + return {16, 2, 16, 16, 64, 8, 8, 128, 0, 0, 1, 0, 2, 8}; } else if(maxDivsorM >= 64 && maxDivsorN >= 64) { - return {0, 1, 16, 2, 8, 8, 64, 8, 8, 64, 0, 0, 1, 0, 4, 4}; + return {16, 2, 8, 8, 64, 8, 8, 64, 0, 0, 1, 0, 4, 4}; } } - return {0, 1, 16, 2, 4, 4, 16, 4, 4, 16, 0, 0, 1, 0, 2, 2}; + return {16, 2, 4, 4, 16, 4, 4, 16, 0, 0, 1, 0, 2, 2}; } std::vector> totalCombinations; // save total candidate combinations - std::vector params_prefer = {0, 1, 16, 2, 4, 4, 16, 4, 4, 16, 0, 0, 1, 0, 2, 2}; + std::vector params_prefer = {16, 2, 4, 4, 16, 4, 4, 16, 0, 0, 1, 0, 2, 2}; totalCombinations.emplace_back(params_prefer); uint32_t min_cost = UINT_MAX; - if(runtime->getCLTuneLevel() >= Normal) { - // set candidates - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 1, 1, 2, 8});//12 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 0, 0, 8, 8});//11 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 8, 8});//4 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 2, 8});//2 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 0, 1, 8, 8}); - totalCombinations.push_back({0, 1, 16, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 0, 0, 2, 8}); - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 0, 0, 4, 4}); - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 32 , 0, 0, 1, 0, 4, 4});//1 - totalCombinations.push_back({0, 1, 16, 2, 8, 8 , 32 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8});//2 - - if(runtime->getCLTuneLevel() == Normal) { - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 1, 1, 2, 8});//10 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 0, 0, 8, 8});//6 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 0, 1, 8, 8});//6 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 2, 8});//4 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 8, 8});//4 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 0, 1, 8, 8});//4 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 2, 8});//3 - totalCombinations.push_back({0, 1, 16, 2, 8, 8 , 64 , 8 , 8 , 64 , 0, 0, 1, 0, 2, 8});//1 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 4, 4});//1 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 8, 8});//2 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8});//3 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 32 , 0, 0, 1, 1, 4, 4});//1 - totalCombinations.push_back({0, 1, 32, 
2, 16, 16, 128, 8 , 8 , 32 , 0, 0, 1, 1, 4, 4});//1 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 0, 0, 8, 8});//1 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 16, 16, 128, 0, 0, 0, 0, 8, 8});//2 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 16, 16, 128, 0, 0, 0, 1, 8, 8});//2 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 8, 8});//2 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 16, 16, 128, 0, 0, 1, 0, 8, 8});//1 - totalCombinations.push_back({0, 1, 32, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8});//1 - totalCombinations.push_back({0, 1, 32, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 1, 1, 2, 8});//1 - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 0, 1, 4, 4});//1 - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 0, 1, 4, 4}); - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 1, 0, 4, 4}); - totalCombinations.push_back({0, 1, 16, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8}); - totalCombinations.push_back({0, 1, 32, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 0, 0, 2, 8}); - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 2, 8}); - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 4, 8}); - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 0, 0, 8, 8}); - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 0, 1, 8, 8}); - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 0, 0, 8, 8}); - totalCombinations.push_back({0, 1, 32, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 1, 1, 8, 8}); - totalCombinations.push_back({0, 1, 16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 1, 0, 8, 8}); + if(runtime->getCLTuneLevel() >= Wide) { + // set candidates= + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 0, 0, 4, 8});//12 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 0, 0, 8, 8});//11 .. 
+ totalCombinations.push_back({16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 0, 0, 8, 8});//1 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 32 , 0, 0, 0, 1, 8, 4});//1 + totalCombinations.push_back({16, 2, 8 , 8 , 16 , 8 , 8 , 64, 0, 0, 0, 0, 2, 8}); + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 0, 1, 4, 8});//10 + + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 0, 0, 4, 4}); + totalCombinations.push_back({16, 2, 8, 8 , 32 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8});//2 + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 1, 1, 2, 8});//12 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 2, 8});//2 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 0, 0, 8, 8}); + totalCombinations.push_back({16, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 0, 0, 2, 8}); + + if(runtime->getCLTuneLevel() < Fast) { + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 8, 8});//4 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 0, 1, 8, 8});//6 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 8, 8});//4 + + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 2, 8});//3 + totalCombinations.push_back({16, 2, 8, 8 , 64 , 8 , 8 , 64 , 0, 0, 1, 0, 2, 8});//1 + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 1, 4, 4});//1 + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8});//3 + + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 32 , 0, 0, 0, 0, 4, 4});//1 + totalCombinations.push_back({16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 0, 1, 8, 8});//2 + totalCombinations.push_back({16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 1, 0, 8, 8});//1 + totalCombinations.push_back({16, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 1, 0, 2, 8});//1 + totalCombinations.push_back({16, 2, 8 , 8 , 16 , 8 , 8 , 128, 0, 0, 1, 1, 2, 8});//1 + + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 0, 1, 4, 4});//1 + totalCombinations.push_back({16, 2, 16, 16, 64 , 8 , 8 , 32 , 0, 0, 1, 0, 4, 4}); + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 64 , 0, 0, 1, 0, 4, 8}); + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 0, 1, 8, 8}); + totalCombinations.push_back({16, 2, 16, 16, 128, 8 , 8 , 128, 0, 0, 1, 1, 8, 8}); + + totalCombinations.push_back({16, 2, 8, 8, 32, 8, 8, 32, 0, 0, 1, 0, 2, 4}); + totalCombinations.push_back({16, 2, 8, 8, 16, 8, 8, 32, 0, 0, 1, 1, 2, 4}); } } else { // get all combinations std::vector> candidates = { - {0}, // GEMMK - {16, 32, 64, 128}, // MWG - {16, 32, 64, 128}, // NWG {16, 32}, // KWG - {8, 16}, // MDIMC - {8, 16}, // NDIMC + {2}, // KWI {8, 16}, // MDIMA + {8, 16}, // MDIMC + {16, 32, 64, 128}, // MWG {8, 16}, // NDIMB - {2}, // KWI - {2, 4, 8}, // VWM - {2, 4, 8}, // VWN - {0, 1}, // STRM - {0, 1}, // STRN + {8, 16}, // NDIMC + {16, 32, 64, 128}, // NWG {0}, // SA {0}, // SB - {1} // KREG + {0, 1}, // STRM + {0, 1}, // STRN + {2, 4, 8}, // VWM + {2, 4, 8} // VWN }; std::vector currentCombination(candidates.size()); generateCombinations(candidates, currentCombination, totalCombinations, 0); } - for(int i = 0; i < totalCombinations.size(); i++) { - uint32_t gemmk = totalCombinations[i][0]; - uint32_t kreg = totalCombinations[i][1]; - uint32_t kwg = totalCombinations[i][2]; - uint32_t kwi = totalCombinations[i][3]; - uint32_t mdima = totalCombinations[i][4]; - uint32_t mdimc = totalCombinations[i][5]; - uint32_t mwg = 
totalCombinations[i][6]; - uint32_t ndimb = totalCombinations[i][7]; - uint32_t ndimc = totalCombinations[i][8]; - uint32_t nwg = totalCombinations[i][9]; - uint32_t sa = totalCombinations[i][10]; - uint32_t sb = totalCombinations[i][11]; - uint32_t strm = totalCombinations[i][12]; - uint32_t strn = totalCombinations[i][13]; - uint32_t vwm = totalCombinations[i][14]; - uint32_t vwn = totalCombinations[i][15]; + uint32_t kwg = totalCombinations[i][0]; + uint32_t kwi = totalCombinations[i][1]; + uint32_t mdima = totalCombinations[i][2]; + uint32_t mdimc = totalCombinations[i][3]; + uint32_t mwg = totalCombinations[i][4]; + uint32_t ndimb = totalCombinations[i][5]; + uint32_t ndimc = totalCombinations[i][6]; + uint32_t nwg = totalCombinations[i][7]; + uint32_t sa = totalCombinations[i][8]; + uint32_t sb = totalCombinations[i][9]; + uint32_t strm = totalCombinations[i][10]; + uint32_t strn = totalCombinations[i][11]; + uint32_t vwm = totalCombinations[i][12]; + uint32_t vwn = totalCombinations[i][13]; if(isCandidateValid(kwg, kwi, mwg, mdimc, vwm, nwg, ndimc, vwn, mdima, ndimb, sa, sb, runtime, gemmSize)) { std::set buildOptions; buildOptions.clear(); - buildOptions.emplace("-DGEMMK=" + std::to_string(gemmk)); - buildOptions.emplace("-DKREG=" + std::to_string(kreg)); buildOptions.emplace("-DKWG=" + std::to_string(kwg)); buildOptions.emplace("-DKWI=" + std::to_string(kwi)); buildOptions.emplace("-DMDIMA=" + std::to_string(mdima)); @@ -293,12 +292,10 @@ std::vector getGemmParams(const std::vector &gemmSize, const buildOptions.emplace(" -DRELAX_WORKGROUP_SIZE=1"); } - if(runtime->isSupportedFP16()){ - buildOptions.emplace(" -DPRECISION=16"); - } else { - buildOptions.emplace(" -DPRECISION=32"); + if(gemmSize[5] == 1) { + buildOptions.emplace(" -DBIAS"); } - + int localM = mdimc; int localN = ndimc; @@ -328,9 +325,6 @@ std::vector getGemmParams(const std::vector &gemmSize, const float beta = 0.0f; // A: [n, l, e] // B: [n, l, h] - int offset_a = 0; - int offset_b = 0; - int offset_c = 0; cl::Event event; int idx = 0; @@ -341,15 +335,20 @@ std::vector getGemmParams(const std::vector &gemmSize, const ret |= kernel->get().setArg(idx++, alpha); ret |= kernel->get().setArg(idx++, beta); if(gemmSize[4] > 1) { + int batch_offset_a = gemmSize[0] * gemmSize[2]; + int batch_offset_b = gemmSize[1] * gemmSize[2]; + int batch_offset_c = gemmSize[0] * gemmSize[1]; + ret |= kernel->get().setArg(idx++, tensorMemory[0]); - ret |= kernel->get().setArg(idx++, gemmSize[0]); - ret |= kernel->get().setArg(idx++, gemmSize[2]); + ret |= kernel->get().setArg(idx++, batch_offset_a); ret |= kernel->get().setArg(idx++, tensorMemory[1]); - ret |= kernel->get().setArg(idx++, gemmSize[1]); - ret |= kernel->get().setArg(idx++, gemmSize[2]); + ret |= kernel->get().setArg(idx++, batch_offset_b); + if(gemmSize[5] == 1) { + ret |= kernel->get().setArg(idx++, tensorMemory[3]); + ret |= kernel->get().setArg(idx++, gemmSize[1]); + } ret |= kernel->get().setArg(idx++, tensorMemory[2]); - ret |= kernel->get().setArg(idx++, gemmSize[0]); - ret |= kernel->get().setArg(idx++, gemmSize[1]); + ret |= kernel->get().setArg(idx++, batch_offset_c); MNN_CHECK_CL_SUCCESS(ret, "setArg getGemmParams XgemmBatchhed Kernel"); @@ -360,8 +359,15 @@ std::vector getGemmParams(const std::vector &gemmSize, const continue; } } else { + int offset_a = 0; + int offset_b = 0; + int offset_c = 0; + ret |= kernel->get().setArg(idx++, tensorMemory[0]); ret |= kernel->get().setArg(idx++, tensorMemory[1]); + if(gemmSize[5] == 1) { + ret |= kernel->get().setArg(idx++, 
tensorMemory[3]); + } ret |= kernel->get().setArg(idx++, tensorMemory[2]); ret |= kernel->get().setArg(idx++, offset_a); ret |= kernel->get().setArg(idx++, offset_b); @@ -381,27 +387,26 @@ std::vector getGemmParams(const std::vector &gemmSize, const int cost_time = (int)runtime->getCostTime(&event); if(cost_time < min_cost) { min_cost = cost_time; - params_prefer[0] = gemmk; - params_prefer[1] = kreg; - params_prefer[2] = kwg; - params_prefer[3] = kwi; - params_prefer[4] = mdima; - params_prefer[5] = mdimc; - params_prefer[6] = mwg; - params_prefer[7] = ndimb; - params_prefer[8] = ndimc; - params_prefer[9] = nwg; - params_prefer[10] = sa; - params_prefer[11] = sb; - params_prefer[12] = strm; - params_prefer[13] = strn; - params_prefer[14] = vwm; - params_prefer[15] = vwn; - -// for(auto &iter : params_prefer) { -// MNN_PRINT("%d ", iter); -// } -// MNN_PRINT(": %d us, shape:%d %d %d batch:%d, flops:%f GFLOPS\n", min_cost, gemmSize[0], gemmSize[1], gemmSize[2], gemmSize[4], 2.0 / 1000.0 * gemmSize[0] * gemmSize[1] * gemmSize[2] * gemmSize[4] / min_cost); + params_prefer[0] = kwg; + params_prefer[1] = kwi; + params_prefer[2] = mdima; + params_prefer[3] = mdimc; + params_prefer[4] = mwg; + params_prefer[5] = ndimb; + params_prefer[6] = ndimc; + params_prefer[7] = nwg; + params_prefer[8] = sa; + params_prefer[9] = sb; + params_prefer[10] = strm; + params_prefer[11] = strn; + params_prefer[12] = vwm; + params_prefer[13] = vwn; + #ifdef TIME_TUNE_LOG + for(auto &iter : params_prefer) { + MNN_PRINT("%d ", iter); + } + MNN_PRINT(": %d us, shape:%d %d %d batch:%d, layout:%d bias:%d, flops:%f GFLOPS\n", min_cost, gemmSize[0], gemmSize[1], gemmSize[2], gemmSize[4], gemmSize[3], gemmSize[5], 2.0 / 1000.0 * gemmSize[0] * gemmSize[1] * gemmSize[2] * gemmSize[4] / min_cost); + #endif } } } diff --git a/source/backend/opencl/core/OpenCLOPRegister.cpp b/source/backend/opencl/core/OpenCLOPRegister.cpp index 9f244d651..3b0eeb5f6 100644 --- a/source/backend/opencl/core/OpenCLOPRegister.cpp +++ b/source/backend/opencl/core/OpenCLOPRegister.cpp @@ -2,6 +2,7 @@ #ifndef MNN_OPENCL_SEP_BUILD namespace MNN { namespace OpenCL { +#ifndef MNN_OPENCL_BUFFER_CLOSED extern void ___OpenCLInterp3DBufCreator__OpType_Interp3D__BUFFER__(); extern void ___OpenCLReductionBufCreator__OpType_Reduction__BUFFER__(); extern void ___OpenCLArgMaxBufCreator__OpType_ArgMax__BUFFER__(); @@ -29,6 +30,7 @@ extern void ___OpenCLUnaryBufCreator__OpType_Sigmoid__BUFFER__(); extern void ___OpenCLUnaryBufCreator__OpType_TanH__BUFFER__(); extern void ___OpenCLGridSampleBufCreator__OpType_GridSample__BUFFER__(); extern void ___OpenCLScaleBufCreator__OpType_Scale__BUFFER__(); +#endif extern void ___OpenCLDepthwiseConvolutionCreator__OpType_ConvolutionDepthwise__IMAGE__(); extern void ___OpenCLMatMulCreator__OpType_MatMul__IMAGE__(); extern void ___OpenCLUnaryCreator__OpType_UnaryOp__IMAGE__(); @@ -60,12 +62,13 @@ extern void ___OpenCLInterpCreator__OpType_Interp__IMAGE__(); extern void ___OpenCLGridSampleCreator__OpType_GridSample__IMAGE__(); #ifdef MNN_SUPPORT_TRANSFORMER_FUSE -extern void ___OpenCLAttentionBufCreator__OpType_Attention__BUFFER__(); extern void ___OpenCLSelfAttentionBufCreator__OpType_FmhaV2__BUFFER__(); -extern void ___OpenCLGroupNormBufCreator__OpType_GroupNorm__BUFFER__(); extern void ___OpenCLSplitGeluBufCreator__OpType_SplitGeLU__BUFFER__(); +extern void ___OpenCLGroupNormBufCreator__OpType_GroupNorm__BUFFER__(); +extern void ___OpenCLAttentionBufCreator__OpType_Attention__BUFFER__(); #endif void registerOpenCLOps() { 
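+// Skip buffer-based creators when MNN_OPENCL_BUFFER_CLOSED is defined (those kernels are presumably not built in that configuration).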
+#ifndef MNN_OPENCL_BUFFER_CLOSED ___OpenCLInterp3DBufCreator__OpType_Interp3D__BUFFER__(); ___OpenCLReductionBufCreator__OpType_Reduction__BUFFER__(); ___OpenCLArgMaxBufCreator__OpType_ArgMax__BUFFER__(); @@ -93,6 +96,7 @@ ___OpenCLUnaryBufCreator__OpType_Sigmoid__BUFFER__(); ___OpenCLUnaryBufCreator__OpType_TanH__BUFFER__(); ___OpenCLGridSampleBufCreator__OpType_GridSample__BUFFER__(); ___OpenCLScaleBufCreator__OpType_Scale__BUFFER__(); +#endif ___OpenCLDepthwiseConvolutionCreator__OpType_ConvolutionDepthwise__IMAGE__(); ___OpenCLMatMulCreator__OpType_MatMul__IMAGE__(); ___OpenCLUnaryCreator__OpType_UnaryOp__IMAGE__(); @@ -122,11 +126,12 @@ ___OpenCLInterp3DCreator__OpType_Interp3D__IMAGE__(); ___OpenCLCastCreator__OpType_Cast__IMAGE__(); ___OpenCLInterpCreator__OpType_Interp__IMAGE__(); ___OpenCLGridSampleCreator__OpType_GridSample__IMAGE__(); + #ifdef MNN_SUPPORT_TRANSFORMER_FUSE -___OpenCLAttentionBufCreator__OpType_Attention__BUFFER__(); ___OpenCLSelfAttentionBufCreator__OpType_FmhaV2__BUFFER__(); -___OpenCLGroupNormBufCreator__OpType_GroupNorm__BUFFER__(); ___OpenCLSplitGeluBufCreator__OpType_SplitGeLU__BUFFER__(); +___OpenCLGroupNormBufCreator__OpType_GroupNorm__BUFFER__(); +___OpenCLAttentionBufCreator__OpType_Attention__BUFFER__(); #endif } } diff --git a/source/backend/opencl/core/OpenCLRunningUtils.cpp b/source/backend/opencl/core/OpenCLRunningUtils.cpp index 2ad64aa69..3898224d3 100644 --- a/source/backend/opencl/core/OpenCLRunningUtils.cpp +++ b/source/backend/opencl/core/OpenCLRunningUtils.cpp @@ -194,9 +194,9 @@ std::pair, uint32_t> localWS3DDefault(const std::vectorgetCLTuneLevel() == Fast) { while(lws[2] <= gws[2] && lws[2] <= 8) { lws[1] = 1; - while(lws[1] <= gws[1] && lws[1] <= 8) { + while(lws[1] <= gws[1] && lws[1] <= 16) { lws[0] = 1; - while(lws[0] <= gws[0] && lws[0] <= 8) { + while(lws[0] <= gws[0] && lws[0] <= 16) { if(lws[0] <= maxWorkItemSizes[0] && lws[1] <= maxWorkItemSizes[1] && lws[2] <= maxWorkItemSizes[2] && lws[0]*lws[1]*lws[2] <= std::min(maxWorkGroupSize, static_cast(64)) && lws[0]*lws[1]*lws[2] >= 16) { cl::Event event; std::vector internalGlobalWS(3, 1); diff --git a/source/backend/opencl/core/runtime/OpenCLRuntime.cpp b/source/backend/opencl/core/runtime/OpenCLRuntime.cpp index 1dde1dcbc..7d6c4e5de 100644 --- a/source/backend/opencl/core/runtime/OpenCLRuntime.cpp +++ b/source/backend/opencl/core/runtime/OpenCLRuntime.cpp @@ -951,12 +951,12 @@ bool OpenCLRuntime::setCache(std::pair cache) { MNN_ERROR("Error tunning gemm info\n"); return false; } - MNN_ASSERT(tun->gemmSize()->size() == 6); + MNN_ASSERT(tun->gemmSize()->size() == 7); std::vector info(tun->gemmSize()->size()); for (int v=0; vgemmSize()->data()[v]; } - MNN_ASSERT(tun->paramInfo()->size() == 16); + MNN_ASSERT(tun->paramInfo()->size() == 14); std::vector params(tun->paramInfo()->size()); for (int v=0; vparamInfo()->data()[v]; diff --git a/source/backend/opencl/execution/buffer/AttentionBufExecution.cpp b/source/backend/opencl/execution/buffer/AttentionBufExecution.cpp index 2382a7081..2ca359ecf 100644 --- a/source/backend/opencl/execution/buffer/AttentionBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/AttentionBufExecution.cpp @@ -6,7 +6,6 @@ // Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #include "backend/opencl/execution/buffer/AttentionBufExecution.hpp" @@ -421,10 +420,8 @@ class AttentionBufCreator : public OpenCLBackend::Creator { return new AttentionBufExecution(op, backend, 
param->kv_cache()); } }; -REGISTER_OPENCL_OP_CREATOR(AttentionBufCreator, OpType_Attention, BUFFER); +REGISTER_OPENCL_OP_CREATOR_TRANSFORMER(AttentionBufCreator, OpType_Attention, BUFFER); } // namespace OpenCL } // namespace MNN #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif/* MNN_OPENCL_BUFFER_CLOSED */ - diff --git a/source/backend/opencl/execution/buffer/AttentionBufExecution.hpp b/source/backend/opencl/execution/buffer/AttentionBufExecution.hpp index 9de3796d9..cb33dc05d 100644 --- a/source/backend/opencl/execution/buffer/AttentionBufExecution.hpp +++ b/source/backend/opencl/execution/buffer/AttentionBufExecution.hpp @@ -6,7 +6,6 @@ // Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #ifndef AttentionBufExecution_hpp @@ -83,4 +82,3 @@ class AttentionBufExecution : public CommonExecution { } // namespace MNN #endif /* AttentionBufExecution_hpp */ #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif /* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/ConvBufExecution.cpp b/source/backend/opencl/execution/buffer/ConvBufExecution.cpp index d0fdc0cda..8eb739f28 100644 --- a/source/backend/opencl/execution/buffer/ConvBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/ConvBufExecution.cpp @@ -25,7 +25,7 @@ ConvBufCommonExecution::ConvBufCommonExecution(Backend *backend) { ConvBufCommonExecution::ConvBufCommonExecution(const Convolution2D *conv2dParams, Backend *backend) { auto openclBackend = (OpenCLBackend *)backend; int biasSize = conv2dParams->common()->outputCount(); - int buffer_size = ROUND_UP(biasSize, 16);//pack to 16 + int buffer_size = ROUND_UP(biasSize, 32);//pack to packN if(openclBackend->getOpenCLRuntime()->isSupportedFP16()) { buffer_size *= sizeof(half_float::half); } else { @@ -33,7 +33,7 @@ ConvBufCommonExecution::ConvBufCommonExecution(const Convolution2D *conv2dParams } mResource.reset(new ConvBufResource); - mResource->mBias.reset(Tensor::createDevice({1, 1, 1, ROUND_UP(biasSize, 16)})); + mResource->mBias.reset(Tensor::createDevice({1, 1, 1, ROUND_UP(biasSize, 32)})); backend->onAcquireBuffer(mResource->mBias.get(), Backend::STATIC); cl::Buffer &biasBuffer = openCLBuffer(mResource->mBias.get()); @@ -62,38 +62,6 @@ ConvBufCommonExecution::~ConvBufCommonExecution() { // Do nothing } -void ConvBufExecution::setConv1x1WeightBuffer(int packCout, int packCin, const float* filterDataPtr) { - cl_int res; - std::shared_ptr filterBuffer(Tensor::createDevice({ROUND_UP(mResource->mOutputChannel, 8)/*Cout pack set to max 8*/, ROUND_UP(mResource->mInputChannel, packCin), mResource->mKernelWidth, mResource->mKernelHeight})); - - int buffer_size = filterBuffer->elementSize(); - if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()) { - buffer_size *= sizeof(half_float::half); - } else { - buffer_size *= sizeof(float); - } - mResource->mKernelBuffer.reset(new cl::Buffer(mOpenCLBackend->getOpenCLRuntime()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size)); - auto kernelBufferPtr = mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueMapBuffer(*(mResource->mKernelBuffer.get()), true, CL_MAP_WRITE, 0, buffer_size, nullptr, nullptr, &res); - if(kernelBufferPtr != nullptr && res == CL_SUCCESS){ - ::memset(kernelBufferPtr, 0, buffer_size); - for(int o = 0; o < mResource->mOutputChannel; o++){ - for(int i = 0 ; i < mResource->mInputChannel; i++){ - int bufferIdx = (o/packCout) * ROUND_UP(mResource->mInputChannel, packCin)*packCout + 
(i/packCin)*packCin*packCout + (o%packCout)*packCin + (i%packCin);//(Co/packCout, Ci/packCin, packCout, packCin) - int filterIdx = o*mResource->mInputChannel + i; - if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()){ - ((half_float::half*)kernelBufferPtr)[bufferIdx] = (half_float::half)(filterDataPtr[filterIdx]); - }else{ - ((float*)kernelBufferPtr)[bufferIdx] = (float)(filterDataPtr[filterIdx]); - } - } - } - }else{ - MNN_ERROR("Map error ptrCL == nullptr \n"); - MNN_ASSERT(false); - } - mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(*(mResource->mKernelBuffer.get()), kernelBufferPtr); -} - void ConvBufExecution::_generateFilterConvertRegion(Tensor* virtualFilter, Tensor* originBuffer) const { auto filterDes = TensorUtils::getDescribe(virtualFilter); filterDes->regions.clear(); @@ -152,29 +120,19 @@ ConvBufExecution::ConvBufExecution(const std::vector &inputs, const st //select opt conv method bool isConv1x1 = (mResource->mKernelHeight == mResource->mKernelWidth && mResource->mKernelHeight == 1 && mPaddings[0] == 0 && mPaddings[1] == 0 && mResource->mStrides[0] == 1 && mResource->mStrides[1] == 1); - bool useConvGemm = isConv1x1 && inputs[0]->width() == 1 && inputs[0]->height() == 1; + + mResource->mConv1x1Opt = isConv1x1; + mResource->mConv1x1C8Opt = mResource->mConv1x1Opt && mResource->mOutputChannel >= 16; + bool useConvGemm = isConv1x1 && mResource->mInputChannel > 32 && mResource->mOutputChannel > 64; if (useConvGemm) { - // Enough computation - bool isTotalLarge = (inputs[0]->batch() * 1.0 / 512 * mResource->mInputChannel / 512 * mResource->mOutputChannel / 512 > 1.0); - bool isEachDimLarge = (inputs[0]->batch() > 256 && mResource->mInputChannel > 128 && mResource->mOutputChannel > 256); - if(isTotalLarge && isEachDimLarge) { - mResource->mConvGemmOptLevel = 2; - } else if(isTotalLarge && inputs[0]->batch() % 64 == 0 && mResource->mInputChannel % 8 == 0 && mResource->mOutputChannel % 64 == 0) { - mResource->mConvGemmOptLevel = 1; - } + mResource->mConvGemmOptLevel = 2; } - mResource->mConv1x1Opt = isConv1x1 && inputs[0]->width() >= 4; } - if (mResource->mConvGemmOptLevel > 0) { + if (mResource->mConv1x1Opt) { // Tile Match with mConvGemmOptLevel == 2 - int tileK = 32; + int tileK = 4; int tileN = 32; - if(mResource->mConvGemmOptLevel == 1) { - tileK = 8; - tileN = 64; - } - int buffer_size = ROUND_UP(mResource->mOutputChannel, tileN) * ROUND_UP(mResource->mInputChannel, tileK); mResource->mFilter.reset( Tensor::createDevice({buffer_size})); @@ -211,14 +169,6 @@ ConvBufExecution::ConvBufExecution(const std::vector &inputs, const st } mOpenCLBackend->getOpenCLRuntime()->commandQueue().enqueueUnmapMemObject(filterBuffer, ptrCL); - } else if (mResource->mConv1x1Opt) { - //At first, set packCout equal to 4 - if(mResource->mOutputChannel >= 16){ - setConv1x1WeightBuffer(8, 4, mFilterDataPtr); - mResource->mConv1x1C8Opt = true; - }else{ - setConv1x1WeightBuffer(4, 4, mFilterDataPtr); - } } else { mResource->mFilter.reset( Tensor::createDevice({ROUND_UP(mResource->mOutputChannel, 4) * ROUND_UP(mResource->mInputChannel, 4) * mResource->mKernelWidth * mResource->mKernelHeight})); @@ -325,31 +275,63 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const std::string info = std::to_string(inputChannels) + "_" + std::to_string(outChannel) + "_" + std::to_string(mResource->mKernelHeight) + "_" + std::to_string(mResource->mKernelWidth) + "_" + std::to_string(mResource->mStrides[0]) + "_" + std::to_string(mResource->mStrides[1]) + "_" + 
std::to_string(mResource->mDilations[0]) + "_" + std::to_string(mResource->mDilations[1]); + if (mResource->mConvGemmOptLevel > 0) { + int area = height * width; + int M = outputShape.at(0) * area; + int N = outputShape.at(3); + int K = inputShape.at(3); + + bool isAlign = (K % 8 == 0 && area == 1 && N % 64 == 0 && M % 64 == 0); + bool isLimitSize = (M * 1.0 / 512 * N / 512 * K / 512 <= 1.0) && (1.0 * M * K / N / N >= 16.0); + if(isAlign && isLimitSize) { + mResource->mConvGemmOptLevel = 1; + } else if(M < 128 || 1.0 * M / 512 * N / 512 * K / 256 < 1.0) { + mResource->mConvGemmOptLevel = 0; + } + } + if (mResource->mConvGemmOptLevel == 2) { // set large tile - int tileM = 32; + int tileM = 16; int tileN = 32; - int tileK = 32; + int tileK = 4; - int M = outputShape.at(0); + int area = height * width; + int M = outputShape.at(0) * area; int N = outputShape.at(3); int K = inputShape.at(3); + int alignM = ROUND_UP(M, tileM); int alignN = ROUND_UP(N, tileN); int alignK = ROUND_UP(K, tileK); // ReArrange input mConvGemmInpTensor.reset(Tensor::createDevice({alignK * alignM})); - mConvGemmOutTensor.reset(Tensor::createDevice({alignN * alignM})); mOpenCLBackend->onAcquireBuffer(mConvGemmInpTensor.get(), Backend::DYNAMIC); - mOpenCLBackend->onAcquireBuffer(mConvGemmOutTensor.get(), Backend::DYNAMIC); - mOpenCLBackend->onReleaseBuffer(mConvGemmOutTensor.get(), Backend::DYNAMIC); + if(N != alignN || M != alignM || area != 1) { + mNeedOutTempTensor = true; + mConvGemmOutTensor.reset(Tensor::createDevice({alignN * alignM})); + mOpenCLBackend->onAcquireBuffer(mConvGemmOutTensor.get(), Backend::DYNAMIC); + } mOpenCLBackend->onReleaseBuffer(mConvGemmInpTensor.get(), Backend::DYNAMIC); + if(mNeedOutTempTensor) { + mOpenCLBackend->onReleaseBuffer(mConvGemmOutTensor.get(), Backend::DYNAMIC); + } + { std::set buildOptions; + + int m_pack = 1; + if(area == 1) { + m_pack = 4; + buildOptions.emplace("-DAREA_EQUAL_1"); + } else if(outputShape.at(0) == 1) { + m_pack = 4; + buildOptions.emplace("-DBATCH_EQUAL_1"); + } mPreKernel = mOpenCLBackend->getOpenCLRuntime()->buildKernel("gemm_buf", "transpose_pad", buildOptions); uint32_t maxWorkGroupSize = static_cast(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mPreKernel)); - mPreGlobalWorkSize = {static_cast(alignM/4), static_cast(alignK/4)}; + mPreGlobalWorkSize = {static_cast(alignM/m_pack), static_cast(alignK/4)}; int offset = 0; int idx = 0; @@ -360,6 +342,7 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const ret |= mPreKernel->get().setArg(idx++, static_cast(alignK)); ret |= mPreKernel->get().setArg(idx++, static_cast(M)); ret |= mPreKernel->get().setArg(idx++, static_cast(K)); + ret |= mPreKernel->get().setArg(idx++, static_cast(area)); ret |= mPreKernel->get().setArg(idx++, openCLBuffer(input)); ret |= mPreKernel->get().setArg(idx++, openCLBuffer(mConvGemmInpTensor.get())); MNN_CHECK_CL_SUCCESS(ret, "setArg mConvgemmOptLevel==2 PreKernel"); @@ -371,13 +354,24 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const } std::set buildOptions; + uint32_t hasBias = 0; + if(!mNeedOutTempTensor) { + hasBias = 1; + buildOptions = mResource->mBuildOptions; + buildOptions.emplace("-DBIAS"); + } uint32_t layout = 4; uint32_t batch = 1; - auto param = getGemmParams({(uint32_t)alignM, (uint32_t)alignN, (uint32_t)alignK, layout, batch}, {openCLBuffer(mConvGemmInpTensor.get()), openCLBuffer(mResource->mFilter.get()), openCLBuffer(mConvGemmOutTensor.get())}, mOpenCLBackend->getOpenCLRuntime()); + + cl::Buffer outBuffer = 
mNeedOutTempTensor ? openCLBuffer(mConvGemmOutTensor.get()) : openCLBuffer(output); + std::vector param; + if(mNeedOutTempTensor) { + param = getGemmParams({(uint32_t)alignM, (uint32_t)alignN, (uint32_t)alignK, layout, batch, hasBias}, {openCLBuffer(mConvGemmInpTensor.get()), openCLBuffer(mResource->mFilter.get()), openCLBuffer(mConvGemmOutTensor.get())}, mOpenCLBackend->getOpenCLRuntime()); + } else { + param = getGemmParams({(uint32_t)alignM, (uint32_t)alignN, (uint32_t)alignK, layout, batch, hasBias}, {openCLBuffer(mConvGemmInpTensor.get()), openCLBuffer(mResource->mFilter.get()), openCLBuffer(output), openCLBuffer(mResource->mBias.get())}, mOpenCLBackend->getOpenCLRuntime()); + } - int GEMMK=param[0], KREG=param[1], KWG=param[2], KWI=param[3], MDIMA=param[4], MDIMC=param[5], MWG=param[6], NDIMB=param[7], NDIMC=param[8], NWG=param[9], SA=param[10], SB=param[11], STRM=param[12], STRN=param[13], VWM=param[14], VWN=param[15]; - buildOptions.emplace("-DGEMMK=" + std::to_string(GEMMK)); - buildOptions.emplace("-DKREG=" + std::to_string(KREG)); + int KWG=param[0], KWI=param[1], MDIMA=param[2], MDIMC=param[3], MWG=param[4], NDIMB=param[5], NDIMC=param[6], NWG=param[7], SA=param[8], SB=param[9], STRM=param[10], STRN=param[11], VWM=param[12], VWN=param[13]; buildOptions.emplace("-DKWG=" + std::to_string(KWG)); buildOptions.emplace("-DKWI=" + std::to_string(KWI)); buildOptions.emplace("-DMDIMA=" + std::to_string(MDIMA)); @@ -395,7 +389,7 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const if(layout >= 4) { buildOptions.emplace("-DOUTPUTMN"); } - + tileM = MWG; tileN = NWG; int localM = MDIMC; @@ -405,12 +399,7 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const buildOptions.emplace("-DUSE_CL_MAD=1"); buildOptions.emplace("-DRELAX_WORKGROUP_SIZE=1"); } - if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()){ - buildOptions.emplace(" -DPRECISION=16"); - } else { - buildOptions.emplace(" -DPRECISION=32"); - } - + mKernel = mOpenCLBackend->getOpenCLRuntime()->buildKernel("matmul_params_buf", "Xgemm", buildOptions); int out_per_thread_m = tileM / localM; @@ -431,8 +420,12 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const ret |= mKernel->get().setArg(idx++, beta); ret |= mKernel->get().setArg(idx++, openCLBuffer(mConvGemmInpTensor.get())); ret |= mKernel->get().setArg(idx++, openCLBuffer(mResource->mFilter.get())); -// ret |= mKernel->get().setArg(idx++, openCLBuffer(mResource->mBias.get())); - ret |= mKernel->get().setArg(idx++, openCLBuffer(mConvGemmOutTensor.get())); + if(mNeedOutTempTensor) { + ret |= mKernel->get().setArg(idx++, openCLBuffer(mConvGemmOutTensor.get())); + } else { + ret |= mKernel->get().setArg(idx++, openCLBuffer(mResource->mBias.get())); + ret |= mKernel->get().setArg(idx++, openCLBuffer(output)); + } ret |= mKernel->get().setArg(idx++, offset); ret |= mKernel->get().setArg(idx++, offset); ret |= mKernel->get().setArg(idx++, offset); @@ -442,9 +435,12 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const mGlobalWorkSize[0] = ROUND_UP(mGlobalWorkSize[0], std::max((uint32_t)1, mLocalWorkSize[0])); mGlobalWorkSize[1] = ROUND_UP(mGlobalWorkSize[1], std::max((uint32_t)1, mLocalWorkSize[1])); - { - std::set buildOptions; - mPostKernel = mOpenCLBackend->getOpenCLRuntime()->buildKernel("gemm_buf", "add_bias", buildOptions); + if(mNeedOutTempTensor) { + std::set buildOptions = mResource->mBuildOptions; + if(area == 1) { + buildOptions.emplace("-DAREA_EQUAL_1"); + } + mPostKernel = 
mOpenCLBackend->getOpenCLRuntime()->buildKernel("gemm_buf", "transpose_bias", buildOptions); uint32_t maxWorkGroupSize = static_cast(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mPostKernel)); mPostGlobalWorkSize = {static_cast(M), static_cast(UP_DIV(N, 16))}; @@ -458,12 +454,13 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const ret |= mPostKernel->get().setArg(idx++, static_cast(alignN)); ret |= mPostKernel->get().setArg(idx++, static_cast(M)); ret |= mPostKernel->get().setArg(idx++, static_cast(N)); + ret |= mPostKernel->get().setArg(idx++, static_cast(area)); ret |= mPostKernel->get().setArg(idx++, openCLBuffer(mConvGemmOutTensor.get())); ret |= mPostKernel->get().setArg(idx++, openCLBuffer(mResource->mBias.get())); ret |= mPostKernel->get().setArg(idx++, openCLBuffer(output)); MNN_CHECK_CL_SUCCESS(ret, "setArg mConvgemmOptLevel==2 PostKernel"); - mPostLocalWorkSize = localWS2DDefault(mPostGlobalWorkSize, maxWorkGroupSize, mOpenCLBackend->getOpenCLRuntime(), "add_bias", mPostKernel).first; + mPostLocalWorkSize = localWS2DDefault(mPostGlobalWorkSize, maxWorkGroupSize, mOpenCLBackend->getOpenCLRuntime(), "transpose_bias", mPostKernel).first; mOpenCLBackend->recordKernel2d(mPostKernel, mPostGlobalWorkSize, mPostLocalWorkSize); mPostGlobalWorkSize[0] = ROUND_UP(mPostGlobalWorkSize[0], std::max((uint32_t)1, mPostLocalWorkSize[0])); mPostGlobalWorkSize[1] = ROUND_UP(mPostGlobalWorkSize[1], std::max((uint32_t)1, mPostLocalWorkSize[1])); @@ -513,6 +510,7 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const MNN_CHECK_CL_SUCCESS(ret, "setArg Conv1x1Buf mConvgemmOptLevel==1 Kernel Select"); } else if (mResource->mConv1x1Opt) { + int tileN = 32; // {"conv_2d_1x1_c4h1w4", "conv_2d_1x1_c4h1w2", "conv_2d_1x1_c4h1w1", "conv_2d_1x1_c8h1w4"}; const int total_kernel = 3; std::string kernelName[total_kernel] = {"conv_2d_1x1_c4h1w4", "conv_2d_1x1_c4h1w2", "conv_2d_1x1_c4h1w1"}; @@ -554,13 +552,15 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const ret |= kernel[knl_idx]->get().setArg(idx++, globalWorkSize[knl_idx][1]); ret |= kernel[knl_idx]->get().setArg(idx++, UP_DIV(width, itemW[knl_idx])); ret |= kernel[knl_idx]->get().setArg(idx++, openCLBuffer(input)); - ret |= kernel[knl_idx]->get().setArg(idx++, *mResource->mKernelBuffer.get()); + ret |= kernel[knl_idx]->get().setArg(idx++, openCLBuffer(mResource->mFilter.get())); ret |= kernel[knl_idx]->get().setArg(idx++, openCLBuffer(mResource->mBias.get())); ret |= kernel[knl_idx]->get().setArg(idx++, openCLBuffer(output)); ret |= kernel[knl_idx]->get().setArg(idx++, static_cast(inputChannelBlocks)); ret |= kernel[knl_idx]->get().setArg(idx++, height); ret |= kernel[knl_idx]->get().setArg(idx++, width); ret |= kernel[knl_idx]->get().setArg(idx++, UP_DIV(outChannel, 4)); + ret |= kernel[knl_idx]->get().setArg(idx++, ROUND_UP(outChannel, tileN)); + MNN_CHECK_CL_SUCCESS(ret, "setArg Conv1x1Buf Kernel Select"); std::pair, int> retTune; @@ -591,13 +591,14 @@ ErrorCode ConvBufExecution::onResize(const std::vector &inputs, const ret |= mKernel->get().setArg(idx++, mGlobalWorkSize[1]); ret |= mKernel->get().setArg(idx++, UP_DIV(width, itemW[min_index])); ret |= mKernel->get().setArg(idx++, openCLBuffer(input)); - ret |= mKernel->get().setArg(idx++, *mResource->mKernelBuffer.get()); + ret |= mKernel->get().setArg(idx++, openCLBuffer(mResource->mFilter.get())); ret |= mKernel->get().setArg(idx++, openCLBuffer(mResource->mBias.get())); ret |= mKernel->get().setArg(idx++, openCLBuffer(output)); ret |= 
mKernel->get().setArg(idx++, static_cast(inputChannelBlocks)); ret |= mKernel->get().setArg(idx++, height); ret |= mKernel->get().setArg(idx++, width); ret |= mKernel->get().setArg(idx++, UP_DIV(outChannel, 4)); + ret |= mKernel->get().setArg(idx++, ROUND_UP(outChannel, tileN)); MNN_CHECK_CL_SUCCESS(ret, "setArg Conv1x1Buf"); //printf("conv1x1 %d, %d %d, %d %d, %d %d\n", min_index, mGlobalWorkSize[0], mGlobalWorkSize[1], mLocalWorkSize[0], mLocalWorkSize[1], outChannel, width); @@ -749,8 +750,9 @@ ErrorCode ConvBufExecution::onExecute(const std::vector &inputs, const std::string kw = std::to_string(mResource->mKernelWidth); std::string total = std::to_string(1.0 / 1000000 * inputs[0]->batch() * inputs[0]->channel() * outputs[0]->channel() * outputs[0]->height() * outputs[0]->width() * mResource->mKernelHeight * mResource->mKernelWidth); if (mResource->mConvGemmOptLevel > 0) { + std::string m = std::to_string(outputs[0]->width() * outputs[0]->height() * inputs[0]->batch()); name += "-gemm"; - name += std::to_string(mResource->mConvGemmOptLevel) + "-m" + b + "n" + co + "k" + ci; + name += std::to_string(mResource->mConvGemmOptLevel) + "-m" + m + "n" + co + "k" + ci; } else if (mResource->mConv1x1Opt) { name += "-conv1x1"; name += "-b" + b + "ci" + ci + "hi" + hi + "wi" + wi + "co" + co; diff --git a/source/backend/opencl/execution/buffer/ConvBufExecution.hpp b/source/backend/opencl/execution/buffer/ConvBufExecution.hpp index a1a523ca5..e5abe2a53 100644 --- a/source/backend/opencl/execution/buffer/ConvBufExecution.hpp +++ b/source/backend/opencl/execution/buffer/ConvBufExecution.hpp @@ -65,7 +65,6 @@ class ConvBufExecution : public ConvBufCommonExecution, public CommonExecution { virtual ErrorCode onExecute(const std::vector &inputs, const std::vector &outputs) override; virtual bool onClone(Backend* bn, const Op* op, Execution** dst) override; - void setConv1x1WeightBuffer(int packCout, int packCin, const float* filterDataPtr); private: void _generateFilterConvertRegion(Tensor *virtualFilter, Tensor *originBuffer) const; @@ -75,6 +74,7 @@ class ConvBufExecution : public ConvBufCommonExecution, public CommonExecution { std::shared_ptr mKernel; std::shared_ptr mConvGemmInpTensor; std::shared_ptr mConvGemmOutTensor; + bool mNeedOutTempTensor = false; std::shared_ptr mPreKernel = nullptr; std::vector mPreGlobalWorkSize{1, 1, 1}; std::vector mPreLocalWorkSize{1, 1, 1, 1}; diff --git a/source/backend/opencl/execution/buffer/ConvBufWinograd.cpp b/source/backend/opencl/execution/buffer/ConvBufWinograd.cpp index 52c0d39c2..17ca12a3e 100644 --- a/source/backend/opencl/execution/buffer/ConvBufWinograd.cpp +++ b/source/backend/opencl/execution/buffer/ConvBufWinograd.cpp @@ -205,7 +205,7 @@ ConvBufWinograd::ConvBufWinograd(const MNN::Op* op, Backend* backend) : CommonEx int kernelSize = kx; int alpha = unit + kernelSize - 1; - int tileK = 16; + int tileK = 4; int tileN = 32; std::shared_ptr tmpFilterTensor; @@ -460,7 +460,7 @@ ErrorCode ConvBufWinograd::onEncode(const std::vector& inputs, const st { int tileM = 16; int tileN = 32; - int tileK = 16; + int tileK = 4; mSource.reset(Tensor::createDevice( std::vector{alpha * alpha * ROUND_UP(input->channel(), tileK) * ROUND_UP(wUnit * hUnit, tileM)})); mDest.reset(Tensor::createDevice( @@ -541,11 +541,9 @@ ErrorCode ConvBufWinograd::onEncode(const std::vector& inputs, const st std::set buildOptions; uint32_t layout = 4; - auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)loop}, {openCLBuffer(mSource.get()), 
openCLBuffer(mResource->mWeight.get()), openCLBuffer(mDest.get())}, mOpenCLBackend->getOpenCLRuntime()); + auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)loop, (uint32_t)0}, {openCLBuffer(mSource.get()), openCLBuffer(mResource->mWeight.get()), openCLBuffer(mDest.get())}, mOpenCLBackend->getOpenCLRuntime()); - int GEMMK=param[0], KREG=param[1], KWG=param[2], KWI=param[3], MDIMA=param[4], MDIMC=param[5], MWG=param[6], NDIMB=param[7], NDIMC=param[8], NWG=param[9], SA=param[10], SB=param[11], STRM=param[12], STRN=param[13], VWM=param[14], VWN=param[15]; - buildOptions.emplace("-DGEMMK=" + std::to_string(GEMMK)); - buildOptions.emplace("-DKREG=" + std::to_string(KREG)); + int KWG=param[0], KWI=param[1], MDIMA=param[2], MDIMC=param[3], MWG=param[4], NDIMB=param[5], NDIMC=param[6], NWG=param[7], SA=param[8], SB=param[9], STRM=param[10], STRN=param[11], VWM=param[12], VWN=param[13]; buildOptions.emplace("-DKWG=" + std::to_string(KWG)); buildOptions.emplace("-DKWI=" + std::to_string(KWI)); buildOptions.emplace("-DMDIMA=" + std::to_string(MDIMA)); @@ -573,12 +571,6 @@ ErrorCode ConvBufWinograd::onEncode(const std::vector& inputs, const st buildOptions.emplace("-DUSE_CL_MAD=1"); buildOptions.emplace("-DRELAX_WORKGROUP_SIZE=1"); } - - if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()){ - buildOptions.emplace(" -DPRECISION=16"); - } else { - buildOptions.emplace(" -DPRECISION=32"); - } mUnits[b * 3 + 1].kernel = mOpenCLBackend->getOpenCLRuntime()->buildKernel("matmul_params_buf", "XgemmBatched", buildOptions); @@ -590,6 +582,9 @@ ErrorCode ConvBufWinograd::onEncode(const std::vector& inputs, const st float alpha = 1.0f; float beta = 0.0f; + int batch_offset_a = e_pack * l_pack; + int batch_offset_b = h_pack * l_pack; + int batch_offset_c = e_pack * h_pack; int idx = 0; cl_int ret = CL_SUCCESS; @@ -599,14 +594,11 @@ ErrorCode ConvBufWinograd::onEncode(const std::vector& inputs, const st ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, alpha); ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, beta); ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, openCLBuffer(mSource.get())); - ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, e_pack); - ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, l_pack); + ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, batch_offset_a); ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, openCLBuffer(mResource->mWeight.get())); - ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, h_pack); - ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, l_pack); + ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, batch_offset_b); ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, openCLBuffer(mDest.get())); - ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, e_pack); - ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, h_pack); + ret |= mUnits[b * 3 + 1].kernel->get().setArg(idx++, batch_offset_c); MNN_CHECK_CL_SUCCESS(ret, "setArg Winograd batchmatmul Kernel"); mOpenCLBackend->recordKernel3d(mUnits[b * 3 + 1].kernel, mGWS_M[b], mLWS_M[b]); diff --git a/source/backend/opencl/execution/buffer/GroupNormBufExecution.cpp b/source/backend/opencl/execution/buffer/GroupNormBufExecution.cpp index e2aaf0194..03589bc6b 100644 --- a/source/backend/opencl/execution/buffer/GroupNormBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/GroupNormBufExecution.cpp @@ -6,7 +6,6 @@ // Created by MNN on 2024/06/24. 
// Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #include "backend/opencl/execution/buffer/GroupNormBufExecution.hpp" @@ -260,8 +259,7 @@ class GroupNormBufCreator : public OpenCLBackend::Creator { } }; -REGISTER_OPENCL_OP_CREATOR(GroupNormBufCreator, OpType_GroupNorm, BUFFER); +REGISTER_OPENCL_OP_CREATOR_TRANSFORMER(GroupNormBufCreator, OpType_GroupNorm, BUFFER); } // namespace OpenCL } // namespace MNN #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif/* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/GroupNormBufExecution.hpp b/source/backend/opencl/execution/buffer/GroupNormBufExecution.hpp index 2f1ce8313..bf569f983 100644 --- a/source/backend/opencl/execution/buffer/GroupNormBufExecution.hpp +++ b/source/backend/opencl/execution/buffer/GroupNormBufExecution.hpp @@ -5,7 +5,6 @@ // Created by MNN on 2024/06/24. // Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #ifndef GroupNormBufExecution_hpp @@ -42,4 +41,3 @@ class GroupNormBufExecution : public CommonExecution { } // namespace MNN #endif /* GroupNormBufExecution_hpp */ #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif diff --git a/source/backend/opencl/execution/buffer/LoopBufExecution.cpp b/source/backend/opencl/execution/buffer/LoopBufExecution.cpp index 9935db7df..bf8dfc463 100644 --- a/source/backend/opencl/execution/buffer/LoopBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/LoopBufExecution.cpp @@ -16,7 +16,7 @@ namespace OpenCL { static void _TileOrPackTensor(Tensor *input, Tensor *output, std::shared_ptr& kernelW, cl::NDRange &globalWorkSize, cl::NDRange &localWorkSize, const int Width, const int Height, const int Channel, const int Batch, OpenCLBackend *bn, const std::string& KernelName, std::set buildOptions, - const int WidthPad, const int HeightPad, const int ChannelPad) { + const int WidthPad, const int HeightPad, const int ChannelPad, OpenCLRuntime* runtime) { bool fastTileTranspose = false; if (TensorUtils::getDescribe(output)->dimensionFormat == MNN::MNN_DATA_FORMAT_NHWC || TensorUtils::getDescribe(input)->dimensionFormat == MNN::MNN_DATA_FORMAT_NHWC){ buildOptions.emplace("-DMNN_NHWC"); @@ -30,21 +30,28 @@ static void _TileOrPackTensor(Tensor *input, Tensor *output, std::shared_ptrisSupportedFP16()) { + local_mem_size = 2; + } + if(buildOptions.find("-DDIMENSION_4") != buildOptions.end()) { + local_mem_size *= (64 * 64 * 4); + if(local_mem_size <= runtime->getMaxLocalMem()) { + if((WidthPad & 63) == 0) { + tileW = 64; + } + if((HeightPad & 63) == 0) { + tileH = 64; + } + } + runKernelName = "tile_trans_4d_buf"; // match with tileW tileH tileW/localW tileH/localH buildOptions.emplace("-DWGSW=" + std::to_string(tileW)); @@ -52,6 +59,15 @@ static void _TileOrPackTensor(Tensor *input, Tensor *output, std::shared_ptronAcquireBuffer(mTmpTensors[i].get(), Backend::DYNAMIC); - _TileOrPackTensor(input, mTmpTensors[i].get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, mOpenCLBackend, "tile_buf", buildOptions, WidthPad, HeightPad, ChannelPad); + _TileOrPackTensor(input, mTmpTensors[i].get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, mOpenCLBackend, "tile_buf", buildOptions, WidthPad, HeightPad, ChannelPad, runTime); mUnits.emplace_back(unit); } @@ -499,7 +515,7 @@ ErrorCode LoopBatchMatMulBufExecution::onEncode(const std::vector &inp // MNN_PRINT("input%d 
offset, %d %d %d %d\n", i, Batch, Channel, Height, Width); Unit unit; - _TileOrPackTensor(input, mOffsetTensors.back().get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, mOpenCLBackend, "tile_buf", mBuildOptions, Width, Height, Channel); + _TileOrPackTensor(input, mOffsetTensors.back().get(), unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, mOpenCLBackend, "tile_buf", mBuildOptions, Width, Height, Channel, runTime); mUnits.emplace_back(unit); } } @@ -520,11 +536,9 @@ ErrorCode LoopBatchMatMulBufExecution::onEncode(const std::vector &inp std::set buildOptions; uint32_t layout = 0; - auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)n}, {openCLBuffer(mTmpTensors[1].get()), openCLBuffer(mTmpTensors[2].get()), openCLBuffer(mTmpTensors[0].get())}, mOpenCLBackend->getOpenCLRuntime()); + auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)n, (uint32_t)0}, {openCLBuffer(mTmpTensors[1].get()), openCLBuffer(mTmpTensors[2].get()), openCLBuffer(mTmpTensors[0].get())}, mOpenCLBackend->getOpenCLRuntime()); - int GEMMK=param[0], KREG=param[1], KWG=param[2], KWI=param[3], MDIMA=param[4], MDIMC=param[5], MWG=param[6], NDIMB=param[7], NDIMC=param[8], NWG=param[9], SA=param[10], SB=param[11], STRM=param[12], STRN=param[13], VWM=param[14], VWN=param[15]; - buildOptions.emplace("-DGEMMK=" + std::to_string(GEMMK)); - buildOptions.emplace("-DKREG=" + std::to_string(KREG)); + int KWG=param[0], KWI=param[1], MDIMA=param[2], MDIMC=param[3], MWG=param[4], NDIMB=param[5], NDIMC=param[6], NWG=param[7], SA=param[8], SB=param[9], STRM=param[10], STRN=param[11], VWM=param[12], VWN=param[13]; buildOptions.emplace("-DKWG=" + std::to_string(KWG)); buildOptions.emplace("-DKWI=" + std::to_string(KWI)); buildOptions.emplace("-DMDIMA=" + std::to_string(MDIMA)); @@ -552,12 +566,6 @@ ErrorCode LoopBatchMatMulBufExecution::onEncode(const std::vector &inp buildOptions.emplace("-DUSE_CL_MAD=1"); buildOptions.emplace("-DRELAX_WORKGROUP_SIZE=1"); } - - if(mOpenCLBackend->getOpenCLRuntime()->isSupportedFP16()){ - buildOptions.emplace(" -DPRECISION=16"); - } else { - buildOptions.emplace(" -DPRECISION=32"); - } Unit unit; unit.kernel = mOpenCLBackend->getOpenCLRuntime()->buildKernel("matmul_params_buf", "XgemmBatched", buildOptions); @@ -570,7 +578,9 @@ ErrorCode LoopBatchMatMulBufExecution::onEncode(const std::vector &inp float alpha = 1.0; float beta = 0.0f; - + int batch_offset_a = e_pack * l_pack; + int batch_offset_b = h_pack * l_pack; + int batch_offset_c = e_pack * h_pack; int idx = 0; cl_int ret = CL_SUCCESS; ret |= unit.kernel->get().setArg(idx++, static_cast(e_pack)); @@ -579,14 +589,11 @@ ErrorCode LoopBatchMatMulBufExecution::onEncode(const std::vector &inp ret |= unit.kernel->get().setArg(idx++, alpha); ret |= unit.kernel->get().setArg(idx++, beta); ret |= unit.kernel->get().setArg(idx++, openCLBuffer(mTmpTensors[1].get())); - ret |= unit.kernel->get().setArg(idx++, e_pack); - ret |= unit.kernel->get().setArg(idx++, l_pack); + ret |= unit.kernel->get().setArg(idx++, batch_offset_a); ret |= unit.kernel->get().setArg(idx++, openCLBuffer(mTmpTensors[2].get())); - ret |= unit.kernel->get().setArg(idx++, h_pack); - ret |= unit.kernel->get().setArg(idx++, l_pack); + ret |= unit.kernel->get().setArg(idx++, batch_offset_b); ret |= unit.kernel->get().setArg(idx++, openCLBuffer(mTmpTensors[0].get())); - ret |= unit.kernel->get().setArg(idx++, e_pack); - ret |= 
unit.kernel->get().setArg(idx++, h_pack); + ret |= unit.kernel->get().setArg(idx++, batch_offset_c); MNN_CHECK_CL_SUCCESS(ret, "setArg LoopBuf GemmTile Kernel"); unit.globalWorkSize = {globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]}; @@ -686,7 +693,7 @@ ErrorCode LoopBatchMatMulBufExecution::onEncode(const std::vector &inp HeightPad = std::get<1>(shape); ChannelPad = std::get<2>(shape); } - _TileOrPackTensor(mTmpTensors[0].get(), output, unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, mOpenCLBackend, "pack_buf", buildOptions, WidthPad, HeightPad, ChannelPad); + _TileOrPackTensor(mTmpTensors[0].get(), output, unit.kernel, unit.globalWorkSize, unit.localWorkSize, Width, Height, Channel, Batch, mOpenCLBackend, "pack_buf", buildOptions, WidthPad, HeightPad, ChannelPad, runTime); mUnits.emplace_back(unit); } diff --git a/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.cpp b/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.cpp index dc21fcb68..bc7aba4ef 100644 --- a/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.cpp @@ -5,7 +5,6 @@ // Created by MNN on 2024/06/03. // Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #include @@ -41,7 +40,7 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorshape(); int tile_mn = 32; - int tile_k = 16; // for gemm alignment + int tile_k = 4; // for gemm alignment int batch = shape[0]; int seq_len = shape[1]; mHeadDim = shape[2] / mNumHead / 3; @@ -53,7 +52,7 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vector 1024) { - mQseqSplitNum = (seq_len >= 4096) ? 8 : ((seq_len < 2048) ? 2 : 4); + mQseqSplitNum = (seq_len >= 4096 && seq_len % 64 == 0) ? 8 : ((seq_len < 2048) ? 
2 : 4); } int buffer_size = batch * mNumHead * ROUND_UP(mHeadDim, tile_k) * ROUND_UP(seq_len, tile_mn); int buffer_qk_size = batch * mNumHead * ROUND_UP(seq_len, tile_mn) * ROUND_UP(seq_len, tile_mn) / mQseqSplitNum; @@ -154,7 +153,9 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(index++, seq_idx); MNN_CHECK_CL_SUCCESS(ret, "setArg split_transpose_qkv"); mLocalWorkSizeSplit[seq_idx] = localWS3DDefault(mGlobalWorkSizeSplit[seq_idx], maxWorkGroupSize, runtime, "split_transpose_qkv", mKernel_split[seq_idx]).first; - + mGlobalWorkSizeSplit[seq_idx][0] = ROUND_UP(mGlobalWorkSizeSplit[seq_idx][0], std::max((uint32_t)1, mLocalWorkSizeSplit[seq_idx][0])); + mGlobalWorkSizeSplit[seq_idx][1] = ROUND_UP(mGlobalWorkSizeSplit[seq_idx][1], std::max((uint32_t)1, mLocalWorkSizeSplit[seq_idx][1])); + mGlobalWorkSizeSplit[seq_idx][2] = ROUND_UP(mGlobalWorkSizeSplit[seq_idx][2], std::max((uint32_t)1, mLocalWorkSizeSplit[seq_idx][2])); mOpenCLBackend->recordKernel3d(mKernel_split[seq_idx], mGlobalWorkSizeSplit[seq_idx], mLocalWorkSizeSplit[seq_idx]); } @@ -171,11 +172,9 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vector buildOptions; uint32_t layout = 4; - auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)loop}, {openCLBuffer(mTempQ.get()), openCLBuffer(mTempK.get()), openCLBuffer(mTempQK.get())}, mOpenCLBackend->getOpenCLRuntime()); + auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)loop, (uint32_t)0}, {openCLBuffer(mTempQ.get()), openCLBuffer(mTempK.get()), openCLBuffer(mTempQK.get())}, mOpenCLBackend->getOpenCLRuntime()); - int GEMMK=param[0], KREG=param[1], KWG=param[2], KWI=param[3], MDIMA=param[4], MDIMC=param[5], MWG=param[6], NDIMB=param[7], NDIMC=param[8], NWG=param[9], SA=param[10], SB=param[11], STRM=param[12], STRN=param[13], VWM=param[14], VWN=param[15]; - buildOptions.emplace("-DGEMMK=" + std::to_string(GEMMK)); - buildOptions.emplace("-DKREG=" + std::to_string(KREG)); + int KWG=param[0], KWI=param[1], MDIMA=param[2], MDIMC=param[3], MWG=param[4], NDIMB=param[5], NDIMC=param[6], NWG=param[7], SA=param[8], SB=param[9], STRM=param[10], STRN=param[11], VWM=param[12], VWN=param[13]; buildOptions.emplace("-DKWG=" + std::to_string(KWG)); buildOptions.emplace("-DKWI=" + std::to_string(KWI)); buildOptions.emplace("-DMDIMA=" + std::to_string(MDIMA)); @@ -203,13 +202,7 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorgetOpenCLRuntime()->isSupportedFP16()){ - buildOptions.emplace(" -DPRECISION=16"); - } else { - buildOptions.emplace(" -DPRECISION=32"); - } - + buildOptions.emplace("-DONLY_HAVE_ALPHA"); mKernel_qk[seq_idx] = mOpenCLBackend->getOpenCLRuntime()->buildKernel("matmul_params_buf", "XgemmBatched", buildOptions); int out_per_thread_m = tileM / localM; @@ -220,7 +213,9 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(idx++, static_cast(e_pack)); @@ -229,14 +224,11 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(idx++, alpha); ret |= mKernel_qk[seq_idx]->get().setArg(idx++, beta); ret |= mKernel_qk[seq_idx]->get().setArg(idx++, openCLBuffer(mTempQ.get())); - ret |= mKernel_qk[seq_idx]->get().setArg(idx++, e_pack); - ret |= mKernel_qk[seq_idx]->get().setArg(idx++, l_pack); + ret |= mKernel_qk[seq_idx]->get().setArg(idx++, batch_offset_a); ret |= mKernel_qk[seq_idx]->get().setArg(idx++, openCLBuffer(mTempK.get())); 
- ret |= mKernel_qk[seq_idx]->get().setArg(idx++, h_pack); - ret |= mKernel_qk[seq_idx]->get().setArg(idx++, l_pack); + ret |= mKernel_qk[seq_idx]->get().setArg(idx++, batch_offset_b); ret |= mKernel_qk[seq_idx]->get().setArg(idx++, openCLBuffer(mTempQK.get())); - ret |= mKernel_qk[seq_idx]->get().setArg(idx++, e_pack); - ret |= mKernel_qk[seq_idx]->get().setArg(idx++, h_pack); + ret |= mKernel_qk[seq_idx]->get().setArg(idx++, batch_offset_c); MNN_CHECK_CL_SUCCESS(ret, "setArg Self-Attention batchmatmul qk Kernel"); mOpenCLBackend->recordKernel3d(mKernel_qk[seq_idx], mGlobalWorkSizeQk[seq_idx], mLocalWorkSizeQk[seq_idx]); @@ -259,7 +251,7 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vector buildOption; buildOption.emplace("-DSOFTMAX_LOCAL_SIZE=" + std::to_string(localSize)); - // buildOption.emplace("-DOUTPUT_TRANSPOSE"); +// buildOption.emplace("-DOUTPUT_TRANSPOSE"); mKernel_softmax[seq_idx] = runtime->buildKernel("self_attention_buf", "softmax_inside", buildOption, inputs[0], outputs[0]); mGlobalWorkSizeSoftMax[seq_idx] = {static_cast(localSize), static_cast(mSoftmaxShape[1]), static_cast(mSoftmaxShape[0])}; @@ -278,31 +270,16 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vector(localSize), 1, 1}; mOpenCLBackend->recordKernel3d(mKernel_softmax[seq_idx], mGlobalWorkSizeSoftMax[seq_idx], mLocalWorkSizeSoftMax[seq_idx]); } - { - unsigned int tileW = 32; - unsigned int tileH = 32; int loop = batch * mNumHead; int transDimW = ROUND_UP(seq_len, tile_mn) / mQseqSplitNum; int transDimH = ROUND_UP(seq_len, tile_mn); - if((transDimW & 63) == 0 && (transDimH & 63) == 0) { - tileW = 64; - tileH = 64; - } - unsigned int localW = 8; - unsigned int localH = 8; - std::set buildOptions; - buildOptions.emplace("-DWGSW=" + std::to_string(tileW)); - buildOptions.emplace("-DWGSH=" + std::to_string(tileH)); - buildOptions.emplace("-DTSW=" + std::to_string(tileW/localW)); - buildOptions.emplace("-DTSH=" + std::to_string(tileH/localH)); + std::set buildOptions; mKernel_trans[seq_idx] = runtime->buildKernel("self_attention_buf", "trans_3d_buf", buildOptions, inputs[0], outputs[0]); - - int w_per_thread = tileW / localW; - int h_per_thread = tileH / localH; - mGlobalWorkSizeTrans[seq_idx] = {(uint32_t)transDimW/w_per_thread, (uint32_t)transDimH/h_per_thread, (uint32_t)(loop)}; - mLocalWorkSizeTrans[seq_idx] = {localW, localH, 1}; + uint32_t maxWorkGroupSize = static_cast(mOpenCLBackend->getOpenCLRuntime()->getMaxWorkGroupSize(mKernel_trans[seq_idx])); + + mGlobalWorkSizeTrans[seq_idx] = {(uint32_t)transDimW/8, (uint32_t)transDimH/8, (uint32_t)(loop)}; uint32_t index = 0; cl_int ret = CL_SUCCESS; @@ -312,11 +289,11 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(index++, transDimW); ret |= mKernel_trans[seq_idx]->get().setArg(index++, transDimH); MNN_CHECK_CL_SUCCESS(ret, "setArg Self-Attention transpose"); + mLocalWorkSizeTrans[seq_idx] = localWS3DDefault(mGlobalWorkSizeTrans[seq_idx], maxWorkGroupSize, mOpenCLBackend->getOpenCLRuntime(), "trans_3d_buf", mKernel_trans[seq_idx]).first; mOpenCLBackend->recordKernel3d(mKernel_trans[seq_idx], mGlobalWorkSizeTrans[seq_idx], mLocalWorkSizeTrans[seq_idx]); } - // qk * value { // Sotmax: [Batch * mNumHead, ROUND_UP(seqLen, tile), ROUND_UP(seqLen, tile)] -> [B, K, M] @@ -340,11 +317,9 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vector A:[M, K] B:[N, K] C:[M, N] */ uint32_t layout = 0; - auto param = getGemmParams({(uint32_t)e_pack, 
(uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)loop}, {openCLBuffer(mTempTrans.get()), openCLBuffer(mTempV.get()), openCLBuffer(mTempQKV.get())}, mOpenCLBackend->getOpenCLRuntime()); - - int GEMMK=param[0], KREG=param[1], KWG=param[2], KWI=param[3], MDIMA=param[4], MDIMC=param[5], MWG=param[6], NDIMB=param[7], NDIMC=param[8], NWG=param[9], SA=param[10], SB=param[11], STRM=param[12], STRN=param[13], VWM=param[14], VWN=param[15]; - buildOptions.emplace("-DGEMMK=" + std::to_string(GEMMK)); - buildOptions.emplace("-DKREG=" + std::to_string(KREG)); + auto param = getGemmParams({(uint32_t)e_pack, (uint32_t)h_pack, (uint32_t)l_pack, layout, (uint32_t)loop, (uint32_t)0}, {openCLBuffer(mTempTrans.get()), openCLBuffer(mTempV.get()), openCLBuffer(mTempQKV.get())}, mOpenCLBackend->getOpenCLRuntime()); + + int KWG=param[0], KWI=param[1], MDIMA=param[2], MDIMC=param[3], MWG=param[4], NDIMB=param[5], NDIMC=param[6], NWG=param[7], SA=param[8], SB=param[9], STRM=param[10], STRN=param[11], VWM=param[12], VWN=param[13]; buildOptions.emplace("-DKWG=" + std::to_string(KWG)); buildOptions.emplace("-DKWI=" + std::to_string(KWI)); buildOptions.emplace("-DMDIMA=" + std::to_string(MDIMA)); @@ -372,13 +347,7 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorgetOpenCLRuntime()->isSupportedFP16()){ - buildOptions.emplace(" -DPRECISION=16"); - } else { - buildOptions.emplace(" -DPRECISION=32"); - } - + mKernel_qkv[seq_idx] = mOpenCLBackend->getOpenCLRuntime()->buildKernel("matmul_params_buf", "XgemmBatched", buildOptions); int out_per_thread_m = tileM / localM; @@ -389,7 +358,9 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(idx++, static_cast(e_pack)); @@ -398,14 +369,11 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(idx++, alpha); ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, beta); ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, openCLBuffer(mTempTrans.get())); - ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, e_pack); - ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, l_pack); + ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, batch_offset_a); ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, openCLBuffer(mTempV.get())); - ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, h_pack); - ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, l_pack); + ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, batch_offset_b); ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, openCLBuffer(mTempQKV.get())); - ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, e_pack); - ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, h_pack); + ret |= mKernel_qkv[seq_idx]->get().setArg(idx++, batch_offset_c); MNN_CHECK_CL_SUCCESS(ret, "setArg Self-Attention batchmatmul qkv Kernel"); mOpenCLBackend->recordKernel3d(mKernel_qkv[seq_idx], mGlobalWorkSizeQkv[seq_idx], mLocalWorkSizeQkv[seq_idx]); } @@ -438,6 +406,10 @@ ErrorCode SelfAttentionBufImpl::onResize(Backend *backend, const std::vectorget().setArg(index++, seq_idx); mLocalWorkSizeClip[seq_idx] = localWS3DDefault(mGlobalWorkSizeClip[seq_idx], maxWorkGroupSize, runtime, "clip_transpose_qkv", mKernel_clip[seq_idx]).first; + mGlobalWorkSizeClip[seq_idx][0] = ROUND_UP(mGlobalWorkSizeClip[seq_idx][0], std::max((uint32_t)1, mLocalWorkSizeClip[seq_idx][0])); + mGlobalWorkSizeClip[seq_idx][1] = ROUND_UP(mGlobalWorkSizeClip[seq_idx][1], std::max((uint32_t)1, mLocalWorkSizeClip[seq_idx][1])); + mGlobalWorkSizeClip[seq_idx][2] = ROUND_UP(mGlobalWorkSizeClip[seq_idx][2], 
std::max((uint32_t)1, mLocalWorkSizeClip[seq_idx][2])); + MNN_CHECK_CL_SUCCESS(ret, "setArg clip_transpose_qkv"); mOpenCLBackend->recordKernel3d(mKernel_clip[seq_idx], mGlobalWorkSizeClip[seq_idx], mLocalWorkSizeClip[seq_idx]); } @@ -578,9 +550,8 @@ class SelfAttentionBufCreator : public OpenCLBackend::Creator { return new SelfAttentionBufExecution(op, backend); } }; -REGISTER_OPENCL_OP_CREATOR(SelfAttentionBufCreator, OpType_FmhaV2, BUFFER); +REGISTER_OPENCL_OP_CREATOR_TRANSFORMER(SelfAttentionBufCreator, OpType_FmhaV2, BUFFER); } // namespace OpenCL } // namespace MNN #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif/* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.hpp b/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.hpp index 31786f666..447be8f10 100644 --- a/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.hpp +++ b/source/backend/opencl/execution/buffer/SelfAttentionBufExecution.hpp @@ -5,7 +5,6 @@ // Created by MNN on 2024/06/03. // Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #ifndef SelfAttentionBufExecution_hpp @@ -75,5 +74,3 @@ class SelfAttentionBufExecution : public CommonExecution { } // namespace MNN #endif /* SelfAttentionBufExecution_hpp */ #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ - -#endif /* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/SplitGeluBufExecution.cpp b/source/backend/opencl/execution/buffer/SplitGeluBufExecution.cpp index 171ee58a1..0baee6428 100644 --- a/source/backend/opencl/execution/buffer/SplitGeluBufExecution.cpp +++ b/source/backend/opencl/execution/buffer/SplitGeluBufExecution.cpp @@ -6,7 +6,6 @@ // Created by MNN on 2024/06/26. // Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #include "backend/opencl/execution/buffer/SplitGeluBufExecution.hpp" @@ -96,8 +95,7 @@ class SplitGeluBufCreator : public OpenCLBackend::Creator { } }; -REGISTER_OPENCL_OP_CREATOR(SplitGeluBufCreator, OpType_SplitGeLU, BUFFER); +REGISTER_OPENCL_OP_CREATOR_TRANSFORMER(SplitGeluBufCreator, OpType_SplitGeLU, BUFFER); } // namespace OpenCL } // namespace MNN #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif/* MNN_OPENCL_BUFFER_CLOSED */ diff --git a/source/backend/opencl/execution/buffer/SplitGeluBufExecution.hpp b/source/backend/opencl/execution/buffer/SplitGeluBufExecution.hpp index 9ecd65b8b..0f0e6bd60 100644 --- a/source/backend/opencl/execution/buffer/SplitGeluBufExecution.hpp +++ b/source/backend/opencl/execution/buffer/SplitGeluBufExecution.hpp @@ -5,7 +5,6 @@ // Created by MNN on 2024/06/26. 
// Copyright © 2018, Alibaba Group Holding Limited // -#ifndef MNN_OPENCL_BUFFER_CLOSED #ifdef MNN_SUPPORT_TRANSFORMER_FUSE #ifndef SplitGeluBufExecution_hpp @@ -36,4 +35,3 @@ class SplitGeluBufExecution : public CommonExecution { } // namespace MNN #endif /* SplitGeluBufExecution_hpp */ #endif/* MNN_SUPPORT_TRANSFORMER_FUSE */ -#endif diff --git a/source/backend/opencl/execution/cl/conv_2d_buf.cl b/source/backend/opencl/execution/cl/conv_2d_buf.cl index b6d6dd475..9aed2e670 100644 --- a/source/backend/opencl/execution/cl/conv_2d_buf.cl +++ b/source/backend/opencl/execution/cl/conv_2d_buf.cl @@ -9,8 +9,6 @@ return; \ } - - __kernel void conv_2d_1x1_c4h1w4(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, __global const FLOAT *input, @@ -20,7 +18,8 @@ void conv_2d_1x1_c4h1w4(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, __private const int in_c_block, __private const int out_h, __private const int out_w, - __private const int out_c_block) { + __private const int out_c_block, + __private const int out_c_pack) { const int out_c_w_idx = get_global_id(0); //c/4 w const int out_b_h_idx = get_global_id(1); //b h @@ -40,43 +39,45 @@ void conv_2d_1x1_c4h1w4(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, const int intput_width_idx0 = out_w4_idx; - int offset = mul24(out_c_idx, in_c_block) << 2; + + int offset = out_c_idx*4; int inp_offset = (((out_b_idx*in_c_block)*out_h + out_h_idx)* out_w + intput_width_idx0) << 2; const int inp_add = out_h*out_w*4; for (ushort in_channel_block_idx = 0; in_channel_block_idx < in_c_block; ++in_channel_block_idx) { + + int offset = mad24(in_channel_block_idx*4, out_c_pack, out_c_idx*4); COMPUTE_FLOAT4 in0 = CONVERT_COMPUTE_FLOAT4(vload4(0, input+inp_offset)); COMPUTE_FLOAT4 in1 = CONVERT_COMPUTE_FLOAT4(vload4(1, input+inp_offset)); COMPUTE_FLOAT4 in2 = CONVERT_COMPUTE_FLOAT4(vload4(2, input+inp_offset)); COMPUTE_FLOAT4 in3 = CONVERT_COMPUTE_FLOAT4(vload4(3, input+inp_offset)); - - COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(offset, kernel_ptr)); - COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 1, kernel_ptr)); - COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 2, kernel_ptr)); - COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 3, kernel_ptr)); - - out0.x += dot(weights0, in0); - out0.y += dot(weights1, in0); - out0.z += dot(weights2, in0); - out0.w += dot(weights3, in0); - - out1.x += dot(weights0, in1); - out1.y += dot(weights1, in1); - out1.z += dot(weights2, in1); - out1.w += dot(weights3, in1); - - out2.x += dot(weights0, in2); - out2.y += dot(weights1, in2); - out2.z += dot(weights2, in2); - out2.w += dot(weights3, in2); - - out3.x += dot(weights0, in3); - out3.y += dot(weights1, in3); - out3.z += dot(weights2, in3); - out3.w += dot(weights3, in3); + COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack + out_c_pack)); + + out0 = mad(in0.x, weights0, out0); + out0 = mad(in0.y, weights1, out0); + out0 = mad(in0.z, weights2, out0); + out0 = mad(in0.w, weights3, out0); + + out1 = mad(in1.x, weights0, out1); + out1 = mad(in1.y, weights1, out1); + out1 = mad(in1.z, weights2, out1); + out1 = mad(in1.w, weights3, out1); + + out2 = mad(in2.x, weights0, 
out2); + out2 = mad(in2.y, weights1, out2); + out2 = mad(in2.z, weights2, out2); + out2 = mad(in2.w, weights3, out2); - offset += 4; + out3 = mad(in3.x, weights0, out3); + out3 = mad(in3.y, weights1, out3); + out3 = mad(in3.z, weights2, out3); + out3 = mad(in3.w, weights3, out3); + + offset += 4 * out_c_pack; inp_offset += inp_add; } @@ -122,7 +123,8 @@ void conv_2d_1x1_c8h1w4(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, __private const int in_c_block, __private const int out_h, __private const int out_w, - __private const int out_c_block) { + __private const int out_c_block, + __private const int out_c_pack) { const int out_c_w_idx = get_global_id(0); //c/8 w/4 const int out_b_h_idx = get_global_id(1); //b h @@ -146,10 +148,10 @@ void conv_2d_1x1_c8h1w4(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, COMPUTE_FLOAT4 out7 = out4; const int intput_width_idx0 = out_w4_idx; - + for (int in_channel_block_idx = 0; in_channel_block_idx < in_c_block; ++in_channel_block_idx) { - int offset = mad24(out_c_idx, in_c_block, in_channel_block_idx)*8; + int offset = mad24(in_channel_block_idx*4, out_c_pack, out_c_idx*8); const int inp_offset = (((out_b_idx*in_c_block + in_channel_block_idx)*out_h + out_h_idx)* out_w + intput_width_idx0)*4; @@ -157,55 +159,55 @@ void conv_2d_1x1_c8h1w4(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, COMPUTE_FLOAT4 in1 = CONVERT_COMPUTE_FLOAT4(vload4(1, input+inp_offset)); COMPUTE_FLOAT4 in2 = CONVERT_COMPUTE_FLOAT4(vload4(2, input+inp_offset)); COMPUTE_FLOAT4 in3 = CONVERT_COMPUTE_FLOAT4(vload4(3, input+inp_offset)); - - COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(offset, kernel_ptr)); - COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 1, kernel_ptr)); - COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 2, kernel_ptr)); - COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 3, kernel_ptr)); - COMPUTE_FLOAT4 weights4 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 4, kernel_ptr)); - COMPUTE_FLOAT4 weights5 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 5, kernel_ptr)); - COMPUTE_FLOAT4 weights6 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 6, kernel_ptr)); - COMPUTE_FLOAT4 weights7 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 7, kernel_ptr)); - out0.x += dot(weights0, in0); - out0.y += dot(weights1, in0); - out0.z += dot(weights2, in0); - out0.w += dot(weights3, in0); - - out1.x += dot(weights0, in1); - out1.y += dot(weights1, in1); - out1.z += dot(weights2, in1); - out1.w += dot(weights3, in1); - - out2.x += dot(weights0, in2); - out2.y += dot(weights1, in2); - out2.z += dot(weights2, in2); - out2.w += dot(weights3, in2); - - out3.x += dot(weights0, in3); - out3.y += dot(weights1, in3); - out3.z += dot(weights2, in3); - out3.w += dot(weights3, in3); + COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights4 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights5 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights6 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights7 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset + 
out_c_pack + out_c_pack + out_c_pack)); + + out0 = mad(in0.x, weights0, out0); + out0 = mad(in0.y, weights2, out0); + out0 = mad(in0.z, weights4, out0); + out0 = mad(in0.w, weights6, out0); + + out1 = mad(in1.x, weights0, out1); + out1 = mad(in1.y, weights2, out1); + out1 = mad(in1.z, weights4, out1); + out1 = mad(in1.w, weights6, out1); + + out2 = mad(in2.x, weights0, out2); + out2 = mad(in2.y, weights2, out2); + out2 = mad(in2.z, weights4, out2); + out2 = mad(in2.w, weights6, out2); - out4.x += dot(weights4, in0); - out4.y += dot(weights5, in0); - out4.z += dot(weights6, in0); - out4.w += dot(weights7, in0); - - out5.x += dot(weights4, in1); - out5.y += dot(weights5, in1); - out5.z += dot(weights6, in1); - out5.w += dot(weights7, in1); - - out6.x += dot(weights4, in2); - out6.y += dot(weights5, in2); - out6.z += dot(weights6, in2); - out6.w += dot(weights7, in2); - - out7.x += dot(weights4, in3); - out7.y += dot(weights5, in3); - out7.z += dot(weights6, in3); - out7.w += dot(weights7, in3); + out3 = mad(in3.x, weights0, out3); + out3 = mad(in3.y, weights2, out3); + out3 = mad(in3.z, weights4, out3); + out3 = mad(in3.w, weights6, out3); + + out4 = mad(in0.x, weights1, out4); + out4 = mad(in0.y, weights3, out4); + out4 = mad(in0.z, weights5, out4); + out4 = mad(in0.w, weights7, out4); + + out5 = mad(in1.x, weights1, out5); + out5 = mad(in1.y, weights3, out5); + out5 = mad(in1.z, weights5, out5); + out5 = mad(in1.w, weights7, out5); + + out6 = mad(in2.x, weights1, out6); + out6 = mad(in2.y, weights3, out6); + out6 = mad(in2.z, weights5, out6); + out6 = mad(in2.w, weights7, out6); + + out7 = mad(in3.x, weights1, out7); + out7 = mad(in3.y, weights3, out7); + out7 = mad(in3.z, weights5, out7); + out7 = mad(in3.w, weights7, out7); } #ifdef RELU @@ -285,7 +287,8 @@ void conv_2d_1x1_c8h1w2(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, __private const int in_c_block, __private const int out_h, __private const int out_w, - __private const int out_c_block) { // oc / 4 + __private const int out_c_block, + __private const int out_c_pack) { const int out_c_w_idx = get_global_id(0); //c/8 w/4 const int out_b_h_idx = get_global_id(1); //b h @@ -305,44 +308,42 @@ void conv_2d_1x1_c8h1w2(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, COMPUTE_FLOAT4 out5 = out4; const int intput_width_idx0 = out_w2_idx; - for (int in_channel_block_idx = 0; in_channel_block_idx < in_c_block; ++in_channel_block_idx) { - int offset = mad24(out_c_idx, in_c_block, in_channel_block_idx)*8; + int offset = mad24(in_channel_block_idx*4, out_c_pack, out_c_idx*8); const int inp_offset = (((out_b_idx*in_c_block + in_channel_block_idx)*out_h + out_h_idx)* out_w + intput_width_idx0)*4; COMPUTE_FLOAT4 in0 = CONVERT_COMPUTE_FLOAT4(vload4(0, input+inp_offset)); COMPUTE_FLOAT4 in1 = CONVERT_COMPUTE_FLOAT4(vload4(1, input+inp_offset)); - - COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(offset, kernel_ptr)); - COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 1, kernel_ptr)); - COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 2, kernel_ptr)); - COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 3, kernel_ptr)); - COMPUTE_FLOAT4 weights4 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 4, kernel_ptr)); - COMPUTE_FLOAT4 weights5 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 5, kernel_ptr)); - COMPUTE_FLOAT4 weights6 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 6, kernel_ptr)); - COMPUTE_FLOAT4 weights7 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 7, kernel_ptr)); + COMPUTE_FLOAT4 weights0 = 
CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights4 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights5 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights6 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights7 = CONVERT_COMPUTE_FLOAT4(vload4(1, kernel_ptr + offset + out_c_pack + out_c_pack + out_c_pack)); + + out0 = mad(in0.x, weights0, out0); + out0 = mad(in0.y, weights2, out0); + out0 = mad(in0.z, weights4, out0); + out0 = mad(in0.w, weights6, out0); - out0.x += dot(weights0, in0); - out0.y += dot(weights1, in0); - out0.z += dot(weights2, in0); - out0.w += dot(weights3, in0); - - out1.x += dot(weights0, in1); - out1.y += dot(weights1, in1); - out1.z += dot(weights2, in1); - out1.w += dot(weights3, in1); + out1 = mad(in1.x, weights0, out1); + out1 = mad(in1.y, weights2, out1); + out1 = mad(in1.z, weights4, out1); + out1 = mad(in1.w, weights6, out1); - out4.x += dot(weights4, in0); - out4.y += dot(weights5, in0); - out4.z += dot(weights6, in0); - out4.w += dot(weights7, in0); - - out5.x += dot(weights4, in1); - out5.y += dot(weights5, in1); - out5.z += dot(weights6, in1); - out5.w += dot(weights7, in1); + out4 = mad(in0.x, weights1, out4); + out4 = mad(in0.y, weights3, out4); + out4 = mad(in0.z, weights5, out4); + out4 = mad(in0.w, weights7, out4); + + out5 = mad(in1.x, weights1, out5); + out5 = mad(in1.y, weights3, out5); + out5 = mad(in1.z, weights5, out5); + out5 = mad(in1.w, weights7, out5); } #ifdef RELU @@ -404,7 +405,8 @@ void conv_2d_1x1_c4h1w1(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, __private const int in_c_block, __private const int out_h, __private const int out_w, - __private const int out_c_block) { + __private const int out_c_block, + __private const int out_c_pack) { const int out_c_w_idx = get_global_id(0); //c/4 w const int out_b_h_idx = get_global_id(1); //b h @@ -421,21 +423,20 @@ void conv_2d_1x1_c4h1w1(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, for (int in_channel_block_idx = 0; in_channel_block_idx < in_c_block; ++in_channel_block_idx) { - int offset = mad24(out_c_idx, in_c_block, in_channel_block_idx)*4; + int offset = mad24(in_channel_block_idx*4, out_c_pack, out_c_idx*4); const int inp_offset = (((out_b_idx*in_c_block + in_channel_block_idx)*out_h + out_h_idx)* out_w + intput_width_idx0)*4; COMPUTE_FLOAT4 in0 = CONVERT_COMPUTE_FLOAT4(vload4(0, input+inp_offset)); - - COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(offset, kernel_ptr)); - COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 1, kernel_ptr)); - COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 2, kernel_ptr)); - COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 3, kernel_ptr)); - - out0.x += dot(weights0, in0); - out0.y += dot(weights1, in0); - out0.z += dot(weights2, in0); - out0.w += dot(weights3, in0); + COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(0, 
kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack + out_c_pack)); + + out0 = mad(in0.x, weights0, out0); + out0 = mad(in0.y, weights1, out0); + out0 = mad(in0.z, weights2, out0); + out0 = mad(in0.w, weights3, out0); } #ifdef RELU @@ -461,7 +462,8 @@ void conv_2d_1x1_c4h1w2(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, __private const int in_c_block, __private const int out_h, __private const int out_w, - __private const int out_c_block) { + __private const int out_c_block, + __private const int out_c_pack) { const int out_c_w_idx = get_global_id(0); //c/4 w const int out_b_h_idx = get_global_id(1); //b h @@ -482,27 +484,27 @@ void conv_2d_1x1_c4h1w2(GLOBAL_SIZE_2_DIMS __private const int out_w_blocks, for (int in_channel_block_idx = 0; in_channel_block_idx < in_c_block; ++in_channel_block_idx) { - int offset = mad24(out_c_idx, in_c_block, in_channel_block_idx)*4; + int offset = mad24(in_channel_block_idx*4, out_c_pack, out_c_idx*4); const int inp_offset = (((out_b_idx*in_c_block + in_channel_block_idx)*out_h + out_h_idx)* out_w + intput_width_idx0)*4; COMPUTE_FLOAT4 in0 = CONVERT_COMPUTE_FLOAT4(vload4(0, input+inp_offset)); COMPUTE_FLOAT4 in1 = CONVERT_COMPUTE_FLOAT4(vload4(1, input+inp_offset)); + + COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset)); + COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack)); + COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack)); + COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(0, kernel_ptr + offset + out_c_pack + out_c_pack + out_c_pack)); + + out0 = mad(in0.x, weights0, out0); + out0 = mad(in0.y, weights1, out0); + out0 = mad(in0.z, weights2, out0); + out0 = mad(in0.w, weights3, out0); - COMPUTE_FLOAT4 weights0 = CONVERT_COMPUTE_FLOAT4(vload4(offset, kernel_ptr)); - COMPUTE_FLOAT4 weights1 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 1, kernel_ptr)); - COMPUTE_FLOAT4 weights2 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 2, kernel_ptr)); - COMPUTE_FLOAT4 weights3 = CONVERT_COMPUTE_FLOAT4(vload4(offset + 3, kernel_ptr)); - - out0.x += dot(weights0, in0); - out0.y += dot(weights1, in0); - out0.z += dot(weights2, in0); - out0.w += dot(weights3, in0); - - out1.x += dot(weights0, in1); - out1.y += dot(weights1, in1); - out1.z += dot(weights2, in1); - out1.w += dot(weights3, in1); + out1 = mad(in1.x, weights0, out1); + out1 = mad(in1.y, weights1, out1); + out1 = mad(in1.z, weights2, out1); + out1 = mad(in1.w, weights3, out1); } #ifdef RELU diff --git a/source/backend/opencl/execution/cl/gemm_buf.cl b/source/backend/opencl/execution/cl/gemm_buf.cl index cd4c7fc94..903b62252 100644 --- a/source/backend/opencl/execution/cl/gemm_buf.cl +++ b/source/backend/opencl/execution/cl/gemm_buf.cl @@ -121,15 +121,17 @@ __kernel void gemm_buf2(GLOBAL_SIZE_DIM2 vstore4(CONVERT_FLOAT4(o1.scdef), 1, output+out_offset+12*width); } -// [M, K/4, 4] -> [alignK, alignM] +// [B, K/4, area, 4] -> [alignK, alignM] (M = B * area) __kernel void transpose_pad(GLOBAL_SIZE_DIM2 const int alignM, const int alignK, const int M, const int K, + const int area, __global const FLOAT* input, __global FLOAT* output ) { +#ifdef AREA_EQUAL_1 const int idx_m4 = get_global_id(0); // idx M const int idx_k4 = get_global_id(1); // idx K UNIFORM_BOUNDRY_CHECK(idx_m4, idx_k4); @@ -149,22 +151,75 @@ __kernel void transpose_pad(GLOBAL_SIZE_DIM2 vstore4((FLOAT4)(m0k4.y, m1k4.y, 
m2k4.y, m3k4.y), 0, output + out_offset_base + alignM); vstore4((FLOAT4)(m0k4.z, m1k4.z, m2k4.z, m3k4.z), 0, output + out_offset_base + alignM + alignM); vstore4((FLOAT4)(m0k4.w, m1k4.w, m2k4.w, m3k4.w), 0, output + out_offset_base + alignM + alignM + alignM); +#elif defined BATCH_EQUAL_1 + + const int idx_m4 = get_global_id(0); // idx M + const int idx_k4 = get_global_id(1); // idx K + UNIFORM_BOUNDRY_CHECK(idx_m4, idx_k4); + + const int idx_m = idx_m4 << 2; + const int idx_k = idx_k4 << 2; + const int K_4 = (K + 3) >> 2; + const int in_offset_base = (idx_k4 * area + idx_m) * 4; + const int out_offset_base = idx_k * alignM + idx_m; + + FLOAT4 m0k4 = (idx_k4 >= K_4 || idx_m + 0 >= M) ? (FLOAT4)0 : vload4(0, input + in_offset_base); + FLOAT4 m1k4 = (idx_k4 >= K_4 || idx_m + 1 >= M) ? (FLOAT4)0 : vload4(0, input + in_offset_base + 4); + FLOAT4 m2k4 = (idx_k4 >= K_4 || idx_m + 2 >= M) ? (FLOAT4)0 : vload4(0, input + in_offset_base + 8); + FLOAT4 m3k4 = (idx_k4 >= K_4 || idx_m + 3 >= M) ? (FLOAT4)0 : vload4(0, input + in_offset_base + 12); + + vstore4((FLOAT4)(m0k4.x, m1k4.x, m2k4.x, m3k4.x), 0, output + out_offset_base); + vstore4((FLOAT4)(m0k4.y, m1k4.y, m2k4.y, m3k4.y), 0, output + out_offset_base + alignM); + vstore4((FLOAT4)(m0k4.z, m1k4.z, m2k4.z, m3k4.z), 0, output + out_offset_base + alignM + alignM); + vstore4((FLOAT4)(m0k4.w, m1k4.w, m2k4.w, m3k4.w), 0, output + out_offset_base + alignM + alignM + alignM); + +#else + + const int idx_m = get_global_id(0); // idx M + const int idx_k4 = get_global_id(1); // idx K + UNIFORM_BOUNDRY_CHECK(idx_m, idx_k4); + + const int K_4 = (K + 3) >> 2; + const int idx_k = idx_k4 << 2; + const int out_offset_base = idx_k * alignM + idx_m; + + if(idx_k4 >= K_4 || idx_m >= M) { + output[out_offset_base] = (FLOAT)0; + output[out_offset_base + alignM] = (FLOAT)0; + output[out_offset_base + alignM + alignM] = (FLOAT)0; + output[out_offset_base + alignM + alignM + alignM] = (FLOAT)0; + return; + } + const int idx_b = idx_m / area; + const int idx_area = idx_m % area; + + const int in_offset_base = ((idx_b * K_4 + idx_k4) * area + idx_area) * 4; + FLOAT4 data = vload4(0, input + in_offset_base); + + output[out_offset_base] = data.x; + output[out_offset_base + alignM] = data.y; + output[out_offset_base + alignM + alignM] = data.z; + output[out_offset_base + alignM + alignM + alignM] = data.w; +#endif } -// [alignM, alignN] -> [M, N/4, 4] -__kernel void add_bias(GLOBAL_SIZE_DIM2 +// [alignM, alignN] -> [B, N/4, area, 4] (M = B * area) +__kernel void transpose_bias(GLOBAL_SIZE_DIM2 const int alignM, const int alignN, const int M, const int N, + const int area, __global const FLOAT* input0, __global const FLOAT* input1, __global FLOAT* output ) { +#ifdef AREA_EQUAL_1 const int idx_m = get_global_id(0); // idx M const int idx_n_16 = get_global_id(1); // idx N UNIFORM_BOUNDRY_CHECK(idx_m, idx_n_16); + const int N_4 = (N + 3) >> 2; const int N_16 = (N + 15) >> 4; const int N_left = N & 15; bool canVec16 = (N_left == 0 || (N_left != 0 && idx_n_16 < N_16 - 1)); @@ -172,31 +227,120 @@ __kernel void add_bias(GLOBAL_SIZE_DIM2 FLOAT16 res0 = vload16(0, input0 + idx_m * alignN + (idx_n_16 << 4)); FLOAT16 res1 = vload16(0, input1 + (idx_n_16 << 4)); FLOAT16 res = res0 + res1; - vstore16(res, 0, output + ((idx_m * N_16 + idx_n_16) << 4)); + #ifdef RELU + res = fmax(res, (FLOAT16)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT16)0, (FLOAT16)6); + #endif + vstore16(res, 0, output + ((idx_m * N_4 + (idx_n_16 << 2)) << 2)); } else { - const int N_4 = (N + 3) >> 2; FLOAT4 
res0 = vload4(0, input0 + idx_m * alignN + (idx_n_16 << 4)); FLOAT4 res1 = vload4(0, input1 + (idx_n_16 << 4)); FLOAT4 res = res0 + res1; - vstore4(res, 0, output + ((idx_m * N_16 + idx_n_16) << 4)); + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + ((idx_m * N_4 + (idx_n_16 << 2)) << 2)); if(idx_n_16 * 4 + 1 >= N_4) return; res0 = vload4(0, input0 + idx_m * alignN + (idx_n_16 << 4) + 4); res1 = vload4(0, input1 + (idx_n_16 << 4) + 4); res = res0 + res1; - vstore4(res, 0, output + ((idx_m * N_16 + idx_n_16) << 4) + 4); + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + ((idx_m * N_4 + (idx_n_16 << 2)) << 2) + 4); if(idx_n_16 * 4 + 2 >= N_4) return; res0 = vload4(0, input0 + idx_m * alignN + (idx_n_16 << 4) + 8); res1 = vload4(0, input1 + (idx_n_16 << 4) + 8); res = res0 + res1; - vstore4(res, 0, output + ((idx_m * N_16 + idx_n_16) << 4) + 8); + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + ((idx_m * N_4 + (idx_n_16 << 2)) << 2) + 8); if(idx_n_16 * 4 + 3 >= N_4) return; res0 = vload4(0, input0 + idx_m * alignN + (idx_n_16 << 4) + 12); res1 = vload4(0, input1 + (idx_n_16 << 4) + 12); res = res0 + res1; - vstore4(res, 0, output + ((idx_m * N_16 + idx_n_16) << 4) + 12); + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + ((idx_m * N_4 + (idx_n_16 << 2)) << 2) + 12); } +#else + const int idx_m = get_global_id(0); // idx M + const int idx_n_16 = get_global_id(1); // idx N + UNIFORM_BOUNDRY_CHECK(idx_m, idx_n_16); + + const int N_4 = (N + 3) >> 2; + + const int idx_b = idx_m / area; + const int idx_area = idx_m % area; + + const int inp_base_offset = idx_m * alignN + (idx_n_16 << 4); + const int out_base_offset = ((idx_b * N_4 + idx_n_16 * 4) * area + idx_area) * 4; + + FLOAT4 res0 = vload4(0, input0 + inp_base_offset); + FLOAT4 res1 = vload4(0, input1 + (idx_n_16 << 4)); + FLOAT4 res = res0 + res1; + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + out_base_offset); + + if(idx_n_16 * 4 + 1 >= N_4) return; + res0 = vload4(0, input0 + inp_base_offset + 4); + res1 = vload4(0, input1 + (idx_n_16 << 4) + 4); + res = res0 + res1; + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + out_base_offset + area * 4); + + if(idx_n_16 * 4 + 2 >= N_4) return; + res0 = vload4(0, input0 + inp_base_offset + 8); + res1 = vload4(0, input1 + (idx_n_16 << 4) + 8); + res = res0 + res1; + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + out_base_offset + area * 8); + + if(idx_n_16 * 4 + 3 >= N_4) return; + res0 = vload4(0, input0 + inp_base_offset + 12); + res1 = vload4(0, input1 + (idx_n_16 << 4) + 12); + res = res0 + res1; + #ifdef RELU + res = fmax(res, (FLOAT4)0); + #endif + #ifdef RELU6 + res = clamp(res, (FLOAT4)0, (FLOAT4)6); + #endif + vstore4(res, 0, output + out_base_offset + area * 12); +#endif } diff --git a/source/backend/opencl/execution/cl/matmul_params_buf.cl b/source/backend/opencl/execution/cl/matmul_params_buf.cl index 601dc50e6..ce1895d44 100644 
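As a quick reference for the rewritten bias kernel: add_bias becomes transpose_bias, takes the extra area argument, writes directly into the [B, N/4, area, 4] layout, and can fuse RELU/RELU6 through build macros. A minimal sketch of the destination indexing in the general (area > 1) branch, using the same names as the kernel:

    // idx_m runs over M = B * area rows; N_4 = (N + 3) >> 2
    const int idx_b    = idx_m / area;                                  // batch index
    const int idx_area = idx_m % area;                                  // position inside one batch's area
    const int out_base = ((idx_b * N_4 + (idx_n_16 << 2)) * area + idx_area) * 4;
    // successive FLOAT4 groups along N are stored at out_base + area*4, + area*8, + area*12
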
--- a/source/backend/opencl/execution/cl/matmul_params_buf.cl +++ b/source/backend/opencl/execution/cl/matmul_params_buf.cl @@ -5,11 +5,6 @@ // ================================================================================================= #define USE_INLINE_KEYWORD 1 -// Parameters set by the tuner or by the database. Here they are given a basic default value in case -// this kernel file is used outside of the CLBlast library. -#ifndef GEMMK - #define GEMMK 0 // Kernel to choose: 0 regular, 1 with 2D register tiling -#endif #ifndef MWG #define MWG 8 // Tile-size in dimension M (e.g. 64, 128) #endif @@ -17,7 +12,7 @@ #define NWG 8 // Tile-size in dimension N (e.g. 64, 128) #endif #ifndef KWG - #define KWG 8 // Tile-size in dimension K (e.g. 8, 16) + #define KWG 16 // Tile-size in dimension K (e.g. 8, 16) #endif #ifndef MDIMC #define MDIMC 8 // Threads per workgroup in M-dimension (e.g. 8, 16, 32) @@ -32,7 +27,7 @@ #define NDIMB 8 // Re-shaped tile dimension of matrix B: KDIMB * NDIMB (kernel 0 only) #endif #ifndef KWI - #define KWI 1 // Unroll factor of the KWG loop (smaller or equal than KWG) + #define KWI 2 // Unroll factor of the KWG loop (smaller or equal than KWG) #endif #ifndef VWM #define VWM 1 // Vector width of matrices A and C @@ -52,9 +47,6 @@ #ifndef SB #define SB 0 // Use local/shared memory to cache matrix B (1) or not (0) (kernel 0 only) #endif -#ifndef KREG - #define KREG 1 // Amount of register tiling in second dimension, multiple of VWN (kernel 1 only) -#endif // Helper parameters based on the above tuning parameters #define MWI (MWG/MDIMC) // Work per work-item (M-dimension) @@ -74,39 +66,6 @@ #define GLOBAL_MEM_FENCE 0 // Global synchronisation barrier for potential better performance #endif -// Half-precision -#if PRECISION == 16 - typedef half real; - typedef half2 real2; - typedef half4 real4; - typedef half8 real8; - typedef half16 real16; - #define ZERO 0 - #define ONE 1 - #define SMALLEST -1.0e14 - -// Single-precision -#elif PRECISION == 32 - typedef float real; - typedef float2 real2; - typedef float4 real4; - typedef float8 real8; - typedef float16 real16; - #define ZERO 0.0f - #define ONE 1.0f - #define SMALLEST -1.0e37f -#endif - -// Converts a 'real argument' value to a 'real' value as passed to the kernel. Normally there is no -// conversion, but half-precision is not supported as kernel argument so it is converted from float. 
-#if PRECISION == 16 - typedef float real_arg; - #define GetRealArg(x) (half)x -#else - typedef real real_arg; - #define GetRealArg(x) x -#endif - // Pointers to local memory objects (using a define because CUDA doesn't need them) #ifndef LOCAL_PTR #define LOCAL_PTR __local @@ -124,115 +83,18 @@ #define RELAX_WORKGROUP_SIZE 0 #endif +#define ZERO (FLOAT)0.0f // Sets a variable to zero -#if PRECISION == 3232 || PRECISION == 6464 - #define SetToZero(a) a.x = ZERO; a.y = ZERO -#else - #define SetToZero(a) a = ZERO -#endif - -// Sets a variable to zero (only the imaginary part) -#if PRECISION == 3232 || PRECISION == 6464 - #define ImagToZero(a) a.y = ZERO -#else - #define ImagToZero(a) -#endif - -// Sets a variable to one -#if PRECISION == 3232 || PRECISION == 6464 - #define SetToOne(a) a.x = ONE; a.y = ZERO -#else - #define SetToOne(a) a = ONE -#endif - -// Determines whether a variable is zero -#if PRECISION == 3232 || PRECISION == 6464 - #define IsZero(a) ((a.x == ZERO) && (a.y == ZERO)) -#else - #define IsZero(a) (a == ZERO) -#endif - -// The absolute value (component-wise) -#if PRECISION == 3232 || PRECISION == 6464 - #define AbsoluteValue(value) value.x = fabs(value.x); value.y = fabs(value.y) -#else - #define AbsoluteValue(value) value = fabs(value) -#endif - -// Negation (component-wise) -#if PRECISION == 3232 || PRECISION == 6464 - #define Negate(value) value.x = -(value.x); value.y = -(value.y) -#else - #define Negate(value) value = -(value) -#endif - -// Adds two complex variables -#if PRECISION == 3232 || PRECISION == 6464 - #define Add(c,a,b) c.x = a.x + b.x; c.y = a.y + b.y -#else - #define Add(c,a,b) c = a + b -#endif - -// Subtracts two complex variables -#if PRECISION == 3232 || PRECISION == 6464 - #define Subtract(c,a,b) c.x = a.x - b.x; c.y = a.y - b.y -#else - #define Subtract(c,a,b) c = a - b -#endif - -// Multiply two complex variables (used in the defines below) -#if PRECISION == 3232 || PRECISION == 6464 - #define MulReal(a,b) a.x*b.x - a.y*b.y - #define MulImag(a,b) a.x*b.y + a.y*b.x -#endif - -// The scalar multiply function -#if PRECISION == 3232 || PRECISION == 6464 - #define Multiply(c,a,b) c.x = MulReal(a,b); c.y = MulImag(a,b) -#else - #define Multiply(c,a,b) c = a * b -#endif - -// The scalar multiply-add function -#if PRECISION == 3232 || PRECISION == 6464 - #define MultiplyAdd(c,a,b) c.x += MulReal(a,b); c.y += MulImag(a,b) -#else - #if USE_CL_MAD == 1 - #define MultiplyAdd(c,a,b) c = mad(a, b, c) - #else - #define MultiplyAdd(c,a,b) c += a * b - #endif -#endif - -// The scalar multiply-subtract function -#if PRECISION == 3232 || PRECISION == 6464 - #define MultiplySubtract(c,a,b) c.x -= MulReal(a,b); c.y -= MulImag(a,b) -#else - #define MultiplySubtract(c,a,b) c -= a * b -#endif - -// The scalar division function: full division -#if PRECISION == 3232 || PRECISION == 6464 - #define DivideFull(c,a,b) singlereal num_x = (a.x * b.x) + (a.y * b.y); singlereal num_y = (a.y * b.x) - (a.x * b.y); singlereal denom = (b.x * b.x) + (b.y * b.y); c.x = num_x / denom; c.y = num_y / denom -#else - #define DivideFull(c,a,b) c = a / b -#endif - -// The scalar AXPBY function -#if PRECISION == 3232 || PRECISION == 6464 - #define AXPBY(e,a,b,c,d) e.x = MulReal(a,b) + MulReal(c,d); e.y = MulImag(a,b) + MulImag(c,d) -#else - #define AXPBY(e,a,b,c,d) e = a*b + c*d -#endif - -// The complex conjugate operation for complex transforms -#if PRECISION == 3232 || PRECISION == 6464 - #define COMPLEX_CONJUGATE(value) value.x = value.x; value.y = -value.y +#define SetToZero(a) a = ZERO 
+#define IsZero(a) (a == ZERO) +#define Multiply(c,a,b) c = a * b +#if USE_CL_MAD == 1 +#define MultiplyAdd(c,a,b) c = mad(a, b, c) #else - #define COMPLEX_CONJUGATE(value) +#define MultiplyAdd(c,a,b) c += a * b #endif -// ================================================================================================= +#define AXPBY(e,a,b,c,d) e = a*b + c*d // Force inlining functions or not: some compilers don't support the inline keyword #ifdef USE_INLINE_KEYWORD @@ -241,39 +103,42 @@ #define INLINE_FUNC #endif -// ================================================================================================= - INLINE_FUNC int GetGroupID1() { return get_group_id(1); } - INLINE_FUNC int GetGroupID0() { return get_group_id(0); } +INLINE_FUNC int GetGroupID1() { return get_group_id(1); } +INLINE_FUNC int GetGroupID0() { return get_group_id(0); } // ================================================================================================= // End of the C++11 raw string literal +typedef float real_arg; +#define GetRealArg(x) (FLOAT)x +typedef FLOAT real; + // Data-widths in dimension M #if VWM == 1 - typedef real realM; + typedef FLOAT realM; #elif VWM == 2 - typedef real2 realM; + typedef FLOAT2 realM; #elif VWM == 4 - typedef real4 realM; + typedef FLOAT4 realM; #elif VWM == 8 - typedef real8 realM; + typedef FLOAT8 realM; #elif VWM == 16 - typedef real16 realM; + typedef FLOAT16 realM; #endif // Data-widths in dimension N #if VWN == 1 - typedef real realN; + typedef FLOAT realN; #elif VWN == 2 - typedef real2 realN; + typedef FLOAT2 realN; #elif VWN == 4 - typedef real4 realN; + typedef FLOAT4 realN; #elif VWN == 8 - typedef real8 realN; + typedef FLOAT8 realN; #elif VWN == 16 - typedef real16 realN; + typedef FLOAT16 realN; #endif // ================================================================================================= @@ -430,26 +295,89 @@ INLINE_FUNC void GlobalToLocalB(const __global realN* restrict bgm, LOCAL_PTR re // Caches global off-chip memory directly into per-thread private memory (registers). This function // is specific for caching the A input matrix. 
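With the CLBlast precision machinery removed, every type now derives from MNN's FLOAT macros. For example, with VWM == 4, VWN == 4 and USE_CL_MAD == 1, the defines above effectively reduce to the following (a minimal sketch; FLOAT/FLOAT4 are supplied by the backend's precision build options):

    typedef FLOAT  real;      // scalar compute type
    typedef FLOAT4 realM;     // vector width along M
    typedef FLOAT4 realN;     // vector width along N
    // MultiplyAdd(c, a, b) expands to c = mad(a, b, c), and the alpha/beta kernel
    // arguments arrive as real_arg (float) and are cast with GetRealArg().
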
-#if SA == 0 && GEMMK == 0 +#if SA == 0 +INLINE_FUNC int GlobalIndexA() { + // Computes the indices based on strided/non-strided access + #if STRM == 0 + // [MWG/MWI, MWI/VWM, VWM] + int mg = get_local_id(0)*(MWI/VWM); + #elif STRM == 1 + // [MWI/VWM, MWG/MWI, VWM] + int mg = get_local_id(0); + #endif + + // Computes the indices for the global memory + // [kSizeM/MWG, (MWG/VWM), VWM] + int idm = mg + GetGroupID0() * (MWG/VWM); + return idm; +} + +INLINE_FUNC realM GlobalToPrivateOptA(const __global realM* restrict agm, const int base, const int _mi, + const int kSizeM, const int idk) { + // Computes the indices based on strided/non-strided access + #if STRM == 0 + // [MWG/MWI, MWI/VWM, VWM] + int idm = base + _mi; + #elif STRM == 1 + // [MWI/VWM, MWG/MWI, VWM] + int idm = base + _mi*MDIMC; + #endif + + // Loads the data from global memory (not transposed) and stores into registers + // [kSizeK, kSizeM/VWM, VWM] + return agm[idk*(kSizeM/VWM) + idm]; +} + INLINE_FUNC realM GlobalToPrivateA(const __global realM* restrict agm, const int _mi, - const int kSizeM, const int idk, const int kwg) { + const int kSizeM, const int idk) { // Computes the indices based on strided/non-strided access #if STRM == 0 + // [MWG/MWI, MWI/VWM, VWM] int mg = _mi + get_local_id(0)*(MWI/VWM); #elif STRM == 1 + // [MWI/VWM, MWG/MWI, VWM] int mg = get_local_id(0) + _mi*MDIMC; #endif // Computes the indices for the global memory + // [kSizeM/MWG, (MWG/VWM), VWM] int idm = mg + GetGroupID0() * (MWG/VWM); // Loads the data from global memory (not transposed) and stores into registers + // [kSizeK, kSizeM/VWM, VWM] return agm[idk*(kSizeM/VWM) + idm]; } + #endif // Same as above, but now for the B input matrix -#if SB == 0 && GEMMK == 0 +#if SB == 0 +INLINE_FUNC int GlobalIndexB() { + // Computes the indices based on strided/non-strided access + #if STRN == 0 + int ng = get_local_id(1)*(NWI/VWN); + #elif STRN == 1 + int ng = get_local_id(1); + #endif + + // Computes the indices for the global memory + int idn = ng + GetGroupID1() * (NWG/VWN); + return idn; +} + +INLINE_FUNC realN GlobalToPrivateOptB(const __global realN* restrict bgm, const int base, const int _ni, + const int kSizeN, const int idk) { + // Computes the indices based on strided/non-strided access + #if STRN == 0 + int idn = base + _ni; + #elif STRN == 1 + int idn = base + _ni*NDIMC; + #endif + + // Loads the data from global memory (transposed) and stores into registers + return bgm[idk*(kSizeN/VWN) + idn]; +} + INLINE_FUNC realN GlobalToPrivateB(const __global realN* restrict bgm, const int _ni, const int kSizeN, const int idk) { // Computes the indices based on strided/non-strided access @@ -494,12 +422,14 @@ INLINE_FUNC realN LocalToPrivateB(LOCAL_PTR realN* blm, const int _ni, const int } #endif - - // The vectorised multiply-add function INLINE_FUNC realM MultiplyAddVector(realM cvec, const realM avec, const real bval) { #if USE_VECTOR_MAD == 1 + #if USE_CL_MAD == 1 + cvec = mad(avec, (realM)bval, cvec); + #else cvec += avec * bval; + #endif #else #if VWM == 1 MultiplyAdd(cvec, avec, bval); @@ -545,7 +475,11 @@ INLINE_FUNC realM MultiplyAddVector(realM cvec, const realM avec, const real bva // The vectorised multiply-add function INLINE_FUNC realN MultiplyAddVectorN(realN cvec, const real avec, const realN bval) { #if USE_VECTOR_MAD == 1 + #if USE_CL_MAD == 1 + cvec = mad((realN)avec, bval, cvec); + #else cvec += avec * bval; + #endif #else #if VWN == 1 MultiplyAdd(cvec, avec, bval); @@ -592,28 +526,51 @@ INLINE_FUNC realN MultiplyAddVectorN(realN 
cvec, const real avec, const realN bv // Merges the results in Cpm with the global array in Cgm. This also performs the multiplication // with the constants: Cgm = alpha*A*B + beta*Cgm = alpha*Cpm + beta*Cgm -// layout : [N, M] -INLINE_FUNC void StoreResultsM(__global realM* cgm, realM c_value, const int _mi, const int _ni, - const int kSizeM, const real alpha, const real beta) { + +typedef struct { + int index[2]; +} INT2; + +INLINE_FUNC INT2 StoreIndexM() { + INT2 res; #if STRM == 0 - int mg = _mi + get_local_id(0)*(MWI/VWM); + int mg = get_local_id(0)*(MWI/VWM); #elif STRM == 1 - int mg = get_local_id(0) + _mi*MDIMC; + int mg = get_local_id(0); #endif #if STRN == 0 - int ng = _ni + get_local_id(1)*NWI; + int ng = get_local_id(1)*NWI; #elif STRN == 1 - int ng = _ni%VWN + get_local_id(1)*VWN + (_ni/VWN)*VWN*NDIMC; + int ng = get_local_id(1)*VWN; #endif int idm = mg + GetGroupID0() * (MWG/VWM); int idn = ng + GetGroupID1() * NWG; + res.index[0] = idm; + res.index[1] = idn; + return res; +} + +// layout : [N, M] +INLINE_FUNC void StoreResultsM(__global realM* cgm, realM c_value, const INT2 baseOffset, const int _mi, const int _ni, + const int kSizeM, const real alpha, const real beta) { + #if STRM == 0 + int idm = _mi + baseOffset.index[0]; + #elif STRM == 1 + int idm = baseOffset.index[0] + _mi*MDIMC; + #endif + #if STRN == 0 + int idn = _ni + baseOffset.index[1]; + #elif STRN == 1 + int idn = _ni%VWN + baseOffset.index[1] + (_ni/VWN)*VWN*NDIMC; + #endif + int index = idn*(kSizeM/VWM) + idm; - realM result; - realM xval = c_value; + realM result = c_value; // The final multiplication with alpha (in case beta == 0) - if (IsZero(beta)) { + #ifdef ONLY_HAVE_ALPHA + realM xval = c_value; #if VWM == 1 Multiply(result, alpha, xval); #elif VWM == 2 @@ -651,10 +608,11 @@ INLINE_FUNC void StoreResultsM(__global realM* cgm, realM c_value, const int _mi Multiply(result.sE, alpha, xval.sE); Multiply(result.sF, alpha, xval.sF); #endif - } + #endif // The final multiplication with alpha and the addition with beta*C - else { + #ifdef HAVE_ALPHA_BETA + realM xval = c_value; realM yval = cgm[index]; #if VWM == 1 AXPBY(result, alpha, xval, beta, yval); @@ -693,39 +651,56 @@ INLINE_FUNC void StoreResultsM(__global realM* cgm, realM c_value, const int _mi AXPBY(result.sE, alpha, xval.sE, beta, yval.sE); AXPBY(result.sF, alpha, xval.sF, beta, yval.sF); #endif - } + #endif cgm[index] = result; } - +INLINE_FUNC INT2 StoreIndexN() { + INT2 res; + #if STRM == 0 + int mg = get_local_id(0)*MWI; + #elif STRM == 1 + int mg = get_local_id(0)*VWM; + #endif + #if STRN == 0 + int ng = get_local_id(1)*(NWI/VWN); + #elif STRN == 1 + int ng = get_local_id(1); + #endif + int idm = mg + GetGroupID0() * MWG; + int idn = ng + GetGroupID1() * (NWG/VWN); + + res.index[0] = idm; + res.index[1] = idn; + return res; +} // layout : [M, N] INLINE_FUNC void StoreResultsN(__global realN* cgn, realN c_value, + const INT2 baseOffset, #ifdef BIAS - __global realN* egn, + realN* epm, #endif const int _mi, const int _ni, const int kSizeN, const real alpha, const real beta) { - - + #if STRM == 0 - int mg = _mi + get_local_id(0)*MWI; + int idm = _mi + baseOffset.index[0]; #elif STRM == 1 - int mg = _mi%VWM + get_local_id(0)*VWM + (_mi/VWM)*VWM*MDIMC; + int idm = _mi%VWM + baseOffset.index[0] + (_mi/VWM)*VWM*MDIMC; #endif #if STRN == 0 - int ng = _ni + get_local_id(1)*(NWI/VWN); + int idn = _ni + baseOffset.index[1]; #elif STRN == 1 - int ng = get_local_id(1) + _ni*NDIMC; + int idn = baseOffset.index[1] + _ni*NDIMC; #endif - int idm = mg + 
GetGroupID0() * MWG; - int idn = ng + GetGroupID1() * (NWG/VWN); + int index = idm * (kSizeN/VWN) + idn; - realN result = 0; - realN xval = c_value; + realN result = c_value; // The final multiplication with alpha (in case beta == 0) - if (IsZero(beta)) { + #ifdef ONLY_HAVE_ALPHA + realN xval = c_value; #if VWN == 1 Multiply(result, alpha, xval); #elif VWN == 2 @@ -763,10 +738,11 @@ INLINE_FUNC void StoreResultsN(__global realN* cgn, realN c_value, Multiply(result.sE, alpha, xval.sE); Multiply(result.sF, alpha, xval.sF); #endif - } + #endif // The final multiplication with alpha and the addition with beta*C - else { + #ifdef HAVE_ALPHA_BETA + realN xval = c_value; realN yval = cgn[index]; #if VWN == 1 AXPBY(result, alpha, xval, beta, yval); @@ -805,48 +781,78 @@ INLINE_FUNC void StoreResultsN(__global realN* cgn, realN c_value, AXPBY(result.sE, alpha, xval.sE, beta, yval.sE); AXPBY(result.sF, alpha, xval.sF, beta, yval.sF); #endif - } + #endif #ifdef BIAS - realN xval = egn[idn]; + realN eval = epm[_ni]; #if VWN == 1 - result += xval; + result += eval; + #ifdef RELU + result = fmax(result, (FLOAT)0); + #endif + #ifdef RELU6 + result = clamp(result, (FLOAT)0, (FLOAT)6); + #endif #elif VWN == 2 - result.x += xval.x; - result.y += xval.y; + result.x += eval.x; + result.y += eval.y; + #ifdef RELU + result = fmax(result, (FLOAT2)0); + #endif + #ifdef RELU6 + result = clamp(result, (FLOAT2)0, (FLOAT2)6); + #endif #elif VWN == 4 - result.x += xval.x; - result.y += xval.y; - result.z += xval.z; - result.w += xval.w; + result.x += eval.x; + result.y += eval.y; + result.z += eval.z; + result.w += eval.w; + #ifdef RELU + result = fmax(result, (FLOAT4)0); + #endif + #ifdef RELU6 + result = clamp(result, (FLOAT4)0, (FLOAT4)6); + #endif #elif VWN == 8 - result.s0 += xval.s0; - result.s1 += xval.s1; - result.s2 += xval.s2; - result.s3 += xval.s3; - result.s4 += xval.s4; - result.s5 += xval.s5; - result.s6 += xval.s6; - result.s7 += xval.s7; + result.s0 += eval.s0; + result.s1 += eval.s1; + result.s2 += eval.s2; + result.s3 += eval.s3; + result.s4 += eval.s4; + result.s5 += eval.s5; + result.s6 += eval.s6; + result.s7 += eval.s7; + #ifdef RELU + result = fmax(result, (FLOAT8)0); + #endif + #ifdef RELU6 + result = clamp(result, (FLOAT8)0, (FLOAT8)6); + #endif #elif VWN == 16 - result.s0 += xval.s0; - result.s1 += xval.s1; - result.s2 += xval.s2; - result.s3 += xval.s3; - result.s4 += xval.s4; - result.s5 += xval.s5; - result.s6 += xval.s6; - result.s7 += xval.s7; - result.s8 += xval.s8; - result.s9 += xval.s9; - result.sA += xval.sA; - result.sB += xval.sB; - result.sC += xval.sC; - result.sD += xval.sD; - result.sE += xval.sE; - result.sF += xval.sF; + result.s0 += eval.s0; + result.s1 += eval.s1; + result.s2 += eval.s2; + result.s3 += eval.s3; + result.s4 += eval.s4; + result.s5 += eval.s5; + result.s6 += eval.s6; + result.s7 += eval.s7; + result.s8 += eval.s8; + result.s9 += eval.s9; + result.sA += eval.sA; + result.sB += eval.sB; + result.sC += eval.sC; + result.sD += eval.sD; + result.sE += eval.sE; + result.sF += eval.sF; + #ifdef RELU + result = fmax(result, (FLOAT16)0); + #endif + #ifdef RELU6 + result = clamp(result, (FLOAT16)0, (FLOAT16)6); + #endif #endif #endif @@ -869,13 +875,6 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, , LOCAL_PTR realN* blm #endif ) { - - // Allocates workitem-private memory (registers) - #pragma promote_to_registers - realM apm[MWI/VWM]; // MWI * 1 - #pragma promote_to_registers - realN bpm[NWI/VWN]; // 1 * NWI - #ifdef OUTPUTMN 
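The store helpers above replace two per-result computations with cheaper ones: the runtime if (IsZero(beta)) branch becomes the compile-time ONLY_HAVE_ALPHA / HAVE_ALPHA_BETA paths, and the workgroup/workitem part of the store coordinate is hoisted into StoreIndexM()/StoreIndexN() so it is computed once per workitem. A minimal sketch of how a coordinate is assembled for the STRM == 0, STRN == 0 case:

    INT2 base = StoreIndexN();                 // invariant part, computed once per workitem
    int idm = _mi + base.index[0];             // row of this result
    int idn = _ni + base.index[1];             // column of this result, in realN units
    int index = idm * (kSizeN / VWN) + idn;    // flattened [M, N] position written by StoreResultsN
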
#pragma promote_to_registers realN cpn[MWI*(NWI/VWN)]; // MWI * NWI @@ -909,64 +908,182 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, #endif // Loops over all workgroup tiles - for (int kwg = 0; kwg < kSizeK; kwg += KWG * KREG) { - - // Loads data: off-chip --> local (matrix A) - #if SA == 1 - GlobalToLocalA(agm, alm, kSizeM, tid, kwg); - #endif - // Loads data: off-chip --> local (matrix B) - #if SB == 1 - GlobalToLocalB(bgm, blm, kSizeN, tid, kwg); - #endif - #if SA == 1 || SB == 1 - barrier(CLK_LOCAL_MEM_FENCE); - #endif - - // Loops over all workitem tiles, unrolled by a factor KWI - for (int pwi = 0; pwi < KWG * KREG; pwi += KWI * KREG) { - #pragma unroll - for (int _pit = 0; _pit < KWI*KREG; _pit += KREG) { - #if SA == 0 || SB == 0 - int idk = kwg + pwi + _pit; + #if SA == 1 || SB == 1 + // Allocates workitem-private memory (registers) + #pragma promote_to_registers + realM apm[MWI/VWM]; // MWI * 1 + #pragma promote_to_registers + realN bpm[NWI/VWN]; // 1 * NWI + + for (int kwg = 0; kwg < kSizeK; kwg += KWG) { + // Loads data: off-chip --> local (matrix A) + #if SA == 1 + GlobalToLocalA(agm, alm, kSizeM, tid, kwg); #endif - #if SA == 1 || SB == 1 - int kg = pwi + _pit; + // Loads data: off-chip --> local (matrix B) + #if SB == 1 + GlobalToLocalB(bgm, blm, kSizeN, tid, kwg); #endif + barrier(CLK_LOCAL_MEM_FENCE); + + // Loops over all workitem tiles, unrolled by a factor KWI + for (int pwi = 0; pwi < KWG; pwi += KWI) { + #pragma unroll + for (int _pit = 0; _pit < KWI; _pit += 1) { + #if SA == 0 || SB == 0 + int idk = kwg + pwi + _pit; + #endif + int kg = pwi + _pit; + + // Loads matrix A (kernel 0) or matrix B (kernel 1) + #pragma unroll + for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { + // Loads data: local --> private (matrix A) + #if SA == 1 + apm[_mi] = LocalToPrivateA(alm, _mi, kg); + // Loads data: off-chip --> private (matrix A) + #elif SA == 0 + apm[_mi] = GlobalToPrivateA(agm, _mi, kSizeM, idk); + #endif + } - // Loads matrix A (kernel 0) or matrix B (kernel 1) - #pragma unroll - for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { - // Loads data: local --> private (matrix A) - #if GEMMK == 0 && SA == 1 - apm[_mi] = LocalToPrivateA(alm, _mi, kg); - // Loads data: off-chip --> private (matrix A) - #elif GEMMK == 0 && SA == 0 - apm[_mi] = GlobalToPrivateA(agm, _mi, kSizeM, idk, kwg); - #endif - } + // Loads matrix B (kernel 0) or matrix A (kernel 1) - // Loads matrix B (kernel 0) or matrix A (kernel 1) + #pragma unroll + for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { + // Loads data: local --> private (matrix B) + #if SB == 1 + bpm[_ni] = LocalToPrivateB(blm, _ni, kg); + // Loads data: off-chip --> private (matrix B) + #else + bpm[_ni] = GlobalToPrivateB(bgm, _ni, kSizeN, idk); + #endif + } - #pragma unroll - for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { - // Loads data: local --> private (matrix B) - #if SB == 1 - bpm[_ni] = LocalToPrivateB(blm, _ni, kg); - // Loads data: off-chip --> private (matrix B) - #else - bpm[_ni] = GlobalToPrivateB(bgm, _ni, kSizeN, idk); - #endif + // Performs the accumulation (Cpm += Apm * Bpm) + + #ifdef OUTPUTMN + #pragma unroll + for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { + #pragma unroll + for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { + const realM aval = apm[_mi]; + #if VWM == 1 + // [MWI/VWM, VWM, NWI/VWN, VWN] + cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni], aval, bpm[_ni]); + #elif VWM == 2 + cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 
0)*(NWI/VWN) + _ni], aval.x, bpm[_ni]); + cpn[(_mi*VWM + 1)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 1)*(NWI/VWN) + _ni], aval.y, bpm[_ni]); + #elif VWM == 4 + cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni], aval.x, bpm[_ni]); + cpn[(_mi*VWM + 1)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 1)*(NWI/VWN) + _ni], aval.y, bpm[_ni]); + cpn[(_mi*VWM + 2)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 2)*(NWI/VWN) + _ni], aval.z, bpm[_ni]); + cpn[(_mi*VWM + 3)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 3)*(NWI/VWN) + _ni], aval.w, bpm[_ni]); + #elif VWM == 8 + cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni], aval.s0, bpm[_ni]); + cpn[(_mi*VWM + 1)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 1)*(NWI/VWN) + _ni], aval.s1, bpm[_ni]); + cpn[(_mi*VWM + 2)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 2)*(NWI/VWN) + _ni], aval.s2, bpm[_ni]); + cpn[(_mi*VWM + 3)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 3)*(NWI/VWN) + _ni], aval.s3, bpm[_ni]); + cpn[(_mi*VWM + 4)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 4)*(NWI/VWN) + _ni], aval.s4, bpm[_ni]); + cpn[(_mi*VWM + 5)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 5)*(NWI/VWN) + _ni], aval.s5, bpm[_ni]); + cpn[(_mi*VWM + 6)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 6)*(NWI/VWN) + _ni], aval.s6, bpm[_ni]); + cpn[(_mi*VWM + 7)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 7)*(NWI/VWN) + _ni], aval.s7, bpm[_ni]); + #elif VWM == 16 + cpn[(_mi*VWM + 0 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 0 )*(NWI/VWN) + _ni], aval.s0, bpm[_ni]); + cpn[(_mi*VWM + 1 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 1 )*(NWI/VWN) + _ni], aval.s1, bpm[_ni]); + cpn[(_mi*VWM + 2 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 2 )*(NWI/VWN) + _ni], aval.s2, bpm[_ni]); + cpn[(_mi*VWM + 3 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 3 )*(NWI/VWN) + _ni], aval.s3, bpm[_ni]); + cpn[(_mi*VWM + 4 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 4 )*(NWI/VWN) + _ni], aval.s4, bpm[_ni]); + cpn[(_mi*VWM + 5 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 5 )*(NWI/VWN) + _ni], aval.s5, bpm[_ni]); + cpn[(_mi*VWM + 6 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 6 )*(NWI/VWN) + _ni], aval.s6, bpm[_ni]); + cpn[(_mi*VWM + 7 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 7 )*(NWI/VWN) + _ni], aval.s7, bpm[_ni]); + cpn[(_mi*VWM + 8 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 8 )*(NWI/VWN) + _ni], aval.s8, bpm[_ni]); + cpn[(_mi*VWM + 9 )*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 9 )*(NWI/VWN) + _ni], aval.s9, bpm[_ni]); + cpn[(_mi*VWM + 10)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 10)*(NWI/VWN) + _ni], aval.sA, bpm[_ni]); + cpn[(_mi*VWM + 11)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 11)*(NWI/VWN) + _ni], aval.sB, bpm[_ni]); + cpn[(_mi*VWM + 12)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 12)*(NWI/VWN) + _ni], aval.sC, bpm[_ni]); + cpn[(_mi*VWM + 13)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 13)*(NWI/VWN) + _ni], aval.sD, bpm[_ni]); + cpn[(_mi*VWM + 14)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 14)*(NWI/VWN) + _ni], aval.sE, bpm[_ni]); + cpn[(_mi*VWM + 15)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 15)*(NWI/VWN) + _ni], aval.sF, bpm[_ni]); + #endif + } + } + #else + #pragma unroll + for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { + #pragma unroll + for (int _mi = 0; _mi 
< MWI/VWM; _mi += 1) { + const realM aval = apm[_mi]; + #if VWN == 1 + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni]); + #elif VWN == 2 + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni].x); + cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bpm[_ni].y); + #elif VWN == 4 + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni].x); + cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bpm[_ni].y); + cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi], aval, bpm[_ni].z); + cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi], aval, bpm[_ni].w); + #elif VWN == 8 + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni].s0); + cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bpm[_ni].s1); + cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi], aval, bpm[_ni].s2); + cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi], aval, bpm[_ni].s3); + cpm[(_ni*VWN + 4)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 4)*(MWI/VWM) + _mi], aval, bpm[_ni].s4); + cpm[(_ni*VWN + 5)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 5)*(MWI/VWM) + _mi], aval, bpm[_ni].s5); + cpm[(_ni*VWN + 6)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 6)*(MWI/VWM) + _mi], aval, bpm[_ni].s6); + cpm[(_ni*VWN + 7)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 7)*(MWI/VWM) + _mi], aval, bpm[_ni].s7); + #elif VWN == 16 + cpm[(_ni*VWN + 0 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0 )*(MWI/VWM) + _mi], aval, bpm[_ni].s0); + cpm[(_ni*VWN + 1 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1 )*(MWI/VWM) + _mi], aval, bpm[_ni].s1); + cpm[(_ni*VWN + 2 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2 )*(MWI/VWM) + _mi], aval, bpm[_ni].s2); + cpm[(_ni*VWN + 3 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3 )*(MWI/VWM) + _mi], aval, bpm[_ni].s3); + cpm[(_ni*VWN + 4 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 4 )*(MWI/VWM) + _mi], aval, bpm[_ni].s4); + cpm[(_ni*VWN + 5 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 5 )*(MWI/VWM) + _mi], aval, bpm[_ni].s5); + cpm[(_ni*VWN + 6 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 6 )*(MWI/VWM) + _mi], aval, bpm[_ni].s6); + cpm[(_ni*VWN + 7 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 7 )*(MWI/VWM) + _mi], aval, bpm[_ni].s7); + cpm[(_ni*VWN + 8 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 8 )*(MWI/VWM) + _mi], aval, bpm[_ni].s8); + cpm[(_ni*VWN + 9 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 9 )*(MWI/VWM) + _mi], aval, bpm[_ni].s9); + cpm[(_ni*VWN + 10)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 10)*(MWI/VWM) + _mi], aval, bpm[_ni].sA); + cpm[(_ni*VWN + 11)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 11)*(MWI/VWM) + _mi], aval, bpm[_ni].sB); + cpm[(_ni*VWN + 12)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 12)*(MWI/VWM) + _mi], aval, bpm[_ni].sC); + cpm[(_ni*VWN + 13)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 13)*(MWI/VWM) + _mi], aval, bpm[_ni].sD); + cpm[(_ni*VWN + 14)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 14)*(MWI/VWM) + _mi], aval, bpm[_ni].sE); + 
cpm[(_ni*VWN + 15)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 15)*(MWI/VWM) + _mi], aval, bpm[_ni].sF); + #endif + } + } + #endif + } } + barrier(CLK_LOCAL_MEM_FENCE); + } + #else + // Allocates workitem-private memory (registers) - // Performs the accumulation (Cpm += Apm * Bpm) + int baseIndexA = GlobalIndexA(); + int baseIndexB = GlobalIndexB(); + #pragma unroll + for (int _kj = 0; _kj < kSizeK; _kj += 4) { #ifdef OUTPUTMN + #pragma promote_to_registers + realN bpm[NWI/VWN]; // 1 * NWI + + #pragma unroll + for(int _ki = 0; _ki < 4; _ki += 1) { + int idk = _kj + _ki; + #pragma unroll + for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { + // Loads data: off-chip --> private (matrix B) + bpm[_ni] = GlobalToPrivateOptB(bgm, baseIndexB, _ni, kSizeN, idk); + } + #pragma unroll for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { + const realM aval = GlobalToPrivateOptA(agm, baseIndexA, _mi, kSizeM, idk); #pragma unroll for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { - const realM aval = apm[_mi]; #if VWM == 1 // [MWI/VWM, VWM, NWI/VWN, VWN] cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni] = MultiplyAddVectorN(cpn[(_mi*VWM + 0)*(NWI/VWN) + _ni], aval, bpm[_ni]); @@ -1007,77 +1124,104 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, #endif } } + } #else + + #pragma promote_to_registers + realM apm[MWI/VWM]; // MWI * 1 + #pragma unroll + for(int _ki = 0; _ki < 4; _ki += 1) { + int idk = _kj + _ki; + #pragma unroll + for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { + // Loads data: off-chip --> private (matrix B) + apm[_mi] = GlobalToPrivateOptA(agm, baseIndexA, _mi, kSizeM, idk); + } #pragma unroll for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { + const realN bval = GlobalToPrivateOptB(bgm, baseIndexB, _ni, kSizeN, idk); + #pragma unroll for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { const realM aval = apm[_mi]; #if VWN == 1 - cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni]); + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bval); #elif VWN == 2 - cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni].x); - cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bpm[_ni].y); + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bval.x); + cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bval.y); #elif VWN == 4 - cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni].x); - cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bpm[_ni].y); - cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi], aval, bpm[_ni].z); - cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi], aval, bpm[_ni].w); + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bval.x); + cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bval.y); + cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi], aval, bval.z); + cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi], aval, bval.w); #elif VWN == 8 - cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bpm[_ni].s0); - 
cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bpm[_ni].s1); - cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi], aval, bpm[_ni].s2); - cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi], aval, bpm[_ni].s3); - cpm[(_ni*VWN + 4)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 4)*(MWI/VWM) + _mi], aval, bpm[_ni].s4); - cpm[(_ni*VWN + 5)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 5)*(MWI/VWM) + _mi], aval, bpm[_ni].s5); - cpm[(_ni*VWN + 6)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 6)*(MWI/VWM) + _mi], aval, bpm[_ni].s6); - cpm[(_ni*VWN + 7)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 7)*(MWI/VWM) + _mi], aval, bpm[_ni].s7); + cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0)*(MWI/VWM) + _mi], aval, bval.s0); + cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1)*(MWI/VWM) + _mi], aval, bval.s1); + cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2)*(MWI/VWM) + _mi], aval, bval.s2); + cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3)*(MWI/VWM) + _mi], aval, bval.s3); + cpm[(_ni*VWN + 4)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 4)*(MWI/VWM) + _mi], aval, bval.s4); + cpm[(_ni*VWN + 5)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 5)*(MWI/VWM) + _mi], aval, bval.s5); + cpm[(_ni*VWN + 6)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 6)*(MWI/VWM) + _mi], aval, bval.s6); + cpm[(_ni*VWN + 7)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 7)*(MWI/VWM) + _mi], aval, bval.s7); #elif VWN == 16 - cpm[(_ni*VWN + 0 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 0 )*(MWI/VWM) + _mi], aval, bpm[_ni].s0); - cpm[(_ni*VWN + 1 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1 )*(MWI/VWM) + _mi], aval, bpm[_ni].s1); - cpm[(_ni*VWN + 2 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2 )*(MWI/VWM) + _mi], aval, bpm[_ni].s2); - cpm[(_ni*VWN + 3 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3 )*(MWI/VWM) + _mi], aval, bpm[_ni].s3); - cpm[(_ni*VWN + 4 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 4 )*(MWI/VWM) + _mi], aval, bpm[_ni].s4); - cpm[(_ni*VWN + 5 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 5 )*(MWI/VWM) + _mi], aval, bpm[_ni].s5); - cpm[(_ni*VWN + 6 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 6 )*(MWI/VWM) + _mi], aval, bpm[_ni].s6); - cpm[(_ni*VWN + 7 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 7 )*(MWI/VWM) + _mi], aval, bpm[_ni].s7); - cpm[(_ni*VWN + 8 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 8 )*(MWI/VWM) + _mi], aval, bpm[_ni].s8); - cpm[(_ni*VWN + 9 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 9 )*(MWI/VWM) + _mi], aval, bpm[_ni].s9); - cpm[(_ni*VWN + 10)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 10)*(MWI/VWM) + _mi], aval, bpm[_ni].sA); - cpm[(_ni*VWN + 11)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 11)*(MWI/VWM) + _mi], aval, bpm[_ni].sB); - cpm[(_ni*VWN + 12)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 12)*(MWI/VWM) + _mi], aval, bpm[_ni].sC); - cpm[(_ni*VWN + 13)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 13)*(MWI/VWM) + _mi], aval, bpm[_ni].sD); - cpm[(_ni*VWN + 14)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 14)*(MWI/VWM) + _mi], aval, bpm[_ni].sE); - cpm[(_ni*VWN + 15)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 15)*(MWI/VWM) + _mi], aval, bpm[_ni].sF); + cpm[(_ni*VWN + 0 )*(MWI/VWM) + _mi] 
= MultiplyAddVector(cpm[(_ni*VWN + 0 )*(MWI/VWM) + _mi], aval, bval.s0); + cpm[(_ni*VWN + 1 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 1 )*(MWI/VWM) + _mi], aval, bval.s1); + cpm[(_ni*VWN + 2 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 2 )*(MWI/VWM) + _mi], aval, bval.s2); + cpm[(_ni*VWN + 3 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 3 )*(MWI/VWM) + _mi], aval, bval.s3); + cpm[(_ni*VWN + 4 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 4 )*(MWI/VWM) + _mi], aval, bval.s4); + cpm[(_ni*VWN + 5 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 5 )*(MWI/VWM) + _mi], aval, bval.s5); + cpm[(_ni*VWN + 6 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 6 )*(MWI/VWM) + _mi], aval, bval.s6); + cpm[(_ni*VWN + 7 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 7 )*(MWI/VWM) + _mi], aval, bval.s7); + cpm[(_ni*VWN + 8 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 8 )*(MWI/VWM) + _mi], aval, bval.s8); + cpm[(_ni*VWN + 9 )*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 9 )*(MWI/VWM) + _mi], aval, bval.s9); + cpm[(_ni*VWN + 10)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 10)*(MWI/VWM) + _mi], aval, bval.sA); + cpm[(_ni*VWN + 11)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 11)*(MWI/VWM) + _mi], aval, bval.sB); + cpm[(_ni*VWN + 12)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 12)*(MWI/VWM) + _mi], aval, bval.sC); + cpm[(_ni*VWN + 13)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 13)*(MWI/VWM) + _mi], aval, bval.sD); + cpm[(_ni*VWN + 14)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 14)*(MWI/VWM) + _mi], aval, bval.sE); + cpm[(_ni*VWN + 15)*(MWI/VWM) + _mi] = MultiplyAddVector(cpm[(_ni*VWN + 15)*(MWI/VWM) + _mi], aval, bval.sF); #endif } } + } #endif } - } - #if SA == 1 || SB == 1 - barrier(CLK_LOCAL_MEM_FENCE); - #endif - } + #endif + #if GLOBAL_MEM_FENCE == 1 barrier(CLK_GLOBAL_MEM_FENCE); #endif #ifdef OUTPUTMN + INT2 baseOffset = StoreIndexN(); + #ifdef BIAS + #pragma promote_to_registers + realN epm[NWI/VWN]; // MWI * 1 + for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { + #if STRN == 0 + int idn = _ni + baseOffset.index[1]; + #elif STRN == 1 + int idn = baseOffset.index[1] + _ni*NDIMC; + #endif + epm[_ni] = egm[idn]; + } + #endif #pragma unroll for (int _mi = 0; _mi < MWI; _mi += 1) { #pragma unroll for (int _ni = 0; _ni < NWI/VWN; _ni += 1) { StoreResultsN((__global realN* )cgm, cpn[_mi * (NWI/VWN) + _ni], + baseOffset, #ifdef BIAS - egm, + (realN*)epm, #endif _mi, _ni, kSizeN, alpha, beta); } } #else - + INT2 baseOffset = StoreIndexM(); + // Stores an MWG * NWG tile of results and performs the multiplication with alpha and beta const int cld = kSizeM; @@ -1085,7 +1229,7 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, for (int _ni = 0; _ni < NWI; _ni += 1) { #pragma unroll for (int _mi = 0; _mi < MWI/VWM; _mi += 1) { - StoreResultsM(cgm, cpm[_ni * (MWI/VWM) + _mi], _mi, _ni, cld, alpha, beta); + StoreResultsM(cgm, cpm[_ni * (MWI/VWM) + _mi], baseOffset, _mi, _ni, cld, alpha, beta); } } #endif @@ -1160,20 +1304,28 @@ void Xgemm(const int kSizeM, const int kSizeN, const int kSizeK, void XgemmBatched(const int kSizeM, const int kSizeN, const int kSizeK, const real_arg arg_alpha, const real_arg arg_beta, - const __global realM* restrict agm, const int a_one, const int a_two, - const __global realN* restrict bgm, const int b_one, const int b_two, - __global realM* cgm, const int c_one, const int c_two) { + const __global realM* restrict agm, const int batch_offset_a, + const 
__global realN* restrict bgm, const int batch_offset_b, + #ifdef BIAS + const __global realN* restrict egm, const int batch_offset_e, + #endif + __global realM* cgm, const int batch_offset_c) { const int batch = get_group_id(2); const real alpha = GetRealArg(arg_alpha); const real beta = GetRealArg(arg_beta); // Sets the offsets - const int a_offset = batch * a_one * a_two; - const int b_offset = batch * b_one * b_two; - const int c_offset = batch * c_one * c_two; + const int a_offset = batch * batch_offset_a; + const int b_offset = batch * batch_offset_b; + const int c_offset = batch * batch_offset_c; const __global realM* restrict agm_ = &agm[a_offset / VWM]; const __global realN* restrict bgm_ = &bgm[b_offset / VWN]; __global realM* restrict cgm_ = &cgm[c_offset / VWM]; + + #ifdef BIAS + const int e_offset = batch * batch_offset_e; + const __global realN* restrict egm_ = &egm[e_offset / VWN]; + #endif // Allocates workgroup-private memory (local memory) #if SA == 1 @@ -1185,12 +1337,28 @@ void XgemmBatched(const int kSizeM, const int kSizeN, const int kSizeK, // Computes the matrix-multiplication and stores the result in global memory #if SA == 1 && SB == 1 - XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta, alm, blm); + XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, + #ifdef BIAS + egm_, + #endif + cgm_, alpha, beta, alm, blm); #elif SA == 1 - XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta, alm); + XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, + #ifdef BIAS + egm_, + #endif + cgm_, alpha, beta, alm); #elif SB == 1 - XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta, blm); + XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, + #ifdef BIAS + egm_, + #endif + cgm_, alpha, beta, blm); #else - XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta); + XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, + #ifdef BIAS + egm_, + #endif + cgm_, alpha, beta); #endif } diff --git a/source/backend/opencl/execution/cl/opencl_program.cc b/source/backend/opencl/execution/cl/opencl_program.cc index b72288012..f98f47417 100644 --- a/source/backend/opencl/execution/cl/opencl_program.cc +++ b/source/backend/opencl/execution/cl/opencl_program.cc @@ -1931,7 +1931,8 @@ const char* self_attention_buf = " }\n" " sum[lid]=maxValue;\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" -" for(int i=SOFTMAX_LOCAL_SIZE/2; i>0; i /= 2){\n" +" #pragma unroll\n" +" for(int i=SOFTMAX_LOCAL_SIZE/2; i>0; i >>= 1){\n" " if (lid0; i /= 2){\n" +" #pragma unroll\n" +" for(int i=SOFTMAX_LOCAL_SIZE/2; i>0; i >>= 1){\n" " if (lid [N Y X]\n" "__kernel void trans_3d_buf(__global const FLOAT* input,\n" " __global FLOAT* output,\n" @@ -1984,50 +1974,28 @@ const char* self_attention_buf = ") {\n" " int b=get_global_id(2);\n" " \n" -" const int lidw=get_local_id(0);\n" -" const int lidh=get_local_id(1);\n" -" // group id\n" -" const int w=get_group_id(0)*WGSW;\n" -" const int h=get_group_id(1)*WGSH;\n" -" int iw=lidw;\n" -" int jh=lidh;\n" -" \n" -" __local FLOAT4 localData[WGSW][WGSH/4];//w64h64\n" -" \n" -" #pragma unroll\n" -" for(int i=0; i [alignK,alignM]\n" +"// [B,K/4,area,4] -> [alignK,alignM] (M=B*area)\n" "__kernel void transpose_pad(GLOBAL_SIZE_DIM2\n" " const int alignM,\n" " const int alignK,\n" " const int M,\n" " const int K,\n" +" const int area,\n" " __global const FLOAT* input,\n" " __global FLOAT* output\n" " ) {\n" +"#ifdef AREA_EQUAL_1\n" " const int idx_m4=get_global_id(0); // idx M\n" " const int idx_k4=get_global_id(1); // idx K\n" " UNIFORM_BOUNDRY_CHECK(idx_m4,idx_k4);\n" @@ 
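Reviewer note: XgemmBatched now takes flat per-batch element strides (batch_offset_a/b/c and, with BIAS, batch_offset_e) instead of dimension pairs, and the optional egm vector is folded into the stored tile; in the kernel the per-batch bases are additionally divided by VWM/VWN because the pointers are vector-typed. A scalar sketch of the intended math only — operand layouts are simplified to plain row-major here and all names are illustrative, not taken from the kernel:

#include <vector>

// Scalar reference for the batched GEMM with optional bias:
//   C[b] = alpha * A[b] * B[b] + beta * C[b] + bias[b]   (bias broadcast over rows).
void gemmBatchedBiasRef(int batches, int M, int N, int K, float alpha, float beta,
                        const std::vector<float>& A, int batchOffsetA,
                        const std::vector<float>& B, int batchOffsetB,
                        const std::vector<float>& bias, int batchOffsetE,
                        std::vector<float>& C, int batchOffsetC) {
    for (int b = 0; b < batches; ++b) {
        // Per-batch bases from the new flat element strides, as in a_offset = batch * batch_offset_a.
        const float* a = A.data()    + b * batchOffsetA;
        const float* w = B.data()    + b * batchOffsetB;
        const float* e = bias.data() + b * batchOffsetE;
        float*       c = C.data()    + b * batchOffsetC;
        for (int m = 0; m < M; ++m) {
            for (int n = 0; n < N; ++n) {
                float acc = 0.f;
                for (int k = 0; k < K; ++k) {
                    acc += a[m * K + k] * w[k * N + n];
                }
                c[m * N + n] = alpha * acc + beta * c[m * N + n] + e[n];
            }
        }
    }
}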
-12066,20 +12030,67 @@ const char* gemm_buf = " vstore4((FLOAT4)(m0k4.y,m1k4.y,m2k4.y,m3k4.y),0,output+out_offset_base+alignM);\n" " vstore4((FLOAT4)(m0k4.z,m1k4.z,m2k4.z,m3k4.z),0,output+out_offset_base+alignM+alignM);\n" " vstore4((FLOAT4)(m0k4.w,m1k4.w,m2k4.w,m3k4.w),0,output+out_offset_base+alignM+alignM+alignM);\n" +"#elif defined BATCH_EQUAL_1\n" +" const int idx_m4=get_global_id(0); // idx M\n" +" const int idx_k4=get_global_id(1); // idx K\n" +" UNIFORM_BOUNDRY_CHECK(idx_m4,idx_k4);\n" +" const int idx_m=idx_m4 << 2;\n" +" const int idx_k=idx_k4 << 2;\n" +" const int K_4=(K+3) >> 2;\n" +" const int in_offset_base=(idx_k4*area+idx_m)*4;\n" +" const int out_offset_base=idx_k*alignM+idx_m;\n" +" FLOAT4 m0k4=(idx_k4 >= K_4 || idx_m+0 >= M) ? (FLOAT4)0 : vload4(0,input+in_offset_base);\n" +" FLOAT4 m1k4=(idx_k4 >= K_4 || idx_m+1 >= M) ? (FLOAT4)0 : vload4(0,input+in_offset_base+4);\n" +" FLOAT4 m2k4=(idx_k4 >= K_4 || idx_m+2 >= M) ? (FLOAT4)0 : vload4(0,input+in_offset_base+8);\n" +" FLOAT4 m3k4=(idx_k4 >= K_4 || idx_m+3 >= M) ? (FLOAT4)0 : vload4(0,input+in_offset_base+12);\n" +" vstore4((FLOAT4)(m0k4.x,m1k4.x,m2k4.x,m3k4.x),0,output+out_offset_base);\n" +" vstore4((FLOAT4)(m0k4.y,m1k4.y,m2k4.y,m3k4.y),0,output+out_offset_base+alignM);\n" +" vstore4((FLOAT4)(m0k4.z,m1k4.z,m2k4.z,m3k4.z),0,output+out_offset_base+alignM+alignM);\n" +" vstore4((FLOAT4)(m0k4.w,m1k4.w,m2k4.w,m3k4.w),0,output+out_offset_base+alignM+alignM+alignM);\n" +"#else\n" +" const int idx_m=get_global_id(0); // idx M\n" +" const int idx_k4=get_global_id(1); // idx K\n" +" UNIFORM_BOUNDRY_CHECK(idx_m,idx_k4);\n" +" \n" +" const int K_4=(K+3) >> 2;\n" +" const int idx_k=idx_k4 << 2;\n" +" const int out_offset_base=idx_k*alignM+idx_m;\n" +" \n" +" if(idx_k4 >= K_4 || idx_m >= M) {\n" +" output[out_offset_base]=(FLOAT)0;\n" +" output[out_offset_base+alignM]=(FLOAT)0;\n" +" output[out_offset_base+alignM+alignM]=(FLOAT)0;\n" +" output[out_offset_base+alignM+alignM+alignM]=(FLOAT)0;\n" +" return;\n" +" }\n" +" const int idx_b=idx_m/area;\n" +" const int idx_area=idx_m % area;\n" +" \n" +" const int in_offset_base=((idx_b*K_4+idx_k4)*area+idx_area)*4;\n" +" FLOAT4 data=vload4(0,input+in_offset_base);\n" +" \n" +" output[out_offset_base]=data.x;\n" +" output[out_offset_base+alignM]=data.y;\n" +" output[out_offset_base+alignM+alignM]=data.z;\n" +" output[out_offset_base+alignM+alignM+alignM]=data.w;\n" +"#endif\n" "}\n" -"// [alignM,alignN] -> [M,N/4,4]\n" -"__kernel void add_bias(GLOBAL_SIZE_DIM2\n" +"// [alignM,alignN] -> [B,N/4,area,4] (M=B*area)\n" +"__kernel void transpose_bias(GLOBAL_SIZE_DIM2\n" " const int alignM,\n" " const int alignN,\n" " const int M,\n" " const int N,\n" +" const int area,\n" " __global const FLOAT* input0,\n" " __global const FLOAT* input1,\n" " __global FLOAT* output\n" " ) {\n" +"#ifdef AREA_EQUAL_1\n" " const int idx_m=get_global_id(0); // idx M\n" " const int idx_n_16=get_global_id(1); // idx N\n" " UNIFORM_BOUNDRY_CHECK(idx_m,idx_n_16);\n" +" const int N_4=(N+3) >> 2;\n" " const int N_16=(N+15) >> 4;\n" " const int N_left=N & 15;\n" " bool canVec16=(N_left == 0 || (N_left != 0 && idx_n_16> 2;\n" " FLOAT4 res0=vload4(0,input0+idx_m*alignN+(idx_n_16 << 4));\n" " FLOAT4 res1=vload4(0,input1+(idx_n_16 << 4));\n" " FLOAT4 res=res0+res1;\n" -" vstore4(res,0,output+((idx_m*N_16+idx_n_16) << 4));\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+((idx_m*N_4+(idx_n_16 << 2)) << 2));\n" " \n" 
" if(idx_n_16*4+1 >= N_4) return;\n" " res0=vload4(0,input0+idx_m*alignN+(idx_n_16 << 4)+4);\n" " res1=vload4(0,input1+(idx_n_16 << 4)+4);\n" " res=res0+res1;\n" -" vstore4(res,0,output+((idx_m*N_16+idx_n_16) << 4)+4);\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+((idx_m*N_4+(idx_n_16 << 2)) << 2)+4);\n" " \n" " if(idx_n_16*4+2 >= N_4) return;\n" " res0=vload4(0,input0+idx_m*alignN+(idx_n_16 << 4)+8);\n" " res1=vload4(0,input1+(idx_n_16 << 4)+8);\n" " res=res0+res1;\n" -" vstore4(res,0,output+((idx_m*N_16+idx_n_16) << 4)+8);\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+((idx_m*N_4+(idx_n_16 << 2)) << 2)+8);\n" " \n" " if(idx_n_16*4+3 >= N_4) return;\n" " res0=vload4(0,input0+idx_m*alignN+(idx_n_16 << 4)+12);\n" " res1=vload4(0,input1+(idx_n_16 << 4)+12);\n" " res=res0+res1;\n" -" vstore4(res,0,output+((idx_m*N_16+idx_n_16) << 4)+12);\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+((idx_m*N_4+(idx_n_16 << 2)) << 2)+12);\n" " }\n" +"#else\n" +" const int idx_m=get_global_id(0); // idx M\n" +" const int idx_n_16=get_global_id(1); // idx N\n" +" UNIFORM_BOUNDRY_CHECK(idx_m,idx_n_16);\n" +" \n" +" const int N_4=(N+3) >> 2;\n" +" const int idx_b=idx_m/area;\n" +" const int idx_area=idx_m % area;\n" +" \n" +" const int inp_base_offset=idx_m*alignN+(idx_n_16 << 4);\n" +" const int out_base_offset=((idx_b*N_4+idx_n_16*4)*area+idx_area)*4;\n" +" \n" +" FLOAT4 res0=vload4(0,input0+inp_base_offset);\n" +" FLOAT4 res1=vload4(0,input1+(idx_n_16 << 4));\n" +" FLOAT4 res=res0+res1;\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+out_base_offset);\n" +" \n" +" if(idx_n_16*4+1 >= N_4) return;\n" +" res0=vload4(0,input0+inp_base_offset+4);\n" +" res1=vload4(0,input1+(idx_n_16 << 4)+4);\n" +" res=res0+res1;\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+out_base_offset+area*4);\n" +" \n" +" if(idx_n_16*4+2 >= N_4) return;\n" +" res0=vload4(0,input0+inp_base_offset+8);\n" +" res1=vload4(0,input1+(idx_n_16 << 4)+8);\n" +" res=res0+res1;\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+out_base_offset+area*8);\n" +" \n" +" if(idx_n_16*4+3 >= N_4) return;\n" +" res0=vload4(0,input0+inp_base_offset+12);\n" +" res1=vload4(0,input1+(idx_n_16 << 4)+12);\n" +" res=res0+res1;\n" +" #ifdef RELU\n" +" res=fmax(res,(FLOAT4)0);\n" +" #endif\n" +" #ifdef RELU6\n" +" res=clamp(res,(FLOAT4)0,(FLOAT4)6);\n" +" #endif\n" +" vstore4(res,0,output+out_base_offset+area*12);\n" +"#endif\n" "}\n" ; #endif @@ -14956,7 +15055,8 @@ const char* conv_2d_buf = " __private const int in_c_block,\n" " __private const int out_h,\n" " __private const int out_w,\n" -" __private const int out_c_block) {\n" +" __private const int out_c_block,\n" +" __private const int out_c_pack) {\n" " const int out_c_w_idx=get_global_id(0); //c/4 w\n" " const int out_b_h_idx=get_global_id(1); //b h\n" " DEAL_NON_UNIFORM_DIM2(out_c_w_idx,out_b_h_idx);\n" @@ -14971,37 +15071,42 @@ 
const char* conv_2d_buf = " COMPUTE_FLOAT4 out3=out0;\n" " const int intput_width_idx0=out_w4_idx;\n" " \n" -" int offset=mul24(out_c_idx,in_c_block) << 2;\n" +" int offset=out_c_idx*4;\n" " int inp_offset=(((out_b_idx*in_c_block)*out_h+out_h_idx)* out_w+intput_width_idx0) << 2;\n" " \n" " const int inp_add=out_h*out_w*4;\n" " for (ushort in_channel_block_idx=0; in_channel_block_idx local (matrix A)\n" " #if SA == 1\n" " GlobalToLocalA(agm,alm,kSizeM,tid,kwg);\n" @@ -20101,28 +20226,24 @@ const char* matmul_params_buf = " #if SB == 1\n" " GlobalToLocalB(bgm,blm,kSizeN,tid,kwg);\n" " #endif\n" -" #if SA == 1 || SB == 1\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" -" #endif\n" " // Loops over all workitem tiles,unrolled by a factor KWI\n" -" for (int pwi=0; pwi private (matrix A)\n" -" #if GEMMK == 0 && SA == 1\n" +" #if SA == 1\n" " apm[_mi]=LocalToPrivateA(alm,_mi,kg);\n" " // Loads data: off-chip --> private (matrix A)\n" -" #elif GEMMK == 0 && SA == 0\n" -" apm[_mi]=GlobalToPrivateA(agm,_mi,kSizeM,idk,kwg);\n" +" #elif SA == 0\n" +" apm[_mi]=GlobalToPrivateA(agm,_mi,kSizeM,idk);\n" " #endif\n" " }\n" " // Loads matrix B (kernel 0) or matrix A (kernel 1)\n" @@ -20231,28 +20352,166 @@ const char* matmul_params_buf = " #endif\n" " }\n" " }\n" -" #if SA == 1 || SB == 1\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" +" }\n" +" #else\n" +" // Allocates workitem-private memory (registers)\n" +" int baseIndexA=GlobalIndexA();\n" +" int baseIndexB=GlobalIndexB();\n" +" #pragma unroll\n" +" for (int _kj=0; _kj private (matrix B)\n" +" bpm[_ni]=GlobalToPrivateOptB(bgm,baseIndexB,_ni,kSizeN,idk);\n" +" }\n" +" #pragma unroll\n" +" for (int _mi=0; _mi private (matrix B)\n" +" apm[_mi]=GlobalToPrivateOptA(agm,baseIndexA,_mi,kSizeM,idk);\n" +" }\n" +" #pragma unroll\n" +" for (int _ni=0; _ni 0; i /= 2){ + #pragma unroll + for(int i = SOFTMAX_LOCAL_SIZE/2; i > 0; i >>= 1){ if (lid < i) sum[lid] = fmax(sum[lid], sum[lid + i]); barrier(CLK_LOCAL_MEM_FENCE); @@ -225,7 +226,8 @@ __kernel void softmax_inside(GLOBAL_SIZE_3_DIMS } sum[lid] = sumValue; barrier(CLK_LOCAL_MEM_FENCE); - for(int i = SOFTMAX_LOCAL_SIZE/2; i > 0; i /= 2){ + #pragma unroll + for(int i = SOFTMAX_LOCAL_SIZE/2; i > 0; i >>= 1){ if (lid < i) sum[lid] = sum[lid] + sum[lid + i]; barrier(CLK_LOCAL_MEM_FENCE); @@ -245,18 +247,7 @@ __kernel void softmax_inside(GLOBAL_SIZE_3_DIMS #endif } } -#ifndef WGSW - #define WGSW 64 // work-group handle size W dimension -#endif -#ifndef WGSH - #define WGSH 64 // work-group handle size H dimension -#endif -#ifndef TSW - #define TSW 8 // thread handle size W dimension -#endif -#ifndef TSH - #define TSH 8 // thread handle size H dimension -#endif + // [N X Y4 4] -> [N Y X] __kernel void trans_3d_buf(__global const FLOAT* input, __global FLOAT* output, @@ -266,56 +257,31 @@ __kernel void trans_3d_buf(__global const FLOAT* input, ) { int b = get_global_id(2); - const int lidw = get_local_id(0); - const int lidh = get_local_id(1); - // group id - const int w = get_group_id(0) * WGSW; - const int h = get_group_id(1) * WGSH; - - int iw = lidw; - int jh = lidh; - - __local FLOAT4 localData[WGSW][WGSH/4];//w64h64 - - #pragma unroll - for(int i = 0; i < TSW; i++) { - int offset_w = i * WGSW / TSW + iw; - #pragma unroll - for(int j = 0; j < TSH / 4; j++) { - int offset_h = j * WGSH / TSH + jh; - // [TSW, WGSW / TSW] [TSH / 4, WGSH / TSH, 4] - localData[offset_w][offset_h] = vload4(0, input + ((b * width + (w+offset_w)) * height/4 + (h/4+offset_h)) * 4); - } - } - - barrier(CLK_LOCAL_MEM_FENCE); + const int w = get_global_id(0) << 3; 
+ const int h = get_global_id(1) << 3; - // H offset: [WGSH / TSH, TSH / 4, 4] - // W offset: [WGSW / TSW, TSW / 4, 4] - int oh_base = jh * TSH / 4; - int ow_base = iw * TSW / 4; + const int inp_offset = (b * width + w) * height + h; + const int out_offset = (b * height + h) * width + w; - //#pragma unroll - for(int j = 0; j < TSH / 4; j++) { - int oh = oh_base + j; - - //#pragma unroll - for(int i = 0; i < TSW / 4; i++) { - int ow = ow_base + i; - - FLOAT4 value_0 = (localData[4*ow][oh]); - FLOAT4 value_1 = (localData[4*ow+1][oh]); - FLOAT4 value_2 = (localData[4*ow+2][oh]); - FLOAT4 value_3 = (localData[4*ow+3][oh]); - vstore4((FLOAT4){value_0.x, value_1.x, value_2.x, value_3.x}, 0, output + ((b * height + h + 4*oh+0) * width + w + 4 * ow)); - vstore4((FLOAT4){value_0.y, value_1.y, value_2.y, value_3.y}, 0, output + ((b * height + h + 4*oh+1) * width + w + 4 * ow)); - vstore4((FLOAT4){value_0.z, value_1.z, value_2.z, value_3.z}, 0, output + ((b * height + h + 4*oh+2) * width + w + 4 * ow)); - vstore4((FLOAT4){value_0.w, value_1.w, value_2.w, value_3.w}, 0, output + ((b * height + h + 4*oh+3) * width + w + 4 * ow)); - } - } + FLOAT8 value_0 = vload8(0, input+inp_offset); + FLOAT8 value_1 = vload8(0, input+inp_offset + height); + FLOAT8 value_2 = vload8(0, input+inp_offset + height + height); + FLOAT8 value_3 = vload8(0, input+inp_offset + height + height + height); + FLOAT8 value_4 = vload8(0, input+inp_offset + (height << 2)); + FLOAT8 value_5 = vload8(0, input+inp_offset + height * 5); + FLOAT8 value_6 = vload8(0, input+inp_offset + height * 6); + FLOAT8 value_7 = vload8(0, input+inp_offset + height * 7); + + vstore8((FLOAT8){value_0.s0, value_1.s0, value_2.s0, value_3.s0, value_4.s0, value_5.s0, value_6.s0, value_7.s0}, 0, output + out_offset); + vstore8((FLOAT8){value_0.s1, value_1.s1, value_2.s1, value_3.s1, value_4.s1, value_5.s1, value_6.s1, value_7.s1}, 0, output + out_offset + width); + vstore8((FLOAT8){value_0.s2, value_1.s2, value_2.s2, value_3.s2, value_4.s2, value_5.s2, value_6.s2, value_7.s2}, 0, output + out_offset + width + width); + vstore8((FLOAT8){value_0.s3, value_1.s3, value_2.s3, value_3.s3, value_4.s3, value_5.s3, value_6.s3, value_7.s3}, 0, output + out_offset + width + width + width); + vstore8((FLOAT8){value_0.s4, value_1.s4, value_2.s4, value_3.s4, value_4.s4, value_5.s4, value_6.s4, value_7.s4}, 0, output + out_offset + (width << 2)); + vstore8((FLOAT8){value_0.s5, value_1.s5, value_2.s5, value_3.s5, value_4.s5, value_5.s5, value_6.s5, value_7.s5}, 0, output + out_offset + width * 5); + vstore8((FLOAT8){value_0.s6, value_1.s6, value_2.s6, value_3.s6, value_4.s6, value_5.s6, value_6.s6, value_7.s6}, 0, output + out_offset + width * 6); + vstore8((FLOAT8){value_0.s7, value_1.s7, value_2.s7, value_3.s7, value_4.s7, value_5.s7, value_6.s7, value_7.s7}, 0, output + out_offset + width * 7); } - __kernel void clip_transpose_qkv(GLOBAL_SIZE_3_DIMS __global const FLOAT *input, // [Batch * mNumHead, ROUND_UP(mHeadDim, tile), ROUND_UP(seqLen, tile)] __global FLOAT *output, // [Batch, seqLen/4, mNumHead * mHeadDim, 4] diff --git a/source/backend/opencl/execution/cl/winogradTransform_buf.cl b/source/backend/opencl/execution/cl/winogradTransform_buf.cl index efb799643..0caf484b0 100644 --- a/source/backend/opencl/execution/cl/winogradTransform_buf.cl +++ b/source/backend/opencl/execution/cl/winogradTransform_buf.cl @@ -111,9 +111,6 @@ __kernel void winoTransSrcBuf2_3_1(GLOBAL_SIZE_DIM2 int batchIndex = pos.y / srcChannelC4; int srcZ = pos.y % srcChannelC4; int dstYOrigin = 
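Reviewer note: trans_3d_buf drops the local-memory tiling macros (WGSW/WGSH/TSW/TSH) and instead transposes one 8x8 tile per work item straight through registers (vload8 of eight input rows, vstore8 of eight transposed rows). The index math reduces to a plain [N, X, Y] -> [N, Y, X] permutation; a scalar reference, assuming width and height are multiples of 8 as the new dispatch implies:

#include <vector>

// Reference for trans_3d_buf: input is [batch, width, height], output is [batch, height, width].
// The OpenCL kernel performs this 8x8 elements at a time per work item.
void trans3dRef(int batch, int width, int height,
                const std::vector<float>& input, std::vector<float>& output) {
    for (int b = 0; b < batch; ++b) {
        for (int w = 0; w < width; ++w) {
            for (int h = 0; h < height; ++h) {
                output[(b * height + h) * width + w] = input[(b * width + w) * height + h];
            }
        }
    }
}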
unitWidth * unitHeight_idx + unitWidth_idx; - int dstHeight = (unitWidth * unitHeight + 3) / 4; - int dstY = dstYOrigin / 4; - int dstX = dstYOrigin % 4 + 4 * dstXOrigin; batchIndex = batchOffset; { @@ -405,10 +402,7 @@ __kernel void winoTransDstBuf2_3_1(GLOBAL_SIZE_DIM2 int unitWidth_idx = pos.x % unitWidth; int unitHeight_idx = pos.x / unitWidth; int2 realPos = (int2)(unitWidth_idx, unitHeight_idx); - int srcWidth = (unitWidth * unitHeight + 3) / 4; int dstXOrigin = unitWidth * unitHeight_idx + unitWidth_idx; - int dstX = dstXOrigin / 4; - int dstY = 4 * pos.y + dstXOrigin % 4; int oz = pos.y % dstChannelC4; FLOAT4 bias = vload4(0, uBias+oz*4); diff --git a/source/core/Backend.hpp b/source/core/Backend.hpp index 111f09877..2605047ec 100644 --- a/source/core/Backend.hpp +++ b/source/core/Backend.hpp @@ -25,6 +25,21 @@ class Execution; class Runtime; class Backend; +struct RuntimeHint { + // 0: Defer, 1: Eager + int memoryAllocatorType = 0; + int winogradMemoryUsed = 3; + + // 0-100, 50 means litter core has 50% capacity of large core + int cpuDecreaseRate = 50; + int dynamicQuantOption = 0; + + // 0: Do not quantize kvcache, just store float + // 1: Only quantize key cache, use int8 asymmetric quantization + // 2: Only quantize value cache, use fp8 quantization + // 3: quantize both key and value cache as described above + int kvcacheQuantOption = 0; +}; /** abstract backend */ class Backend : public NonCopyable { @@ -48,11 +63,6 @@ class Backend : public NonCopyable { INDIRECT = 1 }; Mode mode = DIRECT; - enum Allocator { - DEFER = 0, - EAGER = 1 - }; - Allocator allocator = DEFER; }; /** backend buffer storage type */ @@ -232,21 +242,11 @@ class Runtime : public NonCopyable { Allocator_Defer = 0, Allocator_Eager = 1, }; - - void setWinogradMemoryLevel(int level) { - mWinogradMemoryLevel = level; - } - - int getWinogradMemoryLevel() const { - return mWinogradMemoryLevel; - } - - void setAllocatorType(int type) { - mAllocatorType = static_cast(type); + void setRuntimeHint(const RuntimeHint& hint) { + mHint = hint; } - - AllocatorType getAllocatorType() const { - return mAllocatorType; + const RuntimeHint& hint() const { + return mHint; } virtual CompilerType onGetCompilerType() const { @@ -260,6 +260,13 @@ class Runtime : public NonCopyable { */ virtual Backend* onCreate(const BackendConfig* config = nullptr) const = 0; + /** + @brief reset runtime + */ + virtual void onReset(int numberThread, const BackendConfig* config) { + // Do nothing + } + /** @brief clear unuseful resource @param level clear level: 0 - 100, bigger mean clear more, smaller mean cache more @@ -319,8 +326,7 @@ class Runtime : public NonCopyable { MNN_PUBLIC void waitAsyncWork(); private: std::future mFuture; - AllocatorType mAllocatorType = Allocator_Eager; - int mWinogradMemoryLevel = 3; + RuntimeHint mHint; }; /** abstract Runtime register */ diff --git a/source/core/Concurrency.h b/source/core/Concurrency.h index a3c06622f..08887eb9c 100644 --- a/source/core/Concurrency.h +++ b/source/core/Concurrency.h @@ -26,7 +26,7 @@ } \ ; \ auto cpuBn = (CPUBackend*)backend(); \ - MNN::ThreadPool::enqueue(std::move(task), cpuBn->taskIndex()); \ + MNN::ThreadPool::enqueue(std::move(task), cpuBn->taskIndex(), cpuBn->threadOpen() ? 
cpuBn->threadNumber() : 1); \ } #else diff --git a/source/core/ConvolutionCommon.cpp b/source/core/ConvolutionCommon.cpp index 6a333f0fa..2418bd211 100644 --- a/source/core/ConvolutionCommon.cpp +++ b/source/core/ConvolutionCommon.cpp @@ -9,6 +9,7 @@ #include "ConvolutionCommon.hpp" #include #include "backend/cpu/compute/CommonOptFunction.h" +#include "backend/cpu/CPUBackend.hpp" #include "half.hpp" #include "core/OpCommonUtils.hpp" #include "core/IDSTDecoder.hpp" @@ -187,16 +188,18 @@ void ConvolutionCommon::getConvParameters(std::shared_ptr *quanCommo } bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d, std::shared_ptr& quanCommon, Backend* backend, - const int8_t*& weight, int& weightSize, float*& scale, int32_t*& bias) { + const int8_t*& weight, int& weightSize, float*& scale, int32_t*& bias, int32_t*& weightQuantZeroPoint) { int outputCount = conv2d->common()->outputCount(); weightSize = 0; + auto core = static_cast(backend)->functions(); // fix xcode UndefinedBehaviorSanitizer - if (conv2d->symmetricQuan()->weight() != nullptr) { + if (conv2d->symmetricQuan() && conv2d->symmetricQuan()->weight() != nullptr) { weight = conv2d->symmetricQuan()->weight()->data(); weightSize = conv2d->symmetricQuan()->weight()->size(); } - if (conv2d->quanParameter() && conv2d->quanParameter()->buffer()) { + if (conv2d->quanParameter() && conv2d->quanParameter()->buffer()) { // int8 weight quanCommon = ConvolutionCommon::load(conv2d, backend, false, true); + MNN_ASSERT(quanCommon != nullptr); weight = quanCommon->weight.get(); weightSize = quanCommon->weight.size(); } @@ -204,16 +207,47 @@ bool ConvolutionCommon::getConvInt8Parameters(const MNN::Convolution2D* conv2d, MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No weight data!"); return false; } - if (conv2d->symmetricQuan()->bias() && conv2d->symmetricQuan()->scale()) { + bool weightAsy = false; + if (quanCommon && quanCommon->asymmetric) { + weightAsy = true; + } + if (conv2d->symmetricQuan() && conv2d->symmetricQuan()->bias() && conv2d->symmetricQuan()->scale()) { // Compability for old model MNN_ASSERT(conv2d->symmetricQuan()->bias()->size() == outputCount && conv2d->symmetricQuan()->scale()->size() == outputCount); ::memcpy(bias, conv2d->symmetricQuan()->bias()->data(), outputCount * sizeof(int32_t)); ::memcpy(scale, conv2d->symmetricQuan()->scale()->data(), outputCount * sizeof(float)); return true; } - if (conv2d->bias() && conv2d->quanParameter()->alpha()) { + if (conv2d->bias()) { ::memcpy(bias, conv2d->bias()->data(), outputCount * sizeof(float)); - ::memcpy(scale, conv2d->quanParameter()->alpha()->data(), outputCount * sizeof(float)); + } + if (conv2d->quanParameter() && conv2d->quanParameter()->alpha()) { + auto alphaAndBeta = conv2d->quanParameter()->alpha()->data(); + int quantCount = conv2d->quanParameter()->alpha()->size(); + if (false == weightAsy) { // symmetric quant + if (core->bytes == 2) { + core->MNNFp32ToLowp(quanCommon->alpha.get(), reinterpret_cast(scale), quantCount); + } else { + ::memcpy(scale, conv2d->quanParameter()->alpha()->data(), quantCount * core->bytes); + } + } else if (true == weightAsy) { // asymmetric + // int ocx2 = 2 * outputCount; + int scaleSize = quantCount / 2; + float clampMin = conv2d->quanParameter()->aMin() == 0 ? 
-128 : conv2d->quanParameter()->aMin(); + if (core->bytes == 2) { + std::unique_ptr tmp(new int16_t[quantCount]); + core->MNNFp32ToLowp(alphaAndBeta, tmp.get(), quantCount); + for (int i = 0; i < scaleSize; ++i) { + weightQuantZeroPoint[i] = static_cast(roundf((-1) * tmp[2 * i] / tmp[2 * i + 1]) + clampMin); + reinterpret_cast(scale)[i] = tmp[2 * i + 1]; + } + } else { + for (int i = 0; i < scaleSize; ++i) { + weightQuantZeroPoint[i] = static_cast(roundf((-1) * alphaAndBeta[2 * i] / alphaAndBeta[2 * i + 1]) + clampMin); + scale[i] = alphaAndBeta[2 * i + 1]; + } + } + } return true; } MNN_ERROR("ConvolutionCommon::getConvInt8Parameters: No bias & scale data!"); diff --git a/source/core/ConvolutionCommon.hpp b/source/core/ConvolutionCommon.hpp index a61daa38f..28e3acf83 100644 --- a/source/core/ConvolutionCommon.hpp +++ b/source/core/ConvolutionCommon.hpp @@ -27,7 +27,7 @@ class MNN_PUBLIC ConvolutionCommon : public Execution { static std::shared_ptr load(const Convolution2D* conv, Backend* backend = nullptr, bool forceFloat = false, bool forceInt8 = false); static void getConvParameters(std::shared_ptr *quanCommon, Backend* backend, const MNN::Convolution2D *conv2d, const float** originWeight, int* originWeightSize); static bool getConvInt8Parameters(const MNN::Convolution2D* conv2d, std::shared_ptr& quanCommon, Backend* backend, - const int8_t*& weight, int& weightSize, float*& scale, int32_t*& bias); + const int8_t*& weight, int& weightSize, float*& scale, int32_t*& bias, int32_t*& weightQuantZero); // Return padX, padY static std::pair convolutionPad(const Tensor* input, const Tensor* output, diff --git a/source/core/FileLoader.cpp b/source/core/FileLoader.cpp index 021dcc5de..1b183ea5e 100644 --- a/source/core/FileLoader.cpp +++ b/source/core/FileLoader.cpp @@ -11,7 +11,7 @@ #include "Windows.h" #endif namespace MNN { -static FILE* _OpenFile(const char* file) { +static FILE* _OpenFile(const char* file, bool read) { #if defined(_MSC_VER) wchar_t wFilename[1024]; if (0 == MultiByteToWideChar(CP_ACP, 0, file, -1, wFilename, sizeof(wFilename))) { @@ -19,16 +19,31 @@ static FILE* _OpenFile(const char* file) { } #if _MSC_VER >= 1400 FILE* mFile = nullptr; - if (0 != _wfopen_s(&mFile, wFilename, L"rb")) { - return nullptr; + if (read) { + if (0 != _wfopen_s(&mFile, wFilename, L"rb")) { + return nullptr; + } + } else { + if (0 != _wfopen_s(&mFile, wFilename, L"wb")) { + return nullptr; + } } return mFile; #else - return _wfopen(wFilename, L"rb"); + if (read) { + return _wfopen(wFilename, L"rb"); + } else { + return _wfopen(wFilename, L"wb"); + } #endif #else - return fopen(file, "rb"); + if (read) { + return fopen(file, "rb"); + } else { + return fopen(file, "wb"); + } #endif + return nullptr; } FileLoader::FileLoader(const char* file, bool init) { if (nullptr == file) { @@ -86,7 +101,7 @@ bool FileLoader::read() { } bool FileLoader::write(const char* filePath, std::pair cacheInfo) { - FILE* f = fopen(filePath, "wb"); + FILE* f = _OpenFile(filePath, false); if (nullptr == f) { MNN_ERROR("Open %s error\n", filePath); return false; @@ -132,7 +147,7 @@ void FileLoader::_init() { } mInited = true; if (!mFilePath.empty()) { - mFile = _OpenFile(mFilePath.c_str()); + mFile = _OpenFile(mFilePath.c_str(), true); } if (nullptr == mFile) { MNN_ERROR("Can't open file:%s\n", mFilePath.c_str()); diff --git a/source/core/IDSTDecoder.hpp b/source/core/IDSTDecoder.hpp index 05f61ca77..679e92fcc 100644 --- a/source/core/IDSTDecoder.hpp +++ b/source/core/IDSTDecoder.hpp @@ -11,7 +11,6 @@ #include #include 
-#include #include "MNN_generated.h" #include "core/ConvolutionCommon.hpp" diff --git a/source/core/Interpreter.cpp b/source/core/Interpreter.cpp index 5078d1493..127bd6e52 100644 --- a/source/core/Interpreter.cpp +++ b/source/core/Interpreter.cpp @@ -221,7 +221,7 @@ Interpreter::Interpreter(Content* net) { mNet->bizCode = std::string(mNet->net->bizCode() ? mNet->net->bizCode()->c_str() : ""); mNet->uuid = std::string(mNet->net->mnn_uuid() ? mNet->net->mnn_uuid()->c_str() : ""); #ifdef MNN_INTERNAL_ENABLED - mNet->basicLogginData = getBasicLoggingData(); + mNet->basicLogginData = logBasicInfo(); mNet->basicLogginData.emplace("ModelVersion", getModelVersion()); #endif } @@ -238,8 +238,6 @@ Interpreter::~Interpreter() { Session* Interpreter::createMultiPathSession(const std::vector& configs) { RuntimeInfo runtime = createRuntime(configs); - runtime.second->setAllocatorType(mNet->modes.memoryAllocatorType); - runtime.second->setWinogradMemoryLevel(mNet->modes.winogradMemoryUsed); if (runtime.first.empty()) { MNN_ERROR("Runtime not valid for create session\n"); return nullptr; @@ -248,6 +246,11 @@ Session* Interpreter::createMultiPathSession(const std::vector& } Session* Interpreter::createMultiPathSession(const std::vector& configs, const RuntimeInfo& runtime) { + for (auto& iter : runtime.first) { + iter.second->setRuntimeHint(mNet->modes.runtimeHint); + } + runtime.second->setRuntimeHint(mNet->modes.runtimeHint); + if (nullptr == mNet->buffer.get()) { MNN_ERROR("The model buffer has been released. Can't create session\n"); return nullptr; @@ -267,6 +270,10 @@ Session* Interpreter::createMultiPathSession(const std::vector& if (!success) { return nullptr; } + if (info.needInputContentForShape) { + MNN_ERROR("Interpreter don't support case for shape compute need input content, please use module api instead\n"); + return nullptr; + } RuntimeInfo rt = runtime; bool valid = false; if (mNet->cacheBuffer.get() != nullptr) { diff --git a/source/core/OpCommonUtils.cpp b/source/core/OpCommonUtils.cpp index fcf0dd32e..f5e385605 100644 --- a/source/core/OpCommonUtils.cpp +++ b/source/core/OpCommonUtils.cpp @@ -11,7 +11,6 @@ #include "MNN_generated.h" #include "Macro.h" #include -#include namespace MNN { Tensor::DimensionType OpCommonUtils::convertDimType(MNN_DATA_FORMAT dimensionFormat) { diff --git a/source/core/Session.cpp b/source/core/Session.cpp index 9ab6b460c..9b27d5e1f 100644 --- a/source/core/Session.cpp +++ b/source/core/Session.cpp @@ -67,10 +67,13 @@ void Session::ModeGroup::setHint(Interpreter::HintMode mode, int hint) { maxTuningNumber = hint; break; case Interpreter::MEM_ALLOCATOR_TYPE: - memoryAllocatorType = hint; + runtimeHint.memoryAllocatorType = hint; break; case Interpreter::WINOGRAD_MEMORY_LEVEL: - winogradMemoryUsed = hint; + runtimeHint.winogradMemoryUsed = hint; + break; + case Interpreter::CPU_LITTLECORE_DECREASE_RATE: + runtimeHint.cpuDecreaseRate = hint; break; case Interpreter::GEOMETRY_COMPUTE_MASK: geometryMask = hint; @@ -78,6 +81,12 @@ void Session::ModeGroup::setHint(Interpreter::HintMode mode, int hint) { case Interpreter::STRICT_CHECK_MODEL: checkNetBuffer = hint > 0; break; + case Interpreter::DYNAMIC_QUANT_OPTIONS: + runtimeHint.dynamicQuantOption = hint; + break; + case Interpreter::KVCACHE_QUANT_OPTIONS: + runtimeHint.kvcacheQuantOption = hint; + break; default: break; } diff --git a/source/core/Session.hpp b/source/core/Session.hpp index 7b3ac7caf..c753a6c51 100644 --- a/source/core/Session.hpp +++ b/source/core/Session.hpp @@ -33,11 +33,10 @@ class MNN_PUBLIC 
Session { Interpreter::SessionMode resizeMode = Interpreter::Session_Resize_Direct; Interpreter::SessionMode memoryUsageMode = Interpreter::Session_Memory_Collect; Interpreter::SessionMode codegenMode = Interpreter::Session_Codegen_Disable; - int memoryAllocatorType = 0; int maxTuningNumber = MNN_DEFAULT_TUNING_NUMBER; - int winogradMemoryUsed = 3; int geometryMask = 0xFFFF; bool checkNetBuffer = true; + RuntimeHint runtimeHint; void setHint(Interpreter::HintMode hint, int magic); void setMode(Interpreter::SessionMode mode); }; diff --git a/source/geometry/GeometryOPRegister.cpp b/source/geometry/GeometryOPRegister.cpp index 91a743a1a..11982a07b 100644 --- a/source/geometry/GeometryOPRegister.cpp +++ b/source/geometry/GeometryOPRegister.cpp @@ -9,6 +9,7 @@ extern void ___GeometryReshape___create__(); extern void ___GeometryReduce___create__(); extern void ___GeometryInnerProduct___create__(); extern void ___GeometryTopK___create__(); +extern void ___GeometryLayerNorm___create__(); extern void ___GeometryDepthToSpace___create__(); extern void ___GeometryBroadcastTo___create__(); extern void ___GeometryConvert___create__(); @@ -40,7 +41,6 @@ extern void ___GeometrySlice___create__(); extern void ___GeometryConcat___create__(); extern void ___GeometryUnary___create__(); extern void ___GeometryBinary___create__(); -extern void ___GeometryLayerNorm___create__(); void registerGeometryOps() { ___GeometryShape___create__(); @@ -51,6 +51,7 @@ ___GeometryReshape___create__(); ___GeometryReduce___create__(); ___GeometryInnerProduct___create__(); ___GeometryTopK___create__(); +___GeometryLayerNorm___create__(); ___GeometryDepthToSpace___create__(); ___GeometryBroadcastTo___create__(); ___GeometryConvert___create__(); @@ -82,6 +83,5 @@ ___GeometrySlice___create__(); ___GeometryConcat___create__(); ___GeometryUnary___create__(); ___GeometryBinary___create__(); -___GeometryLayerNorm___create__(); } } diff --git a/test.sh b/test.sh index 9d7c1b2d7..ef4edd95b 100755 --- a/test.sh +++ b/test.sh @@ -547,14 +547,22 @@ android_model_test() { models=`ls ~/AliNNModel/TestResource/` for model in $models do - adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.&&./testModel.out ../AliNNModel/TestResource/$model/temp.bin ../AliNNModel/TestResource/$model/input_0.txt ../AliNNModel/TestResource/$model/output.txt 0 0.002" + if [ $model == 'mobilenetv1quan' ]; then + adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.&&./testModel.out ../AliNNModel/TestResource/$model/temp.bin ../AliNNModel/TestResource/$model/input_0.txt ../AliNNModel/TestResource/$model/output.txt 0 0.1" + else + adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.&&./testModel.out ../AliNNModel/TestResource/$model/temp.bin ../AliNNModel/TestResource/$model/input_0.txt ../AliNNModel/TestResource/$model/output.txt 0 0.002" + fi if [ $? 
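Reviewer note: the runtime tuning knobs are now grouped into RuntimeHint and routed through Session::ModeGroup::setHint, replacing the removed setAllocatorType/setWinogradMemoryLevel calls; Runtime also gains an onReset hook. A hypothetical usage sketch — it assumes Interpreter::setSessionHint(HintMode, int) forwards to the setHint switch shown above, and the option values follow the RuntimeHint comments (kvcacheQuantOption: 0 none, 1 int8 key, 2 fp8 value, 3 both):

#include <MNN/Interpreter.hpp>

// Hypothetical configuration sketch for the new hint plumbing.
void configureHints(MNN::Interpreter* net) {
    net->setSessionHint(MNN::Interpreter::MEM_ALLOCATOR_TYPE, 0);            // 0: defer, 1: eager
    net->setSessionHint(MNN::Interpreter::WINOGRAD_MEMORY_LEVEL, 3);
    net->setSessionHint(MNN::Interpreter::CPU_LITTLECORE_DECREASE_RATE, 50); // little core at 50% of big core
    net->setSessionHint(MNN::Interpreter::DYNAMIC_QUANT_OPTIONS, 0);
    net->setSessionHint(MNN::Interpreter::KVCACHE_QUANT_OPTIONS, 1);         // quantize key cache only (int8)
}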
-ne 0 ]; then fail_num=$[$fail_num+1] else pass_num=$[$pass_num+1] fi if [ "$OPENCL_CHANGE" ]; then - adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.&&./testModel.out ../AliNNModel/TestResource/$model/temp.bin ../AliNNModel/TestResource/$model/input_0.txt ../AliNNModel/TestResource/$model/output.txt 3 0.002 1" + if [ $model == 'mobilenetv1quan' ]; then + adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.&&./testModel.out ../AliNNModel/TestResource/$model/temp.bin ../AliNNModel/TestResource/$model/input_0.txt ../AliNNModel/TestResource/$model/output.txt 3 0.1 1" + else + adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.&&./testModel.out ../AliNNModel/TestResource/$model/temp.bin ../AliNNModel/TestResource/$model/input_0.txt ../AliNNModel/TestResource/$model/output.txt 3 0.002 1" + fi if [ $? -ne 0 ]; then fail_cl_num=$[$fail_cl_num+1] else diff --git a/test/TestUtils.h b/test/TestUtils.h index 94fa9f4d5..6a5dd2c20 100644 --- a/test/TestUtils.h +++ b/test/TestUtils.h @@ -47,7 +47,7 @@ bool checkVector(const T* result, const T* rightData, int size, T threshold){ MNN_ASSERT(size >= 0); for(int i = 0; i < size; ++i){ if(fabs(result[i] - rightData[i]) > threshold){ - std::cout << i << " error, right: " << rightData[i] << ", compute: " << result[i] << std::endl; + std::cout << "No." << i << " error, right: " << rightData[i] << ", compute: " << result[i] << std::endl; return false; } } diff --git a/test/core/ThreadPoolTest.cpp b/test/core/ThreadPoolTest.cpp index 51dfaf8f1..a0103cfc5 100644 --- a/test/core/ThreadPoolTest.cpp +++ b/test/core/ThreadPoolTest.cpp @@ -20,17 +20,17 @@ class ThreadPoolTest : public MNNTestCase { std::vector threads; for (int i = 0; i < 10; ++i) { threads.emplace_back([i]() { - MNN::ThreadPool::init(10 - i); + int number = MNN::ThreadPool::init(10 - i); // initializer auto workIndex = ThreadPool::acquireWorkIndex(); FUNC_PRINT(workIndex); - ThreadPool::active(); + ThreadPool::active(number); auto func = [](int index) { FUNC_PRINT(index); std::this_thread::yield(); }; - ThreadPool::enqueue(std::make_pair(std::move(func), 10), workIndex); - ThreadPool::deactive(); + ThreadPool::enqueue(std::make_pair(std::move(func), 10), workIndex, number); + ThreadPool::deactive(number); ThreadPool::releaseWorkIndex(workIndex); }); } diff --git a/test/expr/ModuleTest.cpp b/test/expr/ModuleTest.cpp index d2f4fc19e..84fb16d11 100644 --- a/test/expr/ModuleTest.cpp +++ b/test/expr/ModuleTest.cpp @@ -851,7 +851,7 @@ class MemeoryUsageTest : public MNNTestCase { BackendConfig bnConfig; bnConfig.precision = (MNN::BackendConfig::PrecisionMode)precision; config.numThread = 1; - config.type = ExecutorScope::Current()->getAttr()->firstType.first; + config.type = ExecutorScope::Current()->getAttr()->firstType; config.backendConfig = &bnConfig; auto s1 = net->createSession(config); float memory = 0.0f; @@ -947,7 +947,7 @@ class ConstMemoryReplaceTest : public MNNTestCase { std::shared_ptr net(Interpreter::createFromBuffer((void*)bufferOutput, sizeOutput), Interpreter::destroy); ScheduleConfig config; config.numThread = 4; - config.type = ExecutorScope::Current()->getAttr()->firstType.first; + config.type = ExecutorScope::Current()->getAttr()->firstType; auto s1 = net->createSession(config); int resizeCode; net->getSessionInfo(s1, Interpreter::RESIZE_STATUS, &resizeCode); @@ -984,7 +984,7 @@ class MutlThreadConstReplaceTest : public MNNTestCase { BackendConfig bnConfig; bnConfig.precision = (MNN::BackendConfig::PrecisionMode)precision; config.numThread = 1; - config.type 
= ExecutorScope::Current()->getAttr()->firstType.first; + config.type = ExecutorScope::Current()->getAttr()->firstType; config.backendConfig = &bnConfig; std::vector threads; diff --git a/test/op/ConvInt8Test.cpp b/test/op/ConvInt8Test.cpp index 31f716046..428a37d72 100644 --- a/test/op/ConvInt8Test.cpp +++ b/test/op/ConvInt8Test.cpp @@ -257,7 +257,7 @@ class ConvInt8TestCommon : public MNNTestCase { // Because of round implement in ARM / X86 / PC may cause 1 / 0 / -1 diff, don't care about this error auto error = (int32_t)targetValue - (int32_t)computeResult; if (error * error > 1) { - MNN_PRINT("%d x %d, ConvInt8 result %d Error: %d -> %d\n", ow, oh, i, targetValue, computeResult); + MNN_PRINT("ic=%d, oc=%d, ow=%d, oh=%d, ConvInt8 result No.%d Error: right=%d, error=%d\n", channel[0], channel[1], ow, oh, i, targetValue, computeResult); #ifdef DEBUG x->writeMap(); auto ptr = y->readMap(); @@ -293,7 +293,7 @@ class ConvInt8Im2colGemmTest : public ConvInt8TestCommon { std::vector> kernels = { {4, 2}, {1, 5}, {7, 1} }; - int iw = 24; int ih = 17; + int iw = 14; int ih = 11; std::vector titles = {"4x2", "1x5", "7x1"}; for (int sx=1; sx<2; ++sx) { for (int sy=1; sy<2; ++sy) { @@ -309,6 +309,7 @@ class ConvInt8Im2colGemmTest : public ConvInt8TestCommon { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 8, false, 1, 2, MNN::SparseAlgo_RANDOM, 1, false); if (!res) { MNN_ERROR("Error for test kernel %s for convint8 215, 204 (im2col + gemm)\n", titles[i].c_str()); + MNN_ERROR("overflow=false, bit=8, batch=2, Conv info: sx=%d, sy=%d, dx=%d, dy=%d, px=%d, py=%d, ic=%d, oc=%d\n", sx, sy, dx, dy, px, py, ic, oc); return false; } } @@ -316,6 +317,7 @@ class ConvInt8Im2colGemmTest : public ConvInt8TestCommon { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 3, true, 1, 3, MNN::SparseAlgo_RANDOM, 1, false); if (!res) { MNN_ERROR("Error for test kernel %s for convint8 215, 204 (im2col + gemm + overflow aware)\n", titles[i].c_str()); + MNN_ERROR("overflow=true,bit=3, batch=3, Conv info: sx=%d, sy=%d, dx=%d, dy=%d, px=%d, py=%d, ic=%d, oc=%d\n", sx, sy, dx, dy, px, py, ic, oc); return false; } } @@ -323,6 +325,7 @@ class ConvInt8Im2colGemmTest : public ConvInt8TestCommon { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 8, false, 1, 5, MNN::SparseAlgo_RANDOM, 1, false); if (!res) { MNN_ERROR("Error for test kernel %s for convint8 215, 201 (im2col + gemm)\n", titles[i].c_str()); + MNN_ERROR("overflow=false,bit=8, batch=5, Conv info: sx=%d, sy=%d, dx=%d, dy=%d, px=%d, py=%d, ic=%d, oc=%d\n", sx, sy, dx, dy, px, py, ic, oc); return false; } } @@ -330,6 +333,7 @@ class ConvInt8Im2colGemmTest : public ConvInt8TestCommon { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 3, true, 1, 2, MNN::SparseAlgo_RANDOM, 1, false); if (!res) { MNN_ERROR("Error for test kernel %s for convint8 215, 201 (im2col + gemm + overflow aware)\n", titles[i].c_str()); + MNN_ERROR("overflow=true,bit=3, batch=2, Conv info: sx=%d, sy=%d, dx=%d, dy=%d, px=%d, py=%d, ic=%d, oc=%d\n", sx, sy, dx, dy, px, py, ic, oc); return false; } } @@ -414,22 +418,22 @@ class SparseConvInt8Im2colGemmTest : public ConvInt8TestCommon { for (int i = 0; i < kernels.size(); ++i) { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 3, true, 1, 3, SparseList[is].first, SparseList[is].second, false); if (!res) { - MNN_ERROR("Error for test kernel %s for convint8 215, 204 (im2col + gemm + overflow aware)\n", titles[i].c_str()); 
+ MNN_ERROR("Error for test kernel %s for convint8 (im2col + gemm + overflow aware)\n", titles[i].c_str()); return false; } } - inputShape = {215, 201}; + inputShape = {123, 65}; for (int i = 0; i < kernels.size(); ++i) { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 8, false, 1, 5, SparseList[is].first, SparseList[is].second, false); if (!res) { - MNN_ERROR("Error for test kernel %s for convint8 215, 201 (im2col + gemm)\n", titles[i].c_str()); + MNN_ERROR("Error for test kernel %s for convint8 (im2col + gemm)\n", titles[i].c_str()); return false; } } for (int i = 0; i < kernels.size(); ++i) { auto res = testKernel(inputShape, kernels[i], channel, pad, strides, dilate, 3, true, 1, 2, SparseList[is].first, SparseList[is].second, false); if (!res) { - MNN_ERROR("Error for test kernel %s for convint8 215, 201 (im2col + gemm + overflow aware)\n", titles[i].c_str()); + MNN_ERROR("Error for test kernel %s for convint8 (im2col + gemm + overflow aware)\n", titles[i].c_str()); return false; } } @@ -567,7 +571,7 @@ class ConvInt8WinogradTestCommon : public MNNTestCase { return false; } if (!checkVector(yPtr, yTargetPtr, yInfo->size, 1)) { - MNN_ERROR("[ConvInt8WinogradTestCommon] result error for batchSize = %d\n", batchSize); + MNN_ERROR("[ConvInt8WinogradTestCommon] result error for batchSize = %d, oc=%d, oh=%d, ow=%d\n", batchSize, yInfo->dim[1], yInfo->dim[2], yInfo->dim[3]); return false; } if (speed) { @@ -593,7 +597,7 @@ class ConvInt8WinogradTestCommon : public MNNTestCase { class ConvInt8WinogradTest : public ConvInt8WinogradTestCommon { virtual bool run(int precision) { - INTS pad = {1, 1}, inputShape = {128, 128}; // {w, h} + INTS pad = {1, 1}, inputShape = {47, 39}; // {w, h} INTS channel = {32, 32}; // {ci, co} std::vector> kernels = { diff --git a/test/op/ConvolutionTest.cpp b/test/op/ConvolutionTest.cpp index 836ace993..6c127d5a8 100644 --- a/test/op/ConvolutionTest.cpp +++ b/test/op/ConvolutionTest.cpp @@ -665,7 +665,7 @@ class ConvolutionInt8CommonTest : public ConvolutionCommonTest { MNN_PRINT("precision:%d, expect:\t expect2:\t real:\t\n", precision); for (int i = 0; i < toutputData.size(); ++i) { - MNN_PRINT("%f\t, %f\t, %f\n", toutputData[i],outputDataSeparateBias[i], outputPtr[i]); + MNN_PRINT("%f\t, %f\n", toutputData[i], outputPtr[i]); } MNN_ERROR("%s(%s) test failed for %d bits, async=%d , relu: %d, relu6: %d!\n", test_op_name.c_str(), device_name.c_str(), nbit, async, activation.first, activation.second); return false; diff --git a/test/op/DeconvolutionTest.cpp b/test/op/DeconvolutionTest.cpp index 87912379d..c4e46af32 100644 --- a/test/op/DeconvolutionTest.cpp +++ b/test/op/DeconvolutionTest.cpp @@ -119,7 +119,7 @@ class DeconvolutionCommonTestInt8 : public MNNTestCase { auto outputPtr = y->readMap(); float errorScale = precision <= MNN::BackendConfig::Precision_High ? 
1 : 20; if (!checkVectorByRelativeError(outputPtr, rightOutData.data(), rightOutData.size(), 0.005 * errorScale)) { - MNN_ERROR("%s(%s) test failed!\n", test_op_name.c_str(), device_name.c_str()); + MNN_ERROR("%s(%s) test failed: batch=%d, oc=%d, oh=%d, ow=%d!\n", test_op_name.c_str(), device_name.c_str(), y->getInfo()->dim[0], y->getInfo()->dim[1], y->getInfo()->dim[2], y->getInfo()->dim[3]); return false; } return true; @@ -441,6 +441,75 @@ class DeconvolutionInt8Test : public DeconvolutionCommonTestInt8 { return false; } } + MNN_PRINT("begin testcase 3\n"); + { + std::vector data_a = {// channel 0 + 1.0, 2.0, 4.0, 5.0, + // channel 1 + 1.1, 2.1, 4.1, 5.1, + // channel 2 + 1.2, 2.2, 4.2, 5.2}; + + std::vector weight = {//IOHW + // input channel0 + + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + + // input channel1 + + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + + // input channel2 + + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; + std::vector bias(9, 0); + std::vector data_c = {3.3, 3.3, 9.6, 6.3, 3.3, 3.3, 9.6, 6.3, 15.6, 15.6, 37.2, + 21.6, 12.3, 12.3, 27.6, 15.3, + + 6.6, 6.6, 19.2, 12.6, 6.6, 6.6, 19.2, 12.6, 31.2, 31.2, 74.4, + 43.2, 24.6, 24.6, 55.2, 30.6}; + int ic = 3, oc = 9; + int kw = 3, kh = 3, ih = 2, iw = 2; + int stride = 2, dilation = 1; + int group = 1, batch = 1; + int pad_w = 0, pad_h = 0; + + std::vector scale = {1., 1.}; + std::vector zeroPoints = {0, 0}; + std::vector quantScales = {0.0416, 0.6112}; + + bool succ = DeconvolutionCommonTestInt8::test("CPU", "Deconv", data_a, weight, bias, data_c, + batch, ic, oc, ih, iw, PadMode_SAME, pad_h, pad_w, kh, kw, + stride, dilation, group, precision, scale, zeroPoints, quantScales); + if (!succ) { + return false; + } + } return true; } }; diff --git a/test/op/PReLUTest.cpp b/test/op/PReLUTest.cpp index d73050317..f6a3d1365 100644 --- a/test/op/PReLUTest.cpp +++ b/test/op/PReLUTest.cpp @@ -40,11 +40,19 @@ class PreluTestInt8 : public MNNTestCase { public: virtual ~PreluTestInt8() = default; virtual bool run(int precision) { - auto input = _Input({1, 4, 4, 2}, NCHW); + auto input = _Input({1, 12, 4, 2}, NCHW); input->setName("input_tensor"); // set input data input->writeScaleMap(0.03567, 1.0); const float inpudata[] = {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, + 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, + -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, + 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, + -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, + 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, + -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, + 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, + -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0}; @@ -52,12 +60,21 @@ class PreluTestInt8 : public MNNTestCase { memcpy(inputPtr, inpudata, 4 * sizeof(float)); input->unMap(); input = 
_Convert(input, NC4HW4); - auto output = _PRelu(input, {3.0, 1.5, 1.5, 1.5}); + auto output = _PRelu(input, {3.0, 1.5, 1.5, 1.5, 3.0, 1.5, 1.5, 1.5, 3.0, 1.5, 1.5, 1.5}); output = _Convert(output, NCHW); const std::vector expectedOutput = {-3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, - 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0}; + 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, + -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, + 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, + -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, + 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, + -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, + 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, + -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, -4.5, + 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0 + }; output->writeScaleMap(0.03567, 1.0); auto gotOutput = output->readMap(); if (!checkVector(gotOutput, expectedOutput.data(), 4, 0.05)) { diff --git a/test/op/SoftmaxTest.cpp b/test/op/SoftmaxTest.cpp index 8a3578591..ebbed4224 100644 --- a/test/op/SoftmaxTest.cpp +++ b/test/op/SoftmaxTest.cpp @@ -282,53 +282,6 @@ class SoftmaxInt8Test: public MNNTestCase { } } } - - // testcase 2 - { - auto input = _Input({2, 5}, NCHW); - input->setName("input_tensor"); - // set input data - const float inpudata[] = {1.0, 2.0, 3.0, 4.0, 5.0, -1.0, -2.0, -3.0, -4.0, -5.0}; - const float quantScales[] = {1.0, 0.00784}; - const float zeroPoints[] = {1., 2.}; - input->writeScaleMap(quantScales[0], zeroPoints[0]); - auto inputPtr = input->writeMap(); - memcpy(inputPtr, inpudata, 10 * sizeof(float)); - input->unMap(); - auto output = _Softmax(input); - const std::vector expectedOrder = {0, 1, 2, 3, 4, 9, 8, 7, 6, 5}; - const std::vector expectedOutput = {0.0117, 0.0317, 0.0861, 0.2341, 0.6364, 0.6364, 0.2341, 0.0861, 0.0317, 0.0117}; - output->writeScaleMap(quantScales[1], zeroPoints[1]); - auto gotOutput = output->readMap(); - bool result = checkProbAndOrder((float*)gotOutput, expectedOutput.data(), expectedOrder.data(), 10, {2, 5}, 1); - if (!result) { - MNN_PRINT("SoftmaxInt8 case2 failed!\n"); - return false; - } - } - // testcase 3 - { - auto input = _Input({2, 2}, NCHW); - input->setName("input_tensor"); - // set input data - const float inpudata[] = {-1.0, -2.0, 3.0, 4.0}; - const float quantScales[] = {1.0, 0.00784}; - const float zeroPoints[] = {1., 2.}; - input->writeScaleMap(quantScales[0], zeroPoints[0]); - auto inputPtr = input->writeMap(); - memcpy(inputPtr, inpudata, 4 * sizeof(float)); - input->unMap(); - auto output = _Softmax(input); - const std::vector expectedOrder = {1, 2, 0, 3}; - const std::vector expectedOutput = {0.7310586, 0.26894143, 0.26894143, 0.7310586}; - output->writeScaleMap(quantScales[1], zeroPoints[1]); - auto gotOutput = output->readMap(); - bool result = checkProbAndOrder((float*)gotOutput, expectedOutput.data(), expectedOrder.data(), 4, {2, 2}, 1); - if (!result) { - MNN_PRINT("SoftmaxInt8 case3 failed!\n"); - return false; - } - } return true; } }; diff --git a/test/speed/HybridConvSpeedTest.cpp b/test/speed/HybridConvSpeedTest.cpp index 548596dd3..2354c4c58 100644 --- a/test/speed/HybridConvSpeedTest.cpp +++ b/test/speed/HybridConvSpeedTest.cpp @@ -63,22 +63,21 @@ class HybridConvSpeedTestCommon : public MNNTestCase { #else #define FLOAT_T float #endif + y = _Convert(y, NCHW); + yfp32 = _Convert(yfp32, NCHW); auto yPtr = y->readMap(); auto tgPtr = yfp32->readMap(); auto elesize = batch * oc * oh * ow; - float limit = 0.02f; - if (nbit < 8) { - limit = 
0.1f; - } + float limit = 0.1f; for (int i = 0; i < elesize; ++i) { float targetValue = tgPtr[i], computeResult = yPtr[i]; float diff = targetValue - computeResult; float ratio = fabsf(diff) / fmax(targetValue, computeResult); if (targetValue != 0 && computeResult != 0 && ratio > limit) { - MNN_PRINT("HybridConv result Error: %f -> %f\n", targetValue, computeResult); + MNN_PRINT("%d result Error ratio=%f: right=%f, error=%f\n", i, ratio, targetValue, computeResult); return false; } else if ((targetValue == 0 || computeResult == 0) && fabsf(diff) > limit) { - MNN_PRINT("HybridConv result Error: %f -> %f\n", targetValue, computeResult); + MNN_PRINT("%d result Error ratio=%f: right=%f, error=%f\n", i, ratio, targetValue, computeResult); return false; } } @@ -103,9 +102,9 @@ class HybridConvSpeedInt8Test : public HybridConvSpeedTestCommon { public: virtual bool run(int precision) { INTS strides = {1, 1}, dilate = {1, 1}, pad = {0, 0}, inputShape = {1, 1}; // {w, h} - INTS channel0 = {2048, 512}; // {ci, co} + INTS channel0 = {2048, 512}; // {ic, co} INTS channel1 = {1496, 256}; - int batch[2] = {1, 13}; + int batch[2] = {23, 13}; std::vector kernels = {1, 1}; std::vector weightBits = {8, 4}; bool lowmemory = true; @@ -114,14 +113,14 @@ class HybridConvSpeedInt8Test : public HybridConvSpeedTestCommon { for (int n = 0; n < 2; ++n) { auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channel0, pad, strides, dilate, batch[n], bits, precision, true); if (!res) { - MNN_ERROR("Error: low memory hybridConv when n=%d, ci=%d, c0=%d\n", batch[n], channel0[0], channel0[1]); + MNN_ERROR("Error: low memory hybridConv when n=%d, ic=%d, oc=%d\n", batch[n], channel0[0], channel0[1]); return false; } } for (int n = 0; n < 2; ++n) { auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channel1, pad, strides, dilate, batch[n], bits, precision, true); if (!res) { - MNN_ERROR("Error: low memory hybridConv when n=%d, ci=%d, c0=%d\n", batch[n], channel1[0], channel1[1]); + MNN_ERROR("Error: low memory hybridConv when n=%d, ic=%d, oc=%d\n", batch[n], channel1[0], channel1[1]); return false; } } @@ -133,26 +132,22 @@ class HybridConvSpeedInt8Test : public HybridConvSpeedTestCommon { class HybridConvInt8Test : public HybridConvSpeedTestCommon { public: virtual bool run(int precision) { - INTS channel0 = {2048, 512}; // {ci, co} - INTS channel1 = {1496, 256}; + std::vector< std::vector> channels = {{7, 9}, {2048, 6144}, {1, 10}, {20, 153}, {9, 18}}; INTS strides = {1, 1}, dilate = {1, 1}, pad = {0, 0}, inputShape = {1, 1}; // {w, h} - int batch[2] = {1, 13}; + int testBatchCount = 5; + // std::vector batch(testBatchCount); + std::vector batch = {1, 23, 1479, 38, 29}; std::vector kernels = {1, 1}; std::vector weightBits = {8}; bool lowmemory = true; for (auto& bits : weightBits) { - for (int n = 0; n < 2; ++n) { - auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channel0, pad, strides, dilate, batch[n], bits, precision); - if (!res) { - MNN_ERROR("Error: low memory hybridConv when n=%d, ci=%d, c0=%d\n", batch[n], channel0[0], channel0[1]); - return false; - } - } - for (int n = 0; n < 2; ++n) { - auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channel1, pad, strides, dilate, batch[n], bits, precision); - if (!res) { - MNN_ERROR("Error: low memory hybridConv when n=%d, ci=%d, c0=%d\n", batch[n], channel1[0], channel1[1]); - return false; + for (int i = 0; i < channels.size(); ++i) { + for (int n = 0; n < batch.size(); ++n) { + 
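Reviewer note: the hybrid-conv tests above switch to a single relative-error limit of 0.1 and compare against the fp32 reference after converting both outputs to NCHW. A small standalone checker equivalent to that loop, with an absolute fallback when either value is exactly zero (names are illustrative):

#include <cmath>
#include <cstdio>

// Relative-error check used by the hybrid-conv tests: ratio = |ref - got| / max(ref, got).
bool checkRelative(const float* ref, const float* got, int size, float limit = 0.1f) {
    for (int i = 0; i < size; ++i) {
        const float diff  = ref[i] - got[i];
        const float ratio = std::fabs(diff) / std::fmax(ref[i], got[i]);
        if (ref[i] != 0.f && got[i] != 0.f && ratio > limit) {
            printf("%d result error, ratio=%f: right=%f, got=%f\n", i, ratio, ref[i], got[i]);
            return false;
        }
        if ((ref[i] == 0.f || got[i] == 0.f) && std::fabs(diff) > limit) {
            printf("%d result error: right=%f, got=%f\n", i, ref[i], got[i]);
            return false;
        }
    }
    return true;
}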
auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channels[i], pad, strides, dilate, batch[n], bits, precision); + if (!res) { + MNN_ERROR("Error: low memory hybridConv when n=%d, ic=%d, oc=%d\n", batch[n], channels[i][0], channels[i][1]); + return false; + } } } } @@ -163,8 +158,7 @@ class HybridConvInt8Test : public HybridConvSpeedTestCommon { class DenseConvInt8Test : public HybridConvSpeedTestCommon { public: virtual bool run(int precision) { - INTS channel0 = {256, 256}; // {ci, co} - INTS channel1 = {1496, 256}; + std::vector< std::vector> channels = {{4, 256}, {2048, 256}, {1, 8}, {7, 9}}; INTS strides = {1, 1}, dilate = {1, 3}, pad = {0, 3}, inputShape = {1, 2640}; // {w, h} int batch[2] = {1, 13}; std::vector kernels = {1, 3}; @@ -173,17 +167,12 @@ class DenseConvInt8Test : public HybridConvSpeedTestCommon { int n = 0; for (auto& bits : weightBits) { for (int n = 0; n < 2; ++n) { - auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channel0, pad, strides, dilate, batch[n], bits, precision); - if (!res) { - MNN_ERROR("Error: low memory hybridConv when n=%d, ci=%d, c0=%d\n", batch[n], channel0[0], channel0[1]); - return false; - } - } - for (int n = 0; n < 2; ++n) { - auto res = testKernel("Low memory HybridConv test:", inputShape, kernels, channel1, pad, strides, dilate, batch[n], bits, precision); - if (!res) { - MNN_ERROR("Error: low memory hybridConv when n=%d, ci=%d, c0=%d\n", batch[n], channel1[0], channel1[1]); - return false; + for (int i = 0; i < channels.size(); ++i) { + auto res = testKernel("Low memory ConvInt8 with 1x3 kernel test:", inputShape, kernels, channels[i], pad, strides, dilate, batch[n], bits, precision); + if (!res) { + MNN_ERROR("Error: low memory ConvInt8 with 1x3 kernel when n=%d, ic=%d, oc=%d\n", batch[n], channels[i][0], channels[i][1]); + return false; + } } } } diff --git a/tools/converter/source/common/cli.cpp b/tools/converter/source/common/cli.cpp index ffe8c8ae9..89e83ab26 100644 --- a/tools/converter/source/common/cli.cpp +++ b/tools/converter/source/common/cli.cpp @@ -40,7 +40,7 @@ #include "core/MemoryFormater.h" namespace MNN { - +using namespace MNN::Express; static std::string _getDataType(const halide_type_t& type) { switch (type.code) { case halide_type_float: @@ -153,7 +153,7 @@ bool Cli::initializeMNNConvertArgs(modelConfig &modelPath, int argc, char **argv ) ( "keepInputFormat", - "keep input dimension format or not, default: false", + "keep input dimension format or not, default: true", cxxopts::value() ) ( @@ -492,6 +492,151 @@ bool Cli::initializeMNNConvertArgs(modelConfig &modelPath, int argc, char **argv return true; } +typedef VARP (*unaryProc)(VARP input); +static unaryProc selectUnaryProc(int type) { + switch (type) { + case UnaryOpOperation_ABS: + return MNN::Express::_Abs; + case UnaryOpOperation_SQUARE: + return MNN::Express::_Square; + case UnaryOpOperation_NEG: + return MNN::Express::_Negative; + case UnaryOpOperation_RSQRT: + return MNN::Express::_Rsqrt; + case UnaryOpOperation_EXP: + return MNN::Express::_Exp; + case UnaryOpOperation_COS: + return MNN::Express::_Cos; + case UnaryOpOperation_SIN: + return MNN::Express::_Sin; + case UnaryOpOperation_SIGMOID: + return MNN::Express::_Sigmoid; + case UnaryOpOperation_TANH: + return MNN::Express::_Tanh; + case UnaryOpOperation_TAN: + return MNN::Express::_Tan; + case UnaryOpOperation_ATAN: + return MNN::Express::_Atan; + case UnaryOpOperation_SQRT: + return MNN::Express::_Sqrt; + case UnaryOpOperation_RECIPROCAL: + return 
MNN::Express::_Reciprocal; + case UnaryOpOperation_LOG1P: + return MNN::Express::_Log1p; + case UnaryOpOperation_LOG: + return MNN::Express::_Log; + case UnaryOpOperation_ACOSH: + return MNN::Express::_Acosh; + case UnaryOpOperation_SINH: + return MNN::Express::_Sinh; + case UnaryOpOperation_ASINH: + return MNN::Express::_Asinh; + case UnaryOpOperation_ATANH: + return MNN::Express::_Atanh; + case UnaryOpOperation_SIGN: + return MNN::Express::_Sign; + case UnaryOpOperation_COSH: + return MNN::Express::_Cosh; + case UnaryOpOperation_ERF: + return MNN::Express::_Erf; + case UnaryOpOperation_ERFC: + return MNN::Express::_Erfc; + case UnaryOpOperation_ERFINV: + return MNN::Express::_Erfinv; + case UnaryOpOperation_EXPM1: + return MNN::Express::_Expm1; + case UnaryOpOperation_ASIN: + return MNN::Express::_Asin; + case UnaryOpOperation_ACOS: + return MNN::Express::_Acos; + case UnaryOpOperation_HARDSWISH: + return MNN::Express::_Hardswish; + case UnaryOpOperation_GELU: + return MNN::Express::_Gelu; + default: + MNN_ASSERT(false); + break; + } + return nullptr; +} +static void computeUnaryBuffer(MNN::NetT* net) { + for (auto iter = net->oplists.begin(); iter != net->oplists.end(); ++iter) { + auto op = iter->get(); + auto opType = op->type; + std::map describes; + for (auto& des : net->extraTensorDescribe) { + describes.insert(std::make_pair(des->index, des.get())); + } + if (opType == MNN::OpType_Sigmoid || opType == MNN::OpType_TanH) { + op->type = OpType_UnaryOp; + op->main.value = new UnaryOpT; + op->main.type = OpParameter_UnaryOp; + op->main.AsUnaryOp()->opType = UnaryOpOperation_SIGMOID; + if (opType == MNN::OpType_TanH) { + op->main.AsUnaryOp()->opType = UnaryOpOperation_TANH; + } + opType = op->type; + } + if (opType == MNN::OpType_UnaryOp) { + auto type = op->main.AsUnaryOp()->opType; + if (type == UnaryOpOperation_ABS || type == UnaryOpOperation_NEG || type == UnaryOpOperation_SIGN) { + continue; + } + op->main.AsUnaryOp()->tableInt8.resize(255); + auto unaryParam = op->main.AsUnaryOp()->tableInt8.data(); + + auto outputId = op->outputIndexes[0]; + if (describes.find(outputId) == describes.end()) { + continue; + } + auto unaryDes = describes.find(outputId)->second; + float outScale = unaryDes->quantInfo->scale; + float outZero = unaryDes->quantInfo->zero; + auto inputId = op->inputIndexes[0]; + if (describes.find(inputId) == describes.end()) { + auto iter = describes.find(outputId); + + } + unaryDes = describes.find(inputId)->second; + float inpScale = unaryDes->quantInfo->scale; + float inpZero = unaryDes->quantInfo->zero; + + // Read input data. + std::vector dataInput; + float fx = 0.f; + auto input = _Input({255}, NCHW, halide_type_of()); + input->setName("input_tensor"); + auto ptr_in = input->template writeMap(); + for (int i = -127; i <= 127; ++i) { + fx = (i - inpZero) * inpScale; + dataInput.push_back(fx); + ptr_in[i + 127] = fx; + } + input->unMap(); + // Compute output data. + VARP output; + auto func = selectUnaryProc(type); + if (nullptr == func) { + MNN_ERROR("Don't support quantizing UnaryOP: %s to Int8\n", op->name.c_str()); + } + output = func(input); + auto gotOutput = output->template readMap(); + // Write output data. 
+ int val; + for (int i = 0; i < 255; ++i) { + val = (int)roundf(gotOutput[i] / outScale) + outZero; + if (val > 127) { + val = 127; + } + if (val < -127) { + val = -127; + } + unaryParam[i] = val; + } + } + } +} + bool Cli::convertModel(modelConfig& modelPath) { if (modelPath.dumpInfo) { dumpModelInfo(modelPath.modelFile.c_str()); @@ -555,6 +700,11 @@ bool Cli::convertModel(modelConfig& modelPath) { if (modelPath.model != modelConfig::MNN || modelPath.optimizeLevel >= 2) { std::cout << "Start to Optimize the MNN Net..." << std::endl; std::unique_ptr newNet = optimizeNet(netT, modelPath.forTraining, modelPath); + if (newNet->extraTensorDescribe.size()>0) { + MNN_PRINT("MNN net has tensor quant info\n"); + computeUnaryBuffer(newNet.get()); + } + error = writeFb(newNet, modelPath.MNNModel, modelPath); } else { error = writeFb(netT, modelPath.MNNModel, modelPath); diff --git a/tools/converter/source/optimizer/PostConverter.cpp b/tools/converter/source/optimizer/PostConverter.cpp index 97bd8ec39..26535ebe4 100644 --- a/tools/converter/source/optimizer/PostConverter.cpp +++ b/tools/converter/source/optimizer/PostConverter.cpp @@ -274,7 +274,11 @@ std::unique_ptr optimizeNetImpl(std::unique_ptr& originNet // Remove Invalid Cast "RemoveInvalidCast" }; - auto tensorDescribe = std::move(originNet->extraTensorDescribe); + std::vector> tensorDescribe; + if (originNet->extraTensorDescribe.size() > 0) { + tensorDescribe = std::move(originNet->extraTensorDescribe); + } + std::unique_ptr newNet; newNet = std::move(RunExtraPass(originNet, inputs)); RunNetPass(midOptPass, newNet); @@ -344,7 +348,9 @@ std::unique_ptr optimizeNetImpl(std::unique_ptr& originNet newNet = std::move(RunMergePass(newNet, inputs, PASS_PRIORITY_LOW)); newNet = std::move(RunMergePass(newNet, inputs, PASS_PRIORITY_FINAL)); - newNet->extraTensorDescribe = std::move(tensorDescribe); + if (tensorDescribe.size() > 0) { + newNet->extraTensorDescribe = std::move(tensorDescribe); + } RunNetPass({"ReIndexTensor"}, newNet); RunNetPass({"ReIndexOnnxIfAlias"}, newNet); diff --git a/tools/converter/source/optimizer/Program.cpp b/tools/converter/source/optimizer/Program.cpp index 28be3f37e..461a403ff 100644 --- a/tools/converter/source/optimizer/Program.cpp +++ b/tools/converter/source/optimizer/Program.cpp @@ -20,11 +20,11 @@ namespace MNN { namespace Express { void Program::createUnit(std::map& varMap, std::vector& inputIndexes, const std::vector>& oplists, MNN::OpT* op, const MNN::NetT* net, std::set& invalidSet, std::set& extraInputIndexes) { - createUnit(varMap, inputIndexes, oplists, op, net->tensorName, invalidSet, extraInputIndexes); + createUnit(varMap, inputIndexes, oplists, op, net->tensorName, invalidSet, extraInputIndexes, net); } void Program::createUnit(std::map& varMap, std::vector& inputIndexes, const std::vector>& oplists, - MNN::OpT* op, const std::vector& tensorName, std::set& invalidSet, std::set& extraInputIndexes, const MNN::NetT* net) { + MNN::OpT* op, const std::vector& tensorName, std::set& invalidSet, std::set& extraInputIndexes, const MNN::NetT* net, std::map TensorDescribeName) { if (invalidSet.find(op) != invalidSet.end()) { return; } @@ -46,7 +46,7 @@ void Program::createUnit(std::map& varMap, std::vector& inputInd for (int j = 0; j < oplists.size(); ++j) { for (auto outputIndex : oplists[j]->outputIndexes) { if (outputIndex == input) { - createUnit(varMap, inputIndexes, oplists, oplists[j].get(), tensorName, invalidSet, extraInputIndexes, net); + createUnit(varMap, inputIndexes, oplists, oplists[j].get(), 
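The table filled in above has a compact reading: for every int8 code, dequantize with the input scale/zero, apply the float unary function, requantize with the output scale/zero, and clamp to [-127, 127]. A standalone sketch under that reading (buildUnaryTable is a hypothetical name, not part of the converter):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <functional>
    #include <vector>

    // One entry per int8 code in [-127, 127], matching the 255-entry tableInt8 above.
    static std::vector<int8_t> buildUnaryTable(const std::function<float(float)>& f,
                                               float inpScale, float inpZero,
                                               float outScale, float outZero) {
        std::vector<int8_t> table(255);
        for (int i = -127; i <= 127; ++i) {
            float x   = (i - inpZero) * inpScale;                          // dequantize the code
            int   val = (int)std::round(f(x) / outScale) + (int)outZero;   // requantize the result
            val = std::min(127, std::max(-127, val));                      // clamp to the table range
            table[i + 127] = (int8_t)val;
        }
        return table;
    }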
tensorName, invalidSet, extraInputIndexes, net, TensorDescribeName); } } } @@ -69,10 +69,11 @@ void Program::createUnit(std::map& varMap, std::vector& inputInd } auto newVar = Variable::create(expr, j); newVar->setName(tensorName[outputIndexes[j]]); - if (op->type != OpType_ConvertTensor && nullptr != net && !net->extraTensorDescribe.empty()) { + if (nullptr != net && !net->extraTensorDescribe.empty()) { auto& extraDescribes = net->extraTensorDescribe; - int idx = outputIndexes[j]; - if (idx < extraDescribes.size() && nullptr != extraDescribes[idx] && nullptr != extraDescribes[idx]->quantInfo) { +// int idx = outputIndexes[j]; + if (TensorDescribeName.find(op->name) != TensorDescribeName.end()) { + int idx = TensorDescribeName[op->name]; float scale = extraDescribes[idx]->quantInfo->scale; float zero = extraDescribes[idx]->quantInfo->zero; newVar->writeScaleMap(scale, zero); @@ -112,9 +113,15 @@ std::shared_ptr Program::create(const std::vector> std::map varMap; std::vector inputIndexes; std::set extraInputIndexes; + std::map TensorDescribeName; + if (net && net->extraTensorDescribe.size() > 0) { + for (int i = 0; i < net->extraTensorDescribe.size(); ++i) { + TensorDescribeName.insert(std::make_pair(net->extraTensorDescribe[i]->name, i)); + } + } for (int index = 0; index < oplists.size(); ++index) { std::set invalidSet; - createUnit(varMap, inputIndexes, oplists, oplists[index].get(), tensorName, invalidSet, extraInputIndexes, net); + createUnit(varMap, inputIndexes, oplists, oplists[index].get(), tensorName, invalidSet, extraInputIndexes, net, TensorDescribeName); } std::map outputs; for (auto& iter : varMap) { diff --git a/tools/converter/source/optimizer/Program.hpp b/tools/converter/source/optimizer/Program.hpp index 6f27c6ab0..4a7cefe9d 100644 --- a/tools/converter/source/optimizer/Program.hpp +++ b/tools/converter/source/optimizer/Program.hpp @@ -36,7 +36,7 @@ class Program { void save(MNN::NetT* net); private: static std::shared_ptr create(const std::vector>& oplists, const std::vector& tensorName, const std::vector& outputName, bool supportExtra, bool saveAllVars, const MNN::NetT* net=nullptr); - static void createUnit(std::map& varMap, std::vector& inputIndexes, const std::vector>& oplists, MNN::OpT* op, const std::vector& tensorName, std::set& invalidSet, std::set& extraInputIndexes, const MNN::NetT* net=nullptr); + static void createUnit(std::map& varMap, std::vector& inputIndexes, const std::vector>& oplists, MNN::OpT* op, const std::vector& tensorName, std::set& invalidSet, std::set& extraInputIndexes, const MNN::NetT* net=nullptr, std::map TensorDescribeName = {}); Program() { } std::map mVars; diff --git a/tools/converter/source/optimizer/TemplateMerge.cpp b/tools/converter/source/optimizer/TemplateMerge.cpp index 4cdfeb537..d187dda18 100644 --- a/tools/converter/source/optimizer/TemplateMerge.cpp +++ b/tools/converter/source/optimizer/TemplateMerge.cpp @@ -156,10 +156,6 @@ bool TemplateMerge::onExecute(const std::vector& outputs, PassPriority pri } else { invalidVARP.insert(var); } - if (var->get() && var->get()->type() == 19) { - auto updateInputs = updateInputVarOfExpr(var); - updateVars.insert(updateInputs.begin(), updateInputs.end()); - } } } MNN::Express::ExecutorScope::Current()->gc(); diff --git a/tools/converter/source/optimizer/merge/ConvDeQuantizeLinearFuseToConvInt8.cpp b/tools/converter/source/optimizer/merge/ConvDeQuantizeLinearFuseToConvInt8.cpp index 849063dff..83d45af22 100644 --- 
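The Program.cpp change above reduces to indexing extraTensorDescribe by tensor name once, then attaching quant scale/zero to each newly created variable by the producing op's name rather than by raw output index. A minimal sketch with stand-in types (the real NetT/TensorDescribeT come from the generated schema; the structs here are illustrative only):

    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for the generated flatbuffer types.
    struct QuantInfoT      { float scale = 1.f; float zero = 0.f; };
    struct TensorDescribeT { std::string name; std::unique_ptr<QuantInfoT> quantInfo; };

    // Build the name -> index map once per net, as Program::create now does, so createUnit
    // can look up the describe entry for an op by name and call writeScaleMap on its output.
    static std::map<std::string, int> indexDescribesByName(
            const std::vector<std::unique_ptr<TensorDescribeT>>& describes) {
        std::map<std::string, int> nameToIndex;
        for (int i = 0; i < (int)describes.size(); ++i) {
            nameToIndex.insert(std::make_pair(describes[i]->name, i));
        }
        return nameToIndex;
    }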
a/tools/converter/source/optimizer/merge/ConvDeQuantizeLinearFuseToConvInt8.cpp +++ b/tools/converter/source/optimizer/merge/ConvDeQuantizeLinearFuseToConvInt8.cpp @@ -24,6 +24,17 @@ static VARP _ReshapeF(VARP x, VARP shape, MNN::MNN_DATA_FORMAT format) { reshape->main.AsReshape()->dimType = format; return (Variable::create(Expr::create(reshape.get(), {x, shape}))); } + +static VARP _ConvertF(VARP input, MNN::MNN_DATA_FORMAT format) { + std::unique_ptr convert(new OpT); + convert->type = OpType_ConvertTensor; + convert->main.type = OpParameter_TensorConvertInfo; + convert->main.value = new TensorConvertInfoT; + convert->main.AsTensorConvertInfo()->source = MNN_DATA_FORMAT_NC4HW4; + convert->main.AsTensorConvertInfo()->dest = format; + return (Variable::create(Expr::create(convert.get(), {input}))); +} + static bool matchConvInt8ToOther(EXPRP expr, int i) { // convint8->quant->cast->dequant->other // check op type not convint8. if (nullptr == expr->get()) { @@ -60,9 +71,16 @@ static bool matchConvInt8ToOther(EXPRP expr, int i) { // convint8->quant->cast-> VARP conv_var = quan_expr->inputs().at(0); EXPRP conv_expr = conv_var->expr().first; - if (!conv_expr->get() || (conv_expr->get()->type() != OpType_ConvInt8 && conv_expr->get()->type() != OpType_DepthwiseConvInt8)) { + if (!conv_expr->get() || (conv_expr->get()->type() != OpType_ConvInt8 && conv_expr->get()->type() != OpType_DepthwiseConvInt8 && conv_expr->get()->type() != OpType_ReLU && conv_expr->get()->type() != OpType_ReLU6)) { return false; } + if (conv_expr->get()->type() == OpType_ReLU || conv_expr->get()->type() == OpType_ReLU6) { + conv_var = conv_expr->inputs().at(0); + conv_expr = conv_var->expr().first; + if (!conv_expr->get() || (conv_expr->get()->type() != OpType_ConvInt8 && conv_expr->get()->type() != OpType_DepthwiseConvInt8)) { + return false; + } + } return true; } static VARP transformConvInt8ToOther(EXPRP expr, int i) { // convint8->quant->cast->dequant->other => convInt8(float output)->other @@ -75,37 +93,74 @@ static VARP transformConvInt8ToOther(EXPRP expr, int i) { // convint8->quant->ca auto conv_var = quan_expr->inputs().at(0); auto conv_expr = conv_var->expr().first; auto convInt8Input = conv_expr->inputs().at(0); + bool hasRelu = false, hasRelu6 = false; + if (conv_expr->get()->type() == OpType_ReLU || conv_expr->get()->type() == OpType_ReLU6) { + hasRelu = conv_expr->get()->type() == OpType_ReLU ? true : false; + hasRelu6 = conv_expr->get()->type() == OpType_ReLU6 ? 
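The extended matcher above now also accepts an activation between the int8 convolution and the quant/cast/dequant chain, so both ConvInt8 -> Quantize -> Cast -> Dequantize -> X and ConvInt8 -> ReLU/ReLU6 -> Quantize -> Cast -> Dequantize -> X are rewritten, with the activation folded into the convolution's common flags. A sketch of that fold (helper name hypothetical; assumes the generated schema header):

    #include "MNN_generated.h"

    // Fold a trailing activation into the convolution's flags, as the transform above does
    // once it has confirmed the real producer is ConvInt8/DepthwiseConvInt8.
    static void foldActivationIntoConv(MNN::Convolution2DCommonT* common, MNN::OpType actType) {
        common->relu  = (actType == MNN::OpType_ReLU);
        common->relu6 = (actType == MNN::OpType_ReLU6);
    }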
true : false; + conv_expr = convInt8Input->expr().first; + convInt8Input = conv_expr->inputs().at(0); + } // change old convInt8 to return a float value, which is input to expr; std::unique_ptr newConvInt8(new MNN::Convolution2DT); std::unique_ptr oldConvOp(conv_expr->get()->UnPack()); auto oldConvParams = oldConvOp->main.AsConvolution2D(); + + float output_zero = oldConvParams->symmetricQuan->outputZeroPoint; + float output_scale = oldConvParams->quanParameter->scaleOut; + float input_scale = oldConvParams->quanParameter->scaleIn; + float input_zero = oldConvParams->symmetricQuan->zeroPoint; + newConvInt8->common.reset(new MNN::Convolution2DCommonT); newConvInt8->common = std::move(oldConvParams->common); + newConvInt8->common->relu = hasRelu; + newConvInt8->common->relu6 = hasRelu6; newConvInt8->symmetricQuan.reset(new QuantizedFloatParamT); newConvInt8->symmetricQuan = std::move(oldConvParams->symmetricQuan); - newConvInt8->symmetricQuan->outputDataType = MNN::DataType_DT_FLOAT; - // newConvInt8->bias = std::move(oldConvParams->bias); - // newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); - - //Update newConvInt8 scale - float outputScale = quan_expr->inputs().at(2)->readMap()[0]; - int oc = static_cast(newConvInt8->symmetricQuan->scale.size()); - float* ptr = newConvInt8->symmetricQuan->scale.data(); - for (int i = 0; i < oc; ++i) { - ptr[i] = ptr[i] * outputScale; - } + //newConvInt8->symmetricQuan->outputDataType = MNN::DataType_DT_FLOAT; + newConvInt8->quanParameter.reset(new IDSTQuanT); + newConvInt8->bias = std::move(oldConvParams->bias); + newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); std::unique_ptr conv_op(new OpT); conv_op->name = conv_expr->name(); - conv_op->type = oldConvOp->type; + conv_op->type = OpType_ConvInt8; conv_op->main.type = OpParameter_Convolution2D; conv_op->main.value = newConvInt8.release(); + convInt8Input->writeScaleMap(input_scale, input_zero); auto newconv_expr = Expr::create(conv_op.get(), {convInt8Input}); newconv_expr->setName(conv_expr->name()); auto newconv_var = Variable::create(newconv_expr); newconv_var->setName(conv_expr->outputName(0)); + newconv_var->writeScaleMap(output_scale, output_zero); + if (conv_expr->inputs().size() == 5) { // Process matmul output + auto config = Global::Get(); + auto format = MNN::MNN_DATA_FORMAT_NCHW; + if (config->model == modelConfig::TFLITE || config->model == modelConfig::TENSORFLOW) { + format = MNN_DATA_FORMAT_NHWC; + } + // expr->inputs = {input, concat, needSqueezeA, needSqueezeB, transposeA} + auto concat_var = conv_expr->inputs().at(1); + bool needSqueezeA = conv_expr->inputs().at(2)->readMap()[0] > 0.f; + bool needSqueezeB = conv_expr->inputs().at(3)->readMap()[0] > 0.f; + + auto output = _ConvertF(newconv_var, format); + output->writeScaleMap(output_scale, output_zero); + VARP reshapeVar = _ReshapeF(output, concat_var, format); + reshapeVar->writeScaleMap(output_scale, output_zero); + if (needSqueezeA) { + reshapeVar = _Squeeze(reshapeVar, {0}); + reshapeVar->writeScaleMap(output_scale, output_zero); + } + if (needSqueezeB) { + reshapeVar = _Squeeze(reshapeVar, {1}); + reshapeVar->writeScaleMap(output_scale, output_zero); + } + reshapeVar->setName(expr->outputName(0) + "__matmul_cvt_convInt8_reshape"); + Expr::replace(conv_expr, reshapeVar->expr().first); + return reshapeVar; + } Expr::replace(conv_expr, newconv_expr); return newconv_var; @@ -162,9 +217,12 @@ static VARP transformOtherToOther (EXPRP expr, int i) { // ohter->quant->cast->d auto cast_expr = 
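Throughout this pass, writeScaleMap(scale, zero) records the affine relation between a tensor's float values and its int8 codes, with the scales taken from the convolution's quanParameter (scaleIn/scaleOut) and the zero points from symmetricQuan (zeroPoint/outputZeroPoint). For reference, the two directions of that relation as a standalone sketch, using the symmetric [-127, 127] clamp seen elsewhere in this patch:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    static float dequantize(int8_t code, float scale, float zero) {
        return ((float)code - zero) * scale;
    }
    static int8_t quantize(float value, float scale, float zero) {
        int code = (int)std::round(value / scale) + (int)zero;
        return (int8_t)std::min(127, std::max(-127, code));
    }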
cast_var->expr().first; auto quan_var = cast_expr->inputs().at(0); auto quan_expr = quan_var->expr().first; - auto other_var = quan_expr->inputs().at(0); + auto input_var = quan_expr->inputs().at(0); - return other_var; + float scale = quan_expr->inputs().at(2)->readMap()[0]; + float zero = quan_expr->inputs().at(3)->readMap()[0]; + input_var->writeScaleMap(scale, zero); + return input_var; } static VARP buildInputForMatmulInt8 (VARP input, VARP transposeA, VARP SqueezeA, int num_input) { auto transposeAType = transposeA->expr().first; @@ -197,6 +255,41 @@ static VARP buildInputForMatmulInt8 (VARP input, VARP transposeA, VARP SqueezeA, return newInput; } +static EXPRP buildNewConvExpr(EXPRP oldConvExpr, VARP convInput, std::vector updateInfo = {}) { + std::unique_ptr newConvInt8(new MNN::Convolution2DT); + std::unique_ptr oldConvOp(oldConvExpr->get()->UnPack()); + auto oldConvParams = oldConvOp->main.AsConvolution2D(); + newConvInt8->common.reset(new MNN::Convolution2DCommonT); + newConvInt8->common = std::move(oldConvParams->common); + newConvInt8->symmetricQuan.reset(new QuantizedFloatParamT); + newConvInt8->symmetricQuan = std::move(oldConvParams->symmetricQuan); + newConvInt8->quanParameter.reset(new IDSTQuanT); + newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); + newConvInt8->bias = std::move(oldConvParams->bias); + + if (updateInfo.size() > 0) { + newConvInt8->common->relu = updateInfo[0] ? true : false; + } + if (updateInfo.size() > 1) { + newConvInt8->common->relu6 = updateInfo[1] ? true : false; + } + if (updateInfo.size() > 2) { + newConvInt8->symmetricQuan->outputDataType = updateInfo[2] ? DataType_DT_FLOAT : DataType_DT_INT8; + } + float input_scale = newConvInt8->quanParameter->scaleIn; + float input_zero = newConvInt8->symmetricQuan->zeroPoint; + convInput->writeScaleMap(input_scale, input_zero); + + std::unique_ptr conv_op(new OpT); + conv_op->name = oldConvExpr->name(); + conv_op->type = oldConvOp->type; + conv_op->main.type = OpParameter_Convolution2D; + conv_op->main.value = newConvInt8.release(); + + auto new_conv_expr = Expr::create(conv_op.get(), {convInput}); + return new_conv_expr; +} + static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convInt8 auto matchConvInt8ToConvInt8 = [](EXPRP expr) { // check convInt8 @@ -259,33 +352,98 @@ static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convIn auto quan_var = cast_expr->inputs().at(0); auto quan_expr = quan_var->expr().first; auto convInt8Input = quan_expr->inputs().at(0); - if (expr->inputs().size() == 3) { + /* conv params*/ + std::unique_ptr newConvInt8(new MNN::Convolution2DT); + std::unique_ptr oldConvOp(expr->get()->UnPack()); + auto oldConvParams = oldConvOp->main.AsConvolution2D(); + float input_scale = oldConvParams->quanParameter->scaleIn; + float input_zero = oldConvParams->symmetricQuan->zeroPoint; + /* check */ + auto conv_var = quan_expr->inputs().at(0); + conv_var->writeScaleMap(input_scale, input_zero); + EXPRP conv_expr = conv_var->expr().first; + VARP first_conv_input_var = conv_expr->inputs().at(0); + if (conv_expr->get()->type() == OpType_PReLU || conv_expr->get()->type() == OpType_ReLU || conv_expr->get()->type() == OpType_ReLU6) { + auto relu_expr = conv_expr; + bool relu_ = relu_expr->get()->type() == OpType_ReLU ? true: false; + bool relu6_ = relu_expr->get()->type() == OpType_ReLU6 ? 
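The updateInfo vector taken by buildNewConvExpr above is positional: element 0 toggles common->relu, element 1 toggles common->relu6, and element 2, when present and set, switches the output data type to float instead of int8. A usage fragment (values illustrative; assumes the surrounding file's context):

    // Fold a trailing ReLU while keeping int8 output.
    auto newConvExpr = buildNewConvExpr(conv_expr, convInput, {true /*relu*/, false /*relu6*/});
    Expr::replace(conv_expr, newConvExpr);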
true: false; + VARP conv_var_0 = relu_expr->inputs().at(0); + conv_expr = conv_var_0->expr().first; + first_conv_input_var = conv_expr->inputs().at(0); + auto newFirstConvExpr = buildNewConvExpr(conv_expr, first_conv_input_var, {relu_, relu6_}); // write scale for first_conv_input_var + Expr::replace(conv_expr, newFirstConvExpr); + convInt8Input = Variable::create(conv_expr); + conv_var = convInt8Input; + conv_var->writeScaleMap(input_scale, input_zero); + } else { + auto newFirstConvExpr = buildNewConvExpr(conv_expr, first_conv_input_var); // Just write scale for first_conv_input_var, do not update conv info. + Expr::replace(conv_expr, newFirstConvExpr); + convInt8Input = Variable::create(conv_expr); + conv_var = convInt8Input; + conv_var->writeScaleMap(input_scale, input_zero); + } + if (conv_expr->inputs().size() == 5) { + // Process matmul output + auto config = Global::Get(); + auto format = MNN::MNN_DATA_FORMAT_NCHW; + if (config->model == modelConfig::TFLITE || config->model == modelConfig::TENSORFLOW) { + format = MNN_DATA_FORMAT_NHWC; + } + // expr->inputs = {input, concat, needSqueezeA, needSqueezeB, transposeA} + auto concat_var = conv_expr->inputs().at(1); + bool needSqueezeA = conv_expr->inputs().at(2)->readMap()[0] > 0.f; + bool needSqueezeB = conv_expr->inputs().at(3)->readMap()[0] > 0.f; + + auto output = _ConvertF(conv_var, format); + output->writeScaleMap(input_scale, input_zero); + + VARP reshapeVar = _ReshapeF(output, concat_var, format); + reshapeVar->writeScaleMap(input_scale, input_zero); + if (needSqueezeA) { + reshapeVar = _Squeeze(reshapeVar, {0}); + } + if (needSqueezeB) { + reshapeVar = _Squeeze(reshapeVar, {1}); + } + reshapeVar->setName(conv_expr->outputName(0) + "__matmul_cvt_convInt8_reshape"); + Expr::replace(conv_expr, reshapeVar->expr().first); + convInt8Input = reshapeVar; + convInt8Input->writeScaleMap(input_scale, input_zero); + } + + if (expr->inputs().size() == 5) { auto matmulop = expr->get(); auto count_input = matmulop->main_as_Convolution2D()->common()->inputCount(); - convInt8Input = buildInputForMatmulInt8(convInt8Input, expr->inputs().at(1), expr->inputs().at(2), count_input); + convInt8Input = buildInputForMatmulInt8(convInt8Input, expr->inputs().at(4), expr->inputs().at(2), count_input); + convInt8Input->writeScaleMap(input_scale, input_zero); } - std::unique_ptr newConvInt8(new MNN::Convolution2DT); - std::unique_ptr oldConvOp(expr->get()->UnPack()); - auto oldConvParams = oldConvOp->main.AsConvolution2D(); + newConvInt8->common.reset(new MNN::Convolution2DCommonT); newConvInt8->common = std::move(oldConvParams->common); newConvInt8->symmetricQuan.reset(new QuantizedFloatParamT); newConvInt8->symmetricQuan = std::move(oldConvParams->symmetricQuan); - // newConvInt8->bias = std::move(oldConvParams->bias); - // newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); + newConvInt8->quanParameter.reset(new IDSTQuanT); + newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); + newConvInt8->bias = std::move(oldConvParams->bias); + float scaleout = newConvInt8->quanParameter->scaleOut; + float zeroout = newConvInt8->symmetricQuan->outputZeroPoint; std::unique_ptr conv_op(new OpT); conv_op->name = expr->name(); conv_op->type = oldConvOp->type; conv_op->main.type = OpParameter_Convolution2D; conv_op->main.value = newConvInt8.release(); + - auto conv_expr = Expr::create(conv_op.get(), {convInt8Input}); - conv_expr->setName(expr->name()); -// auto conv_var = Variable::create(conv_expr); -// 
conv_var->setName(expr->outputName(0)); - Expr::replace(expr, conv_expr); + auto new_conv_expr = Expr::create(conv_op.get(), {convInt8Input}); + if (expr->inputs().size() == 5) { + new_conv_expr = Expr::create(conv_op.get(), {convInt8Input, expr->inputs()[1], expr->inputs()[2], expr->inputs()[3], expr->inputs()[4]}); + } + new_conv_expr->setName(expr->name()); + auto new_conv_var = Variable::create(new_conv_expr); + new_conv_var->writeScaleMap(scaleout, zeroout); + Expr::replace(expr, new_conv_expr); return true; }; @@ -341,31 +499,46 @@ static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convIn auto cast_expr = cast_var->expr().first; auto quan_var = cast_expr->inputs().at(0); auto quan_expr = quan_var->expr().first; - auto convInt8Input = quan_expr->inputs().at(1); - if (expr->inputs().size() == 3) { // The convInt8 comes from matmul. + auto convInt8Input = quan_expr->inputs().at(0); + auto other_var = convInt8Input; + if (expr->inputs().size() == 5) { + // [input,concat,squeezeA,squeezeB,transposeA] auto matmulop = expr->get(); auto count_input = matmulop->main_as_Convolution2D()->common()->inputCount(); - auto matmulInput = expr->inputs().at(0); - convInt8Input = buildInputForMatmulInt8(convInt8Input, expr->inputs().at(1), expr->inputs().at(2), count_input); + convInt8Input = buildInputForMatmulInt8(convInt8Input, expr->inputs().at(4), expr->inputs().at(2), count_input); + convInt8Input->setName(expr->name() + "__matmul_converted_input"); } std::unique_ptr newConvInt8(new MNN::Convolution2DT); std::unique_ptr oldConvOp(expr->get()->UnPack()); auto oldConvParams = oldConvOp->main.AsConvolution2D(); + float input_scale = oldConvParams->quanParameter->scaleIn; + float output_scale = oldConvParams->quanParameter->scaleOut; + float input_zero = static_cast(oldConvParams->symmetricQuan->zeroPoint); + float output_zero = static_cast(oldConvParams->symmetricQuan->outputZeroPoint); + newConvInt8->common.reset(new MNN::Convolution2DCommonT); newConvInt8->common = std::move(oldConvParams->common); newConvInt8->symmetricQuan.reset(new QuantizedFloatParamT); newConvInt8->symmetricQuan = std::move(oldConvParams->symmetricQuan); - // newConvInt8->bias = std::move(oldConvParams->bias); - // newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); + newConvInt8->bias = std::move(oldConvParams->bias); + newConvInt8->quanParameter.reset(new IDSTQuanT); + newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); std::unique_ptr conv_op(new OpT); conv_op->name = expr->name(); conv_op->type = oldConvOp->type; conv_op->main.type = OpParameter_Convolution2D; conv_op->main.value = newConvInt8.release(); - + + other_var->writeScaleMap(input_scale, input_zero); + convInt8Input->writeScaleMap(input_scale, input_zero); auto conv_expr = Expr::create(conv_op.get(), {convInt8Input}); + if (expr->inputs().size() == 5) { + conv_expr = Expr::create(conv_op.get(), {convInt8Input, expr->inputs()[1], expr->inputs()[2], expr->inputs()[3], expr->inputs()[4]}); + } + auto conv_var = Variable::create(conv_expr); + conv_var->writeScaleMap(output_scale, output_zero); conv_expr->setName(expr->name()); Expr::replace(expr, conv_expr); return true; @@ -389,7 +562,7 @@ static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convIn } return true; }; - auto transformXToOther = [](EXPRP expr) { // ohter->quant->cast->dequant->other => other->other + auto transformXToOther = [](EXPRP expr) { // X->quant->cast->dequant->output_other => X->output_other int input_size = 
static_cast(expr->inputs().size()); std::vector new_inputs(input_size); for (int i = 0; i < input_size; ++i) { @@ -473,23 +646,6 @@ static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convIn auto X_expr = X_var->expr().first; bool convInt8End = X_expr->get()->type() == OpType_ConvInt8; - bool hasReshape = X_expr->get()->type() == OpType_Reshape; - if (X_expr->get()->type() == OpType_Reshape) { - auto convert_var = X_expr->inputs().at(0); - auto convert_expr = convert_var->expr().first; - if (convert_expr->get() && convert_expr->get()->type() == OpType_ConvertTensor) { - auto convint8_var = convert_expr->inputs().at(0); - auto convint8_expr = convint8_var->expr().first; - if (convint8_expr->get() && convint8_expr->get()->type() == OpType_ConvInt8) { - convInt8End = true; - X_expr = std::move(convint8_expr); - } - } - if (convert_expr->get() && convert_expr->get()->type() == OpType_ConvInt8) { - convInt8End = true; - X_expr = std::move(convert_expr); - } - } if (convInt8End) { auto convInt8Input = X_expr->inputs().at(0); @@ -500,17 +656,13 @@ static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convIn newConvInt8->common = std::move(oldConvParams->common); newConvInt8->symmetricQuan.reset(new QuantizedFloatParamT); newConvInt8->symmetricQuan = std::move(oldConvParams->symmetricQuan); - newConvInt8->symmetricQuan->outputDataType = DataType_DT_FLOAT; // If convInt8 is the last op, float value is the torch-fx model's output. - // newConvInt8->bias = std::move(oldConvParams->bias); - // newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); + newConvInt8->quanParameter.reset(new IDSTQuanT); + //newConvInt8->symmetricQuan->outputDataType = DataType_DT_FLOAT; // If convInt8 is the last op, float value is the torch-fx model's output. + newConvInt8->bias = std::move(oldConvParams->bias); + newConvInt8->quanParameter = std::move(oldConvParams->quanParameter); - //Update convInt8 scale. 
- float outputScale = quan_expr->inputs().at(2)->readMap()[0]; - int oc = static_cast(newConvInt8->symmetricQuan->scale.size()); - float* ptr = newConvInt8->symmetricQuan->scale.data(); - for (int i = 0; i < oc; ++i) { - ptr[i] = ptr[i] * outputScale; - } + float output_scale = newConvInt8->quanParameter->scaleOut; + float output_zero = newConvInt8->symmetricQuan->outputZeroPoint; std::unique_ptr conv_op(new OpT); conv_op->name = X_expr->name(); @@ -519,23 +671,51 @@ static auto gRegister = []() { // convInt8->(relu)->quant->cast->dequant->convIn conv_op->main.value = newConvInt8.release(); auto conv_expr = Expr::create(conv_op.get(), {convInt8Input}); + auto conv_var = Variable::create(conv_expr); + conv_var->writeScaleMap(output_scale, output_zero); + if (X_expr->inputs().size() == 5) { + // Process matmul output + auto config = Global::Get(); + auto format = MNN::MNN_DATA_FORMAT_NCHW; + if (config->model == modelConfig::TFLITE || config->model == modelConfig::TENSORFLOW) { + format = MNN_DATA_FORMAT_NHWC; + } + + conv_var->setName(X_expr->outputName(0)); +// newconv_var->setName(conv_expr->outputName(0)); + // expr->inputs = {input, concat, needSqueezeA, needSqueezeB, transposeA} + auto concat_var = X_expr->inputs().at(1); + bool needSqueezeA = X_expr->inputs().at(2)->readMap()[0] > 0.f; + bool needSqueezeB = X_expr->inputs().at(3)->readMap()[0] > 0.f; + + auto output = _ConvertF(conv_var, format); + output->writeScaleMap(output_scale, output_zero); + VARP reshapeVar = _ReshapeF(output, concat_var, format); + reshapeVar->writeScaleMap(output_scale, output_zero); + if (needSqueezeA) { + reshapeVar = _Squeeze(reshapeVar, {0}); + reshapeVar->writeScaleMap(output_scale, output_zero); + } + if (needSqueezeB) { + reshapeVar = _Squeeze(reshapeVar, {1}); + reshapeVar->writeScaleMap(output_scale, output_zero); + } + reshapeVar->setName(expr->name()); + Expr::replace(expr, reshapeVar->expr().first); + return true; + } conv_expr->setName(expr->name()); - - if (hasReshape) { - conv_expr->setName(X_expr->name()); - std::unique_ptr reshapeOp(X_var->expr().first->get()->UnPack()); - auto new_reshape_expr = Expr::create(reshapeOp.get(), X_var->expr().first->inputs()); - new_reshape_expr->setName(expr->name()); - Expr::replace(expr, new_reshape_expr); - } - Expr::replace(X_expr, conv_expr); + Expr::replace(expr, conv_expr); return true; } - + float output_scale = quan_expr->inputs().at(2)->readMap()[0]; + float output_zero = quan_expr->inputs().at(3)->readMap()[0]; // directly return the op output. 
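Every matmul branch in this pass (conv expressions carrying 5 inputs) rebuilds the original MatMul output the same way: convert the conv result out of NC4HW4, reshape to the recorded target shape, and squeeze whichever dimensions were expanded when the MatMul became a 1x1 convolution, propagating the output scale/zero at each step. A sketch of that shared recipe, reusing the _ConvertF/_ReshapeF helpers defined earlier in this file (function name hypothetical):

    static VARP restoreMatMulShape(VARP convOut, VARP targetShape, bool needSqueezeA, bool needSqueezeB,
                                   MNN::MNN_DATA_FORMAT format, float scale, float zero) {
        auto out = _ConvertF(convOut, format);       // NC4HW4 -> NCHW / NHWC
        out->writeScaleMap(scale, zero);
        out = _ReshapeF(out, targetShape, format);   // back to the MatMul's original shape
        out->writeScaleMap(scale, zero);
        if (needSqueezeA) {
            out = _Squeeze(out, {0});
            out->writeScaleMap(scale, zero);
        }
        if (needSqueezeB) {
            out = _Squeeze(out, {1});
            out->writeScaleMap(scale, zero);
        }
        return out;
    }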
std::unique_ptr oldOtherOp(X_expr->get()->UnPack()); auto newop_expr = Expr::create(oldOtherOp.get(), X_expr->inputs()); newop_expr->setName(expr->name()); + auto newop_var = Variable::create(newop_expr); + newop_var->writeScaleMap(output_scale, output_zero); Expr::replace(expr, newop_expr); return true; }; diff --git a/tools/converter/source/optimizer/merge/ConvertMatMulToConv2D.cpp b/tools/converter/source/optimizer/merge/ConvertMatMulToConv2D.cpp index 0330a7342..6138f14a0 100644 --- a/tools/converter/source/optimizer/merge/ConvertMatMulToConv2D.cpp +++ b/tools/converter/source/optimizer/merge/ConvertMatMulToConv2D.cpp @@ -479,17 +479,19 @@ ConvertMatMulToConv2D::ConvertMatMulToConv2D() { } } auto matmulInput = matmul_expr->inputs().at(0); - auto inputScale = matmul_expr->inputs().at(2); - auto inputZero = matmul_expr->inputs().at(3); + auto inputScale = matmul_expr->inputs().at(2); + auto inputZero = matmul_expr->inputs().at(3); auto weightScale = matmul_expr->inputs().at(4); + auto weightZero = matmul_expr->inputs().at(5); auto outputScale = matmul_expr->inputs().at(6); - auto outputZero = matmul_expr->inputs().at(7); + auto outputZero = matmul_expr->inputs().at(7); - float input_zero = inputZero->readMap()[0]; - float input_scale = inputScale->readMap()[0]; + float input_zero = inputZero->readMap()[0]; + float input_scale = inputScale->readMap()[0]; const float* weight_scale = weightScale->readMap(); - float output_scale = outputScale->readMap()[0]; - uint8_t output_zero = outputZero->readMap()[0]; + const float* weight_zero = weightZero->readMap(); + float output_scale = outputScale->readMap()[0]; + int output_zero = static_cast(outputZero->readMap()[0]); // Convint8 std::unique_ptr dense(new MNN::Convolution2DT); dense->common.reset(new MNN::Convolution2DCommonT); @@ -502,42 +504,29 @@ ConvertMatMulToConv2D::ConvertMatMulToConv2D() { dense->symmetricQuan->clampMax = 127; dense->symmetricQuan->zeroPoint = static_cast(input_zero); dense->symmetricQuan->outputZeroPoint = static_cast(output_zero); - // weight and bias - auto weight_ptr = weight->readMap(); - dense->symmetricQuan->weight.resize(weightInfo->size); - memcpy(dense->symmetricQuan->weight.data(), weight_ptr, weightInfo->size * sizeof(int8_t)); - std::vector weightKenelSum(numberOutput); - int kernelSize = weightInfo->size / numberOutput; - for (int i = 0; i < numberOutput; i++) { - int temp = 0; - int offset = i * kernelSize; - for (int j = 0; j < kernelSize; j++) { - temp += int(weight_ptr[offset + j]); - } - weightKenelSum[i] = temp; - } - - - dense->symmetricQuan->bias.resize(numberOutput, 0); - // compute conv scale=input_scale * weight_scale / output_scale - std::vector conv_scale(numberOutput); - for (int k = 0; k < numberOutput; ++k) { - if (output_scale != 0) { - conv_scale[k] = input_scale * weight_scale[k] / output_scale; - } else { - conv_scale[k] = 0.f; - } + // quantParameter + dense->quanParameter.reset(new IDSTQuanT); + dense->quanParameter->scaleIn = input_scale; + dense->quanParameter->scaleOut = output_scale; + dense->quanParameter->type = 4; + dense->quanParameter->aMin = -128; + dense->quanParameter->readType = numberOutput; + dense->quanParameter->quantScale = 1.0f; + dense->quanParameter->buffer.resize(weightInfo->size); + ::memcpy(dense->quanParameter->buffer.data(), weight->readMap(), weightInfo->size * sizeof(int8_t)); + dense->bias.resize(numberOutput, 0); + // quan alpha + dense->quanParameter->alpha.resize(2 * numberOutput); + for (int i = 0; i < numberOutput; ++i) { + 
dense->quanParameter->alpha[2 * i] = (-1)*(weight_zero[i] + 128) * weight_scale[i]; + dense->quanParameter->alpha[2 * i + 1] = weight_scale[i]; } + if (matmul_expr->inputs().size() == 9) { bias_var = matmul_expr->inputs().at(8); - auto bias_ptr = bias_var->readMap(); - auto biasInt32 = dense->symmetricQuan->bias.data(); - for (int cnt = 0; cnt < numberOutput; ++cnt) { - biasInt32[cnt] = bias_ptr[cnt] - weightKenelSum[cnt] * static_cast(input_zero) + static_cast(static_cast(output_zero) / conv_scale[cnt]); - } -// memcpy(dense->symmetricQuan->bias.data(), bias_ptr, sizeof(int32_t) * numberOutput); + auto bias_ptr = bias_var->readMap(); + memcpy(dense->bias.data(), bias_ptr, sizeof(int32_t) * numberOutput); } - dense->symmetricQuan->scale = std::move(conv_scale); // Third, build convint8 op std::unique_ptr dense_op(new OpT); @@ -554,42 +543,27 @@ ConvertMatMulToConv2D::ConvertMatMulToConv2D() { VARP inputRemain = _StridedSlice(inputShape, _Unsqueeze(_Scalar(0), {0}), _Unsqueeze(rank - _Scalar(2), {0}), _Unsqueeze(_Scalar(1), {0}), 0, 0, 0, 0, 0); if (transposeA) { inputE = _Slice(inputShape, _Unsqueeze(rank - _Scalar(1), {0}), _Unsqueeze(_Scalar(1), {0})); - if (format == MNN_DATA_FORMAT_NHWC) { - input = _ReshapeF(input, _Concat({_Unsqueeze(_Scalar(-1), {0}), inputE, _Unsqueeze(_Scalar(1), {0}), inputL}, 0), format); - } else { - input = _ReshapeF(input, _Concat({_Unsqueeze(_Scalar(-1), {0}), inputL, inputE, _Unsqueeze(_Scalar(1), {0})}, 0), format); - } } else { inputE = _Slice(inputShape, _Unsqueeze(rank - _Scalar(2), {0}), _Unsqueeze(_Scalar(1), {0})); - if (format == MNN_DATA_FORMAT_NHWC) { - input = _ReshapeF(input, _Concat({_Unsqueeze(_Scalar(-1), {0}), _Unsqueeze(_Scalar(1), {0}), _Unsqueeze(_Scalar(1), {0}), inputL}, 0), format); - } else { - input = _ReshapeF(input, _Concat({_Unsqueeze(_Scalar(-1), {0}), inputL, _Unsqueeze(_Scalar(1), {0}), _Unsqueeze(_Scalar(1), {0})}, 0), format); - } } if (config->externalFile && weightInfo->size >= config->externalTreshold) { RemoveAndStoreParam(dense_op, config->externalFile, config->externalOffset); } - float ta = 0, sa = 0; + float ta = 0, sa = 0, sqzb = 0; if (transposeA) { ta = 1.0f; } if (needSqueezeA) { sa = 1.0f; } - EXPRP dense_expr = Expr::create(dense_op.get(), {matmul_input, _Const(ta), _Const(sa)}, 1); - VARP output = Variable::create(dense_expr); - output->setName(matmul_expr->outputName(0) + "__matmul_converted"); - output = _ConvertF(output, format); - VARP reshapeVar = _ReshapeF(output, _Concat({inputRemain, inputE, outputH}, 0), format); - if (needSqueezeA) { - reshapeVar = _Squeeze(reshapeVar, {0}); - } if (needSqueezeB) { - reshapeVar = _Squeeze(reshapeVar, {1}); + sqzb = 1.0f; } - reshapeVar->setName(matmul_expr->outputName(0) + "__matmul_cvt_convInt8"); - Expr::replace(matmul_expr, reshapeVar->expr().first); + EXPRP dense_expr = Expr::create(dense_op.get(), {matmul_input, _Concat({inputRemain, inputE, outputH}, 0), _Const(sa), _Const(sqzb), _Const(ta)}, 1); + VARP output = Variable::create(dense_expr); + // output->setName(matmul_expr->outputName(0)); + dense_expr->setName(matmul_expr->outputName(0) + "__matmul_converted"); + Expr::replace(matmul_expr, dense_expr); return true; }; TemplateMerge::getInstance("Merge").insertTemplateV2("MatMulInt8ToConvInt8", fold, PASS_PRIORITY_HIGH); diff --git a/tools/converter/source/optimizer/onnxextra/OnnxConvolutionMerge.cpp b/tools/converter/source/optimizer/onnxextra/OnnxConvolutionMerge.cpp index 18ba327ac..b122d2fb7 100644 --- 
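The IDSTQuan block filled in above keeps the raw int8 weights in quanParameter->buffer and stores one (offset, scale) pair per output channel in alpha: alpha[2*i+1] is the channel's weight scale and alpha[2*i] is -(weight_zero[i] + 128) * weight_scale[i], a layout consistent with dequantizing a stored code q as weight_scale[i] * (q - weight_zero[i]) under the -128-based storage; scaleIn/scaleOut carry the input and output tensor scales. A standalone sketch of the alpha packing (function name hypothetical):

    #include <vector>

    // Pack per-output-channel weight quant params into the alpha layout used by IDSTQuanT above.
    static std::vector<float> packAlpha(const float* weightScale, const float* weightZero, int outputCount) {
        std::vector<float> alpha(2 * outputCount);
        for (int i = 0; i < outputCount; ++i) {
            alpha[2 * i]     = -(weightZero[i] + 128.f) * weightScale[i]; // per-channel offset
            alpha[2 * i + 1] = weightScale[i];                            // per-channel scale
        }
        return alpha;
    }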
a/tools/converter/source/optimizer/onnxextra/OnnxConvolutionMerge.cpp +++ b/tools/converter/source/optimizer/onnxextra/OnnxConvolutionMerge.cpp @@ -308,12 +308,14 @@ class OnnxConvolutionTransform : public OnnxExtraManager::Transform { } auto outputScaleVar = outputExpr->inputs()[1]; float outputScale = outputScaleVar->readMap()[0]; - if (hasRelu) { - outputScale = 1.0f; - } int8_t outputZero = 0; if (outputExpr->inputs().size() > 2) { - outputZero = static_cast(outputExpr->inputs()[2]->readMap()[0]); + if (outputExpr->inputs()[2]->getInfo()->type.code == halide_type_uint) { + outputZero = static_cast(outputExpr->inputs()[2]->readMap()[0] - 128); + } else { + outputZero = static_cast(outputExpr->inputs()[2]->readMap()[0]); + } + } // Get weight quant info. float inputClampMin = -128; @@ -337,13 +339,17 @@ class OnnxConvolutionTransform : public OnnxExtraManager::Transform { weightKenelSum[i] = temp; } std::vector biasInt32(common->outputCount, 0); + convParam->quanParameter.reset(new IDSTQuanT); + convParam->quanParameter->aMin = -128; + convParam->quanParameter->aMax = co; + convParam->quanParameter->readType = co; + convParam->quanParameter->type = 4; + convParam->quanParameter->buffer.resize(weightSize); + ::memcpy(convParam->quanParameter->buffer.data(), pw, weightSize * sizeof(int8_t)); + convParam->quanParameter->quantScale = 1.0f; + convParam->quanParameter->scaleOut = outputScale; convParam->symmetricQuan.reset(new QuantizedFloatParamT); - convParam->symmetricQuan->weight.resize(weightSize); - ::memcpy(convParam->symmetricQuan->weight.data(), pw, weightSize * sizeof(int8_t)); convParam->symmetricQuan->nbits = 8; - if (hasRelu) { - convParam->symmetricQuan->outputDataType = DataType_DT_FLOAT; - } // Get input quant info. auto inputExpr = inputs[0]->expr().first; @@ -352,32 +358,30 @@ class OnnxConvolutionTransform : public OnnxExtraManager::Transform { auto inputZeroVar = inputExpr->inputs()[3]; float inputScale = inputScaleVar->readMap()[0]; int8_t inputZero = static_cast(inputZeroVar->readMap()[0]); + + convParam->quanParameter->scaleIn = inputScale; + convParam->quanParameter->alpha.resize(2 * co); // Compute convInt8 scale=(inputScale * weightScale)/outputScale std::vector scale(co); auto weightScale = weightexpr->inputs().at(2); auto ptrscale = weightScale->readMap(); + auto weightZero = weightexpr->inputs().at(3); + auto ptrzero = weightZero->readMap(); for (int cnt = 0; cnt < co; ++cnt) { - if (outputScale != 0){ - scale[cnt] = ptrscale[cnt] * inputScale / outputScale; - } else { - scale[cnt] = 0.f; - } + convParam->quanParameter->alpha[2 * cnt + 1] = ptrscale[cnt]; + convParam->quanParameter->alpha[2 * cnt] = (-1)*(ptrzero[cnt] + 128) * ptrscale[cnt]; } + convParam->bias.resize(co); if (inputSize > 2) { auto biasExpr = inputs[2]->expr().first; - auto biasInt32Var = biasExpr->inputs()[0]; - auto ptr = biasInt32Var->readMap(); - if (!ptr) { + auto biasfp32Var = biasExpr->inputs()[1]; + if (biasfp32Var->readMap() == nullptr) { MNN_ERROR("Convolution bias should be constant\n"); return nullptr; } - for (int cnt = 0; cnt < co; ++cnt) { - biasInt32[cnt] = ptr[cnt] - weightKenelSum[cnt] * static_cast(inputZero) + static_cast(static_cast(outputZero) / scale[cnt]); - } + ::memcpy(convParam->bias.data(), biasfp32Var->readMap(), co * sizeof(float)); } - convParam->symmetricQuan->bias = std::move(biasInt32); - convParam->symmetricQuan->scale = std::move(scale); convParam->symmetricQuan->clampMax = 127; convParam->symmetricQuan->clampMin = -128; convParam->symmetricQuan->zeroPoint = 
std::move(inputZero); diff --git a/tools/converter/source/optimizer/onnxextra/OnnxDeQuantizeLinear.cpp b/tools/converter/source/optimizer/onnxextra/OnnxDeQuantizeLinear.cpp index b3c241104..6927e246d 100644 --- a/tools/converter/source/optimizer/onnxextra/OnnxDeQuantizeLinear.cpp +++ b/tools/converter/source/optimizer/onnxextra/OnnxDeQuantizeLinear.cpp @@ -30,28 +30,55 @@ class OnnxDequantizeLinearTransform : public OnnxExtraManager::Transform { MNN_ERROR("QuantizeLinear should provide scale and input\n"); return nullptr; } - VARP zeropoint = nullptr; + + uint8_t dataType = halide_type_int; + VARP zeropoint = _Const(0.f); if (inputs.size() > 2) { - zeropoint = inputs[2]; + if (inputs[2]->getInfo() == nullptr) { + MNN_ERROR("DequantizeLinear layer inputs.size>2, but zeroPoint is not const\n"); + } + MNN_ASSERT(inputs[2]->getInfo() != nullptr); + auto zeroDim = inputs[2]->getInfo()->dim; + dataType = inputs[2]->getInfo()->type.code; + std::vector fp32Zero(inputs[2]->getInfo()->size); + if (dataType == halide_type_int) { + const int8_t* zeroPtr = inputs[2]->readMap(); + for (int j = 0; j < fp32Zero.size(); ++j) { + fp32Zero[j] = static_cast(zeroPtr[j]); + } + zeropoint = _Const(fp32Zero.data(), zeroDim, inputs[2]->getInfo()->order, halide_type_of()); + } else { + const uint8_t* zeroPtr = inputs[2]->readMap(); + for (int j = 0; j < fp32Zero.size(); ++j) { + fp32Zero[j] = static_cast(zeroPtr[j]) - 128.f; + } + zeropoint = _Const(fp32Zero.data(), zeroDim, inputs[2]->getInfo()->order, halide_type_of()); + } + zeropoint = _Cast(inputs[2]); } std::vector inputDim = {}; if (input->getInfo()) { inputDim = input->getInfo()->dim; + dataType = input->getInfo()->type.code; } - if (!scale->getInfo()->dim.empty()) { - zeropoint = _Unsqueeze(zeropoint, {1,2,3}); - scale = _Unsqueeze(scale, {1, 2, 3}); - } else { - scale = _Reshape(scale, {1}); - zeropoint = _Reshape(zeropoint, {1}); + auto offset = _Const(0.f); + if (dataType == halide_type_uint) { + offset = _Const(128.f); } + // if (!scale->getInfo()->dim.empty()) { + // zeropoint = _Unsqueeze(zeropoint, {1,2,3}); + // scale = _Unsqueeze(scale, {1, 2, 3}); + // } else { + // scale = _Reshape(scale, {1}); + // zeropoint = _Reshape(zeropoint, {1}); + // } auto _shape = _Const(inputDim.data(), {static_cast(inputDim.size())}, NHWC, halide_type_of()); - auto output = (_Cast(input) - _Cast(zeropoint)) * scale; + auto output = (_Cast(input) - zeropoint) * scale; std::unique_ptr iden(new MNN::OpT); iden->type = OpType_Int8ToFloat; - auto newExpr = MNN::Express::Expr::create(iden.get(), {input, output, scale, _Cast(zeropoint), _shape}, 5); + auto newExpr = MNN::Express::Expr::create(iden.get(), {input, output, scale, zeropoint - offset, _shape}, 5); newExpr->setName(expr->name()); return newExpr; } diff --git a/tools/converter/source/optimizer/onnxextra/OnnxGemm.cpp b/tools/converter/source/optimizer/onnxextra/OnnxGemm.cpp index 70490e32f..f5a96cb27 100644 --- a/tools/converter/source/optimizer/onnxextra/OnnxGemm.cpp +++ b/tools/converter/source/optimizer/onnxextra/OnnxGemm.cpp @@ -70,12 +70,19 @@ class OnnxGemmTransform : public OnnxExtraManager::Transform { // output quant info auto outputExpr = expr->outputs().front().lock(); auto outputScaleVar = outputExpr->inputs()[1]; - auto outputZero = outputExpr->inputs()[2]; + auto outputZero = _Const(0.f); + if (outputExpr->inputs().size() > 2 && outputExpr->inputs()[2]->getInfo()) { + if (outputExpr->inputs()[2]->getInfo()->type.code == halide_type_int) { + outputZero = _Cast(outputExpr->inputs()[2]); + } else { + 
outputZero = _Cast(outputExpr->inputs()[2]) - _Const(128.f); + } + } Z = _MatMul_Int8(X, y_int8, transA, transB, x_scale, x_zero, y_scale, y_zero, outputScaleVar, outputZero); if (inputs.size() > 2) { auto bias_expr = inputs[2]->expr().first; - auto bias_int32 = bias_expr->inputs().at(0); + auto bias_int32 = bias_expr->inputs().at(1); Z = _MatMul_Int8(X, y_int8, transA, transB, x_scale, x_zero, y_scale, y_zero, outputScaleVar, outputZero, bias_int32); } Z->setName(expr->name()); diff --git a/tools/converter/source/optimizer/onnxextra/OnnxQuantizeLinear.cpp b/tools/converter/source/optimizer/onnxextra/OnnxQuantizeLinear.cpp index 5f9fbe7ce..c94cfee75 100644 --- a/tools/converter/source/optimizer/onnxextra/OnnxQuantizeLinear.cpp +++ b/tools/converter/source/optimizer/onnxextra/OnnxQuantizeLinear.cpp @@ -31,13 +31,19 @@ class OnnxQuantizeLinearTransform : public OnnxExtraManager::Transform { MNN_ERROR("QuantizeLinear should provide scale and input\n"); return nullptr; } + uint8_t dataType = halide_type_int; VARP zeropoint = _Const(0.f); + auto offset = _Const(0.f); if (inputs.size() > 2) { zeropoint = _Cast(inputs[2]); + dataType = inputs[2]->getInfo()->type.code; + } + if (dataType == halide_type_uint) { + offset = _Const(128.f); } auto scaleReq = _Reciprocal(scale); // auto output = _Cast(_Round(_Relu6(_Round(input * scaleReq) + zeropoint, -128.0f, 127.0f))); - auto output = _FloatToInt8(input, scaleReq, -128, 127, static_cast(zeropoint->readMap()[0])); + auto output = _FloatToInt8(input, scaleReq, -128, 127, static_cast(zeropoint->readMap()[0] - offset->readMap()[0])); std::unique_ptr iden(new MNN::OpT); iden->type = OpType_FloatToInt8; std::vector inputDim = {}; @@ -46,7 +52,7 @@ class OnnxQuantizeLinearTransform : public OnnxExtraManager::Transform { inputDim = input->getInfo()->dim; } auto _shape = _Const(inputDim.data(), {static_cast(inputDim.size())}, NHWC, halide_type_of()); - auto newExpr = MNN::Express::Expr::create(iden.get(), {input, output, scale, zeropoint, _shape}, 5); + auto newExpr = MNN::Express::Expr::create(iden.get(), {input, output, scale, zeropoint - offset, _shape}, 5); newExpr->setName(expr->name()); return newExpr; } diff --git a/tools/converter/source/optimizer/postconvert/TransformGroupConvolution.cpp b/tools/converter/source/optimizer/postconvert/TransformGroupConvolution.cpp index 1c5498e26..95e8d6389 100644 --- a/tools/converter/source/optimizer/postconvert/TransformGroupConvolution.cpp +++ b/tools/converter/source/optimizer/postconvert/TransformGroupConvolution.cpp @@ -182,7 +182,7 @@ class TransformGroupConvolution : public PostConverter { auto& common = conv2D->common; const int srcCount = common->inputCount; const bool depthwiseLike = srcCount % common->group != 0 || common->outputCount % common->group != 0; - if (common->group == 1 || op->inputIndexes.size() > 1 || depthwiseLike) { + if (common->group == 1 || depthwiseLike) { iter++; continue; } @@ -212,7 +212,7 @@ class TransformGroupConvolution : public PostConverter { MNN::OpT* sliceOp = new MNN::OpT; sliceOp->type = MNN::OpType_Slice; sliceOp->name = op->name + "_____slice"; - sliceOp->inputIndexes = op->inputIndexes; + sliceOp->inputIndexes = {op->inputIndexes[0]}; sliceOp->outputIndexes = newConvolutionInputIndex; auto sliceT = new MNN::SliceT; sliceOp->main.type = MNN::OpParameter_Slice; @@ -224,38 +224,121 @@ class TransformGroupConvolution : public PostConverter { newOp.push_back(sliceOp); } - int partWeightSize = conv2D->weight.size() / common->group; - int partBiasSize = conv2D->bias.size() / 
common->group; + if(op->inputIndexes.size() > 1){ + std::vector newConvolutionWeightInputIndex; + std::vector newConvolutionBiasInputIndex; + // splice weight + { + for (int i = 0; i < common->group; ++i) { + std::ostringstream newTensorNameOs; + newTensorNameOs << op->name << "___input___weight___" << i; + newConvolutionWeightInputIndex.push_back(mNet->tensorName.size()); + mNet->tensorName.push_back(newTensorNameOs.str()); + } - // Create Sub Convolution - flatbuffers::FlatBufferBuilder tmpBuilder; - tmpBuilder.Finish(Convolution2DCommon::Pack(tmpBuilder, common.get())); - auto originCommon = flatbuffers::GetRoot(tmpBuilder.GetBufferPointer()); - for (int i = 0; i < common->group; ++i) { - std::ostringstream opNameOs; - auto newConvOp = new MNN::OpT; - opNameOs << op->name << "__group__" << i; - newConvOp->type = op->type; - newConvOp->name = opNameOs.str(); - newConvOp->main.type = MNN::OpParameter_Convolution2D; - newConvOp->inputIndexes.push_back(newConvolutionInputIndex[i]); - newConvOp->outputIndexes.push_back(newConvolutionOutputIndex[i]); + // Create slice op for weight + { + MNN::OpT* sliceOp = new MNN::OpT; + sliceOp->type = MNN::OpType_Slice; + sliceOp->name = op->name + "_____weight_____slice"; + sliceOp->inputIndexes = {op->inputIndexes[1]}; + sliceOp->outputIndexes = newConvolutionWeightInputIndex; + auto sliceT = new MNN::SliceT; + sliceOp->main.type = MNN::OpParameter_Slice; + sliceOp->main.value = sliceT; + sliceT->axis = 0; + for (int i = 0; i < common->group - 1; ++i) { + sliceT->slicePoints.push_back(common->outputCount / (common->group) * (i + 1)); + } + newOp.push_back(sliceOp); + } + } + // slice bias + if(op->inputIndexes.size() == 3){ + for (int i = 0; i < common->group; ++i) { + std::ostringstream newTensorNameOs; + newTensorNameOs << op->name << "___input___bias___" << i; + newConvolutionBiasInputIndex.push_back(mNet->tensorName.size()); + mNet->tensorName.push_back(newTensorNameOs.str()); + } - auto newConvolutionT = new MNN::Convolution2DT; - newConvOp->main.value = newConvolutionT; - newConvolutionT->common = std::unique_ptr(originCommon->UnPack()); - newConvolutionT->common->group = 1; - newConvolutionT->common->outputCount = common->outputCount / common->group; - newConvolutionT->common->inputCount = common->inputCount / common->group; - int startWeight = partWeightSize * i; - int startBias = partBiasSize * i; - for (int v = 0; v < partWeightSize; ++v) { - newConvolutionT->weight.push_back(conv2D->weight[startWeight + v]); + // Create slice op for bias + { + MNN::OpT* sliceOp = new MNN::OpT; + sliceOp->type = MNN::OpType_Slice; + sliceOp->name = op->name + "_____bias_____slice"; + sliceOp->inputIndexes = {op->inputIndexes[2]}; + sliceOp->outputIndexes = newConvolutionBiasInputIndex; + auto sliceT = new MNN::SliceT; + sliceOp->main.type = MNN::OpParameter_Slice; + sliceOp->main.value = sliceT; + sliceT->axis = 0; + for (int i = 0; i < common->group - 1; ++i) { + sliceT->slicePoints.push_back(common->outputCount / (common->group) * (i + 1)); + } + newOp.push_back(sliceOp); + } } - for (int v = 0; v < partBiasSize; ++v) { - newConvolutionT->bias.push_back(conv2D->bias[startBias + v]); + // Create Sub Convolution + flatbuffers::FlatBufferBuilder tmpBuilder; + tmpBuilder.Finish(Convolution2DCommon::Pack(tmpBuilder, common.get())); + auto originCommon = flatbuffers::GetRoot(tmpBuilder.GetBufferPointer()); + for (int i = 0; i < common->group; ++i) { + std::ostringstream opNameOs; + auto newConvOp = new MNN::OpT; + opNameOs << op->name << "__group__" << i; + 
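When the grouped convolution carries its weight (and optionally bias) as extra inputs, the pass above slices those tensors along axis 0 into as many equal parts as there are groups and wires one part into each single-group convolution; weight and bias use the same slice points. A standalone sketch of the slice-point computation:

    #include <vector>

    // Slice points along the output-channel axis, matching the Slice ops created above.
    static std::vector<int> groupSlicePoints(int outputCount, int group) {
        std::vector<int> points;
        for (int i = 0; i < group - 1; ++i) {
            points.push_back(outputCount / group * (i + 1));
        }
        return points;
    }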
newConvOp->type = op->type; + newConvOp->name = opNameOs.str(); + newConvOp->main.type = MNN::OpParameter_Convolution2D; + newConvOp->inputIndexes.push_back(newConvolutionInputIndex[i]); + newConvOp->inputIndexes.push_back(newConvolutionWeightInputIndex[i]); + if(op->inputIndexes.size() == 3){ + newConvOp->inputIndexes.push_back(newConvolutionBiasInputIndex[i]); + } + newConvOp->outputIndexes.push_back(newConvolutionOutputIndex[i]); + + auto newConvolutionT = new MNN::Convolution2DT; + newConvOp->main.value = newConvolutionT; + newConvolutionT->common = std::unique_ptr(originCommon->UnPack()); + newConvolutionT->common->group = 1; + newConvolutionT->common->outputCount = common->outputCount / common->group; + newConvolutionT->common->inputCount = common->inputCount / common->group; + newOp.push_back(newConvOp); + } + }else{ + int partWeightSize = conv2D->weight.size() / common->group; + int partBiasSize = conv2D->bias.size() / common->group; + + // Create Sub Convolution + flatbuffers::FlatBufferBuilder tmpBuilder; + tmpBuilder.Finish(Convolution2DCommon::Pack(tmpBuilder, common.get())); + auto originCommon = flatbuffers::GetRoot(tmpBuilder.GetBufferPointer()); + for (int i = 0; i < common->group; ++i) { + std::ostringstream opNameOs; + auto newConvOp = new MNN::OpT; + opNameOs << op->name << "__group__" << i; + newConvOp->type = op->type; + newConvOp->name = opNameOs.str(); + newConvOp->main.type = MNN::OpParameter_Convolution2D; + newConvOp->inputIndexes.push_back(newConvolutionInputIndex[i]); + newConvOp->outputIndexes.push_back(newConvolutionOutputIndex[i]); + + auto newConvolutionT = new MNN::Convolution2DT; + newConvOp->main.value = newConvolutionT; + newConvolutionT->common = std::unique_ptr(originCommon->UnPack()); + newConvolutionT->common->group = 1; + newConvolutionT->common->outputCount = common->outputCount / common->group; + newConvolutionT->common->inputCount = common->inputCount / common->group; + int startWeight = partWeightSize * i; + int startBias = partBiasSize * i; + for (int v = 0; v < partWeightSize; ++v) { + newConvolutionT->weight.push_back(conv2D->weight[startWeight + v]); + } + for (int v = 0; v < partBiasSize; ++v) { + newConvolutionT->bias.push_back(conv2D->bias[startBias + v]); + } + newOp.push_back(newConvOp); } - newOp.push_back(newConvOp); } // Set this op be Concat Op diff --git a/tools/cpp/ModuleBasic.cpp b/tools/cpp/ModuleBasic.cpp index c9b4e93ad..04954e7a4 100644 --- a/tools/cpp/ModuleBasic.cpp +++ b/tools/cpp/ModuleBasic.cpp @@ -15,6 +15,9 @@ #include "rapidjson/document.h" #include "core/MemoryFormater.h" #include +#include +#include +#include #include "ExprDebug.hpp" using namespace MNN::Express; @@ -127,6 +130,9 @@ int main(int argc, char *argv[]) { } checkOutput = outputs.size() > 0; } + // Call Time / Per Second + float freq = 0.0f; + int cpuDecreaseRate = -1; if (inputNames.empty()) { rapidjson::Document document; std::ostringstream jsonNameOs; @@ -176,6 +182,12 @@ int main(int argc, char *argv[]) { if (document.HasMember("repeat")) { repeatNumber = document["repeat"].GetInt(); } + if (document.HasMember("freq")) { + freq = document["freq"].GetFloat(); + } + if (document.HasMember("cpu_decrease_rate")) { + cpuDecreaseRate = document["cpu_decrease_rate"].GetInt(); + } } auto type = MNN_FORWARD_CPU; if (argc > 4) { @@ -189,12 +201,14 @@ int main(int argc, char *argv[]) { modeNum = ::atoi(argv[6]); } + int power = BackendConfig::Power_Normal; int precision = BackendConfig::Precision_Normal; int memory = BackendConfig::Memory_Normal; if (argc > 7) { 
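The mask read from argv[7] just below packs three backend options into one integer (precision in the low two bits, then memory, then power), and the optional "freq" field parsed above is used later in the run loop to cap the replay rate by sleeping off the remainder of each period. A standalone sketch of both conventions (function names hypothetical):

    #include <chrono>
    #include <thread>

    // argv[7] layout used by ModuleBasic: mask = precision + 4 * memory + 16 * power, each field in [0, 3].
    static void decodeMask(int mask, int& precision, int& memory, int& power) {
        precision = mask % 4;
        memory    = (mask / 4) % 4;
        power     = (mask / 16) % 4;
    }

    // Pace the loop to "freq" inferences per second by sleeping off what is left of the period.
    static void paceToFrequency(float freq, float elapsedMs) {
        if (freq <= 0.0f) {
            return;
        }
        float remainMs = (1000.0f / freq) - elapsedMs;
        if (remainMs > 0.0f) {
            std::this_thread::sleep_for(std::chrono::milliseconds((int)remainMs));
        }
    }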
int mask = atoi(argv[7]); precision = mask % 4; memory = (mask / 4) % 4; + power = (mask / 16) % 4; } const char* cacheFileName = ".tempcache"; if (argc > 8) { @@ -202,6 +216,7 @@ int main(int argc, char *argv[]) { } FUNC_PRINT(precision); FUNC_PRINT(memory); + FUNC_PRINT(power); FUNC_PRINT_ALL(cacheFileName, s); // create session MNN::ScheduleConfig config; @@ -212,7 +227,7 @@ int main(int argc, char *argv[]) { config.backupType = type; BackendConfig backendConfig; // config.path.outputs.push_back("ResizeBilinear_2"); - // backendConfig.power = BackendConfig::Power_High; + backendConfig.power = (BackendConfig::PowerMode)power; backendConfig.precision = static_cast(precision); backendConfig.memory = static_cast(memory); config.backendConfig = &backendConfig; @@ -224,6 +239,9 @@ int main(int argc, char *argv[]) { mConfig.shapeMutable = shapeMutable; std::shared_ptr rtmgr(Executor::RuntimeManager::createRuntimeManager(config)); rtmgr->setCache(cacheFileName); + if (cpuDecreaseRate > 0 && cpuDecreaseRate <= 100) { + rtmgr->setHint(Interpreter::CPU_LITTLECORE_DECREASE_RATE, cpuDecreaseRate); + } if (runMask & 1) { // Need dump tensor, open debug rtmgr->setMode(Interpreter::Session_Debug); @@ -256,6 +274,9 @@ int main(int argc, char *argv[]) { if (runMask & 512) { rtmgr->setHint(Interpreter::WINOGRAD_MEMORY_LEVEL, 0); } + if (runMask & 1024) { + rtmgr->setHint(Interpreter::DYNAMIC_QUANT_OPTIONS, 1); + } std::shared_ptr net; { AUTOTIME; @@ -402,6 +423,12 @@ int main(int argc, char *argv[]) { ((MNN::Tensor*)o->getTensor())->wait(MNN::Tensor::MAP_TENSOR_READ, true); } times[i] = _l.durationInUs() / 1000.0f; + if (freq > 0.0f) { + float remainMs = (1000.0f / freq) - times[i]; + if (remainMs > 0.0f) { + std::this_thread::sleep_for(std::chrono::milliseconds((int)remainMs)); + } + } } if (nullptr != gTimeTraceInfo) { float opSummer = 0.0f; diff --git a/tools/cpp/checkFile.cpp b/tools/cpp/checkFile.cpp index ec7de403f..afa6c2a10 100644 --- a/tools/cpp/checkFile.cpp +++ b/tools/cpp/checkFile.cpp @@ -11,6 +11,7 @@ #include #include #include +#include using namespace std; @@ -25,7 +26,8 @@ int main(int argc, char* argv[]) { const char* file2 = argv[2]; float tolerance = 0.001; if (argc > 3) { - tolerance = atof(argv[3]); + std::istringstream ss(argv[3]); + ss >> tolerance; } // open file diff --git a/tools/cpp/getPerformance.cpp b/tools/cpp/getPerformance.cpp index d56c3c0bb..ff3b5dfc4 100644 --- a/tools/cpp/getPerformance.cpp +++ b/tools/cpp/getPerformance.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include "core/Macro.h" @@ -201,10 +203,38 @@ void cpuFLOPSPerformance() { MNN_PRINT("CPU float gflops : %f\n", gflops); } +static void _testMemcpy() { + int size = 1024 * 1024; + int loop = 10000; + std::vector threads; + MNN::Timer _t; + for (int i=0; i<2; ++i) { + threads.emplace_back(std::thread([size, loop]() { + std::vector tmp0(size); + std::vector tmp1(size); + auto t0 = tmp0.data(); + auto t1 = tmp1.data(); + for (int i=0; i=0: transformerNamse.append(funcName) + elif end == '__IMAGE__': + opNamesImage.append(funcName) else: - opNames.append(funcName) + opNamesBuffer.append(funcName) + bufferFileNames = os.listdir(openclBufferDir) print(bufferFileNames) collectFile(bufferFileNames, openclBufferDir) @@ -194,7 +198,11 @@ def collectFile(fileNames, dirname): f.write('#ifndef MNN_OPENCL_SEP_BUILD\n') f.write('namespace MNN {\n') f.write('namespace OpenCL {\n') - for l in opNames: + f.write('#ifndef ' + 'MNN_OPENCL_BUFFER_CLOSED' + '\n') + for l in opNamesBuffer: 
+ f.write("extern void " + l + '();\n') + f.write('#endif\n') + for l in opNamesImage: f.write("extern void " + l + '();\n') f.write('\n') f.write('#ifdef ' + 'MNN_SUPPORT_TRANSFORMER_FUSE' + '\n') @@ -202,8 +210,13 @@ def collectFile(fileNames, dirname): f.write("extern void " + l + '();\n') f.write('#endif\n') f.write('void registerOpenCLOps() {\n') - for l in opNames: - f.write(l+'();\n') + f.write('#ifndef ' + 'MNN_OPENCL_BUFFER_CLOSED' + '\n') + for l in opNamesBuffer: + f.write(l + '();\n') + f.write('#endif\n') + for l in opNamesImage: + f.write(l + '();\n') + f.write('\n') f.write('#ifdef ' + 'MNN_SUPPORT_TRANSFORMER_FUSE' + '\n') for l in transformerNamse: f.write(l+'();\n') diff --git a/tools/script/testMNNFromOnnx.py b/tools/script/testMNNFromOnnx.py index 01cf38847..8b032c86a 100644 --- a/tools/script/testMNNFromOnnx.py +++ b/tools/script/testMNNFromOnnx.py @@ -122,7 +122,7 @@ def __run_mnn(self): if not os.path.exists(mnnconvert_name): print("./MNNConvert not exist in this path. Use pymnn instead of C++ to test") mnnconvert_name = 'mnnconvert' - convert = mnnconvert_name + ' -f ONNX --bizCode MNN --modelFile onnx/test.onnx --MNNModel convert_cache.mnn --keepInputFormat --testdir onnx' + convert = mnnconvert_name + ' -f ONNX --bizCode MNN --modelFile onnx/test.onnx --MNNModel convert_cache.mnn --keepInputFormat=1 --testdir onnx' result = os.popen(convert).read() print(result) return result diff --git a/tools/script/testMNNFromTf.py b/tools/script/testMNNFromTf.py index e267681d7..c30a22116 100644 --- a/tools/script/testMNNFromTf.py +++ b/tools/script/testMNNFromTf.py @@ -32,7 +32,7 @@ def __run_mnn(self): if not os.path.exists(mnnconvert_name): print("./MNNConvert not exist in this path. Use pymnn instead of C++ to test") mnnconvert_name = 'mnnconvert' - convert = mnnconvert_name + ' -f TF --bizCode MNN --modelFile tf/test.pb --MNNModel convert_cache.mnn --keepInputFormat --testdir tf' + convert = mnnconvert_name + ' -f TF --bizCode MNN --modelFile tf/test.pb --MNNModel convert_cache.mnn --keepInputFormat=1 --testdir tf' result = os.popen(convert).read() print(result) return result diff --git a/tools/script/testMNNFromTflite.py b/tools/script/testMNNFromTflite.py index bfa24c65d..dd9ffef71 100644 --- a/tools/script/testMNNFromTflite.py +++ b/tools/script/testMNNFromTflite.py @@ -54,7 +54,7 @@ def __run_mnn(self): if not os.path.exists(mnnconvert_name): print("./MNNConvert not exist in this path. Use pymnn instead of C++ to test") mnnconvert_name = 'mnnconvert' - convert = mnnconvert_name + ' -f TFLITE --bizCode MNN --modelFile tflite/test.tflite --MNNModel convert_cache.mnn --keepInputFormat --testdir tflite' + convert = mnnconvert_name + ' -f TFLITE --bizCode MNN --modelFile tflite/test.tflite --MNNModel convert_cache.mnn --keepInputFormat=1 --testdir tflite' result = os.popen(convert).read() print(result) return result diff --git a/tools/script/testMNNFromTorch.py b/tools/script/testMNNFromTorch.py index 385598dce..173db53c2 100644 --- a/tools/script/testMNNFromTorch.py +++ b/tools/script/testMNNFromTorch.py @@ -31,7 +31,7 @@ def __run_mnn(self): if not os.path.exists(mnnconvert_name): print("./MNNConvert not exist in this path. 
Use pymnn instead of C++ to test") mnnconvert_name = 'mnnconvert' - convert = mnnconvert_name + ' -f TORCH --bizCode MNN --modelFile torch/test.pt --MNNModel convert_cache.mnn --keepInputFormat --testdir torch' + convert = mnnconvert_name + ' -f TORCH --bizCode MNN --modelFile torch/test.pt --MNNModel convert_cache.mnn --keepInputFormat=1 --testdir torch' result = os.popen(convert).read() print(result) return result diff --git a/tools/train/source/demo/demoMain.cpp b/tools/train/source/demo/demoMain.cpp index 1417e5b6b..30c844e75 100644 --- a/tools/train/source/demo/demoMain.cpp +++ b/tools/train/source/demo/demoMain.cpp @@ -10,7 +10,7 @@ #include "DemoUnit.hpp" #include int main(int argc, const char* argv[]) { - ExecutorScope::Current()->setLazyComputeMode(MNN::Express::Executor::LAZY_CONTENT); +// ExecutorScope::Current()->setLazyComputeMode(MNN::Express::Executor::LAZY_CONTENT); if (argc < 2) { MNN_ERROR("Usage: ./runTrainDemo.out CASENAME [ARGS]\n"); auto& list = DemoUnitSet::get()->list(); diff --git a/tools/train/source/nn/NN.cpp b/tools/train/source/nn/NN.cpp index 7594e7197..a49c6afaf 100644 --- a/tools/train/source/nn/NN.cpp +++ b/tools/train/source/nn/NN.cpp @@ -6,6 +6,7 @@ // Copyright © 2018, Alibaba Group Holding Limited // +#include #include "NN.hpp" #include "Distributions.hpp" #include "module/PipelineModule.hpp" @@ -397,6 +398,11 @@ Module* NN::Linear(int l, int t, bool hasBias, std::shared_ptr weig } auto weight = weightInit->createConstVar({t, l}, NCHW); weight.fix(VARP::TRAINABLE); + // Save lazy mode + auto lazyEval = ExecutorScope::Current()->lazyEval; + auto lazyMode = ExecutorScope::Current()->getLazyMode(); + ExecutorScope::Current()->lazyEval = true; + ExecutorScope::Current()->setLazyComputeMode(Executor::LAZY_FULL); auto input = _Input({l}, NCHW); auto output = _MatMul(input, weight, false, true); if (!hasBias) { @@ -407,6 +413,10 @@ Module* NN::Linear(int l, int t, bool hasBias, std::shared_ptr weig output = _Add(output, bias); auto module = NN::extract({input}, {output}, true); module->setType("Linear"); + // Revert lazy mode + ExecutorScope::Current()->lazyEval = lazyEval; + ExecutorScope::Current()->setLazyComputeMode(lazyMode); + return module; } diff --git a/transformers/diffusion/README.md b/transformers/diffusion/README.md deleted file mode 100644 index 42d247ca8..000000000 --- a/transformers/diffusion/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Diffusion使用方法 - -## 模型支持与下载 - -[Download-runwayml/stable-diffusion-v1-5]: -https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main -[Download-IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1]: -https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-v0.1/tree/main - -## 模型转换 -### 将Huggingface的Stable Diffusion模型 转为onnx模型 -python export/onnx_export.py \ - --model_path hf_sd_load_path \ - --output_path onnx_save_path - -### 将onnx模型转为mnn模型 -新建diffusion mnn模型文件夹,将转好的mnn文件放在该文件夹下。 -./MNNConvert -f ONNX --modelFile onnx_save_path/text_encoder/model.onnx --MNNModel mnn_save_path/text_encoder.mnn --weightQuantBits 8 --bizCode biz -./MNNConvert -f ONNX --modelFile onnx_save_path/unet/model.onnx --MNNModel mnn_save_path/unet.mnn --transformerFuse --weightQuantBits 8 --bizCode biz -./MNNConvert -f ONNX --modelFile onnx_save_path/vae_decoder/model.onnx --keepInputFormat --MNNModel mnn_save_path/vae_decoder.mnn --weightQuantBits 8 --bizCode biz - -## 编译Diffusion Demo -### Linux/MAC/Windows上 -cmake .. 
-DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON - -### Android上 -cd project/android/build -../build_64.sh -DMNN_BUILD_DIFFUSION=ON -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON -DMNN_OPENCL=ON -DMNN_SEP_BUILD=OFF -DMNN_SUPPORT_TRANSFORMER_FUSE=ON - -## 运行Diffusion Demo -./diffusion_demo -其中,resource_path 就是mnn模型文件的路径,除了mnn文件,还需要 -(1)将MNN目录transformers/diffusion/scheduler/alphas.txt文件拷贝到该文件夹下。 -(2)针对stable-diffusion-v1-5模型需要将huggingfacetokenizer目录下merges.txt和vocab.json拷贝到该文件夹中。针对Taiyi-Stable-Diffusion模型需要将huggingfacetokenizer目录下vocab.txt拷贝到该文件夹中。 - -model_type是目前支持的两种diffusion模型的类别。如果是stable-diffusion-v1-5模型设为0,如果是Taiyi-Stable-Diffusion模型设为1。 - -output_image_name是生成图片的名字,默认图片位置在当前运行目录下。 - -input_text是文生图的prompt,如果是stable-diffusion-v1-5模型建议英文prompt,如果是Taiyi-Stable-Diffusion建议中文prompt。 - -运行指令例如: -./diffusion_demo mnn_save_path 0 demo.jpg "a cute cat" -./diffusion_demo mnn_save_path 1 demo.jpg "一只可爱的猫" - diff --git a/transformers/diffusion/env.yaml b/transformers/diffusion/env.yaml new file mode 100644 index 000000000..de4ae3699 --- /dev/null +++ b/transformers/diffusion/env.yaml @@ -0,0 +1,10 @@ +name: ldm +channels: + - pytorch + - defaults +dependencies: + - pytorch + - numpy + - diffusers + - onnx + - transformers diff --git a/transformers/diffusion/main.cpp b/transformers/diffusion/main.cpp index 4cbb03d1e..946175e34 100644 --- a/transformers/diffusion/main.cpp +++ b/transformers/diffusion/main.cpp @@ -3,7 +3,7 @@ int main(int argc, const char* argv[]) { if (argc < 3) { - printf("Usage: ./diffusion_demo \n"); + MNN_PRINT("Usage: ./diffusion_demo \n"); return 0; } @@ -19,16 +19,16 @@ int main(int argc, const char* argv[]) { } } - printf("model resource path: %s\n", resource_path); + MNN_PRINT("model resource path: %s\n", resource_path); if(model_type == diffusion::STABLE_DIFFUSION_1_5) { - printf("model resourc is stable diffusion 1.5\n"); + MNN_PRINT("model type is stable diffusion 1.5\n"); } else if (model_type == diffusion::STABLE_DIFFUSION_TAIYI_CHINESE) { - printf("model resourc is stable diffusion taiyi chinese version\n"); + MNN_PRINT("model type is stable diffusion taiyi chinese version\n"); } else { - printf("model type: %d not supported, please check\n", (int)model_type); + MNN_PRINT("model type: %d not supported, please check\n", (int)model_type); } - printf("output img_name: %s\n", img_name); - printf("input texts: %s\n", input_text.c_str()); + MNN_PRINT("output img_name: %s\n", img_name); + MNN_PRINT("input texts: %s\n", input_text.c_str()); diffusion::Pipeline pipeline(resource_path, model_type); diff --git a/transformers/diffusion/pipeline.cpp b/transformers/diffusion/pipeline.cpp index 32d794684..194ebebd2 100644 --- a/transformers/diffusion/pipeline.cpp +++ b/transformers/diffusion/pipeline.cpp @@ -43,10 +43,10 @@ static inline int64_t getTime() { void display_progress(int cur, int total){ putchar('\r'); - printf("["); + MNN_PRINT("["); for (int i = 0; i < cur; i++) putchar('#'); for (int i = 0; i < total - cur; i++) putchar('-'); - printf("]"); + MNN_PRINT("]"); fprintf(stdout, " [%3d%%]", cur * 100 / total); if (cur == total) putchar('\n'); fflush(stdout); @@ -113,8 +113,8 @@ bool Pipeline::load_modules(std::string modelPath) { mTimestepVar = _Input({1}, NCHW, halide_type_of()); mSampleVar = _Concat({mLatentVar, mLatentVar}, 0); - printf("Model loading and initilizing...\n"); - printf("First time initilizing may cost a few seconds to create cachefile, please wait ...\n"); + 
MNN_PRINT("Model loading and initilizing...\n"); + MNN_PRINT("First time initilizing may cost a few seconds to create cachefile, please wait ...\n"); VARP text_embeddings; mModules.resize(3); @@ -170,9 +170,9 @@ VARP Pipeline::text_encoder(const std::vector& ids) { #ifdef MNN_DUMP_DATA auto xx = output->readMap(); for(int i=0; i<10; i+=2) { - printf("%f %f ", xx[i], xx[i+mMaxTextLen*768]); + MNN_PRINT("%f %f ", xx[i], xx[i+mMaxTextLen*768]); } - printf("\n\n"); + MNN_PRINT("\n\n"); #endif return output; } @@ -276,12 +276,12 @@ VARP Pipeline::unet(VARP text_embeddings) { auto zz = text_embeddings->readMap(); for(int i=0; i<6; i+=2) { - printf("%f %f %f ", xx[i], yy[i], zz[i]); + MNN_PRINT("%f %f %f ", xx[i], yy[i], zz[i]); } for(int i=0; i<6; i+=2) { - printf("%f %f %f ", xx[16384+i], yy[16384+i], zz[mMaxTextLen*768+i]); + MNN_PRINT("%f %f %f ", xx[16384+i], yy[16384+i], zz[mMaxTextLen*768+i]); } - printf("\n\n"); + MNN_PRINT("\n\n"); #endif } mLatentVar.fix(VARP::CONSTANT); @@ -289,9 +289,9 @@ VARP Pipeline::unet(VARP text_embeddings) { #ifdef MNN_DUMP_DATA auto xx = mLatentVar->readMap(); for(int i=0; i<10; i+=2) { - printf("%f ", xx[i]); + MNN_PRINT("%f ", xx[i]); } - printf("\n\n"); + MNN_PRINT("\n\n"); #endif return mLatentVar; } @@ -307,9 +307,9 @@ VARP Pipeline::vae_decoder(VARP latent) { #ifdef MNN_DUMP_DATA auto xx = output->readMap(); for(int i=0; i<320; i+=32) { - printf("%f ", xx[i]); + MNN_PRINT("%f ", xx[i]); } - printf("\n\n"); + MNN_PRINT("\n\n"); #endif auto image = output; @@ -340,7 +340,7 @@ bool Pipeline::run(const std::string& sentence, const std::string& img_name) { auto image = vae_decoder(latent); bool res = imwrite(img_name, image); if (res) { - printf("SUCCESS! write to %s\n", img_name.c_str()); + MNN_PRINT("SUCCESS! write to %s\n", img_name.c_str()); } return true; } diff --git a/transformers/llm/engine/include/llm/llm.hpp b/transformers/llm/engine/include/llm/llm.hpp new file mode 100644 index 000000000..4e1f445df --- /dev/null +++ b/transformers/llm/engine/include/llm/llm.hpp @@ -0,0 +1,121 @@ +// +// llm.hpp +// +// Created by MNN on 2023/08/25. 
+// ZhaodeWang +// + +#ifndef LLM_hpp +#define LLM_hpp + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace MNN { +namespace Transformer { +class Tokenizer; +class Pipeline; +class LlmConfig; + +// Llm start +// llm stream buffer with callback +class MNN_PUBLIC LlmStreamBuffer : public std::streambuf { +public: + using CallBack = std::function;; + LlmStreamBuffer(CallBack callback) : callback_(callback) {} + +protected: + virtual std::streamsize xsputn(const char* s, std::streamsize n) override { + if (callback_) { + callback_(s, n); + } + return n; + } + +private: + CallBack callback_ = nullptr; +}; +class MNN_PUBLIC Llm { + using PromptItem = std::pair; // +public: + Llm(std::shared_ptr config) : config_(config) {} + virtual ~Llm(); + static Llm* createLLM(const std::string& config_path); + void chat(); + void reset(); + void trace(bool start); + virtual void load(); + MNN::Express::VARP forward(const std::vector& input_ids); + int sample(MNN::Express::VARP logits, const std::vector& pre_ids); + std::string apply_prompt_template(const std::string& user_content) const; + std::string apply_chat_template(const std::vector& chat_prompts) const; + std::string response(const std::string& user_content, std::ostream* os = &std::cout, const char* end_with = nullptr); + std::string response(const std::vector& chat_prompts, std::ostream* os = &std::cout, const char* end_with = nullptr); + void generate_init(); + std::string generate(const std::vector& input_ids, std::ostream* os, const char* end_with); + std::vector generate(const std::vector& input_ids, int max_new_tokens = -1); + void print_speed(); + // config function + std::string dump_config(); + bool set_config(const std::string& content); + friend class Pipeline; +public: + // forward info + int prompt_len_ = 0; + int gen_seq_len_ = 0; + int all_seq_len_ = 0; + std::vector history_ids_; + // time + int64_t prefill_us_ = 0; + int64_t decode_us_ = 0; + bool is_single_ = true; +protected: + std::shared_ptr config_; + std::shared_ptr tokenizer_; + std::vector key_value_shape_ = {}; + std::vector past_key_values_; + MNN::Express::VARP inputs_embeds_, attention_mask_, position_ids_; + std::shared_ptr runtime_manager_; + std::vector> modules_; + std::vector> decode_modules_; + std::vector> prefill_modules_; + void init_runtime(); + std::string decode(int id); + bool is_stop(int token_id); + virtual std::vector tokenizer(const std::string& query); + virtual MNN::Express::VARP embedding(const std::vector& input_ids); + virtual MNN::Express::VARP gen_attention_mask(int seq_len); + virtual MNN::Express::VARP gen_position_ids(int seq_len); +}; + +// Embedding start +class Embedding : public Llm { +public: + Embedding(std::shared_ptr config); + static Embedding* createEmbedding(const std::string& config_path); + static float dist(MNN::Express::VARP var0, MNN::Express::VARP var1); + virtual void load() override; + MNN::Express::VARP embedding(const std::string& txt); + int dim() const; +private: + virtual std::vector tokenizer(const std::string& query) override; + virtual MNN::Express::VARP gen_attention_mask(int seq_len) override; + virtual MNN::Express::VARP gen_position_ids(int seq_len) override; +}; +// Embedding end +} +} + +#endif // LLM_hpp diff --git a/transformers/llm/engine/llm_demo.cpp b/transformers/llm/engine/llm_demo.cpp index 4edc7731d..416154f84 100644 --- a/transformers/llm/engine/llm_demo.cpp +++ b/transformers/llm/engine/llm_demo.cpp @@ -5,13 
+5,13 @@ // ZhaodeWang // -#include "llm.hpp" +#include "llm/llm.hpp" #define MNN_OPEN_TIME_TRACE #include #include #include #include - +using namespace MNN::Transformer; static void trace_prepare(Llm* llm) { MNN_PRINT("Prepare for resize opt Begin\n"); std::vector prompts = { diff --git a/transformers/llm/engine/src/llm.cpp b/transformers/llm/engine/src/llm.cpp index 3838d8538..4ed60d9c2 100644 --- a/transformers/llm/engine/src/llm.cpp +++ b/transformers/llm/engine/src/llm.cpp @@ -15,8 +15,9 @@ #include #include #include "cpp/ExprDebug.hpp" -#include "llm.hpp" +#include "llm/llm.hpp" #include "tokenizer.hpp" +#include "llmconfig.hpp" // 0: no debug, 1: test op time, 2: print tensor info #define DEBUG_MODE 0 @@ -24,6 +25,29 @@ #include "httplib.h" #include #endif +using namespace MNN::Express; +namespace MNN { +namespace Transformer { + +class Lvlm : public Llm { +public: + Lvlm(std::shared_ptr config) : Llm(config) { + img_size_ = config->llm_config_.value("img_size", img_size_); + imgpad_len_ = config->llm_config_.value("imgpad_len", imgpad_len_); + img_start_ = config->llm_config_.value("img_start", img_start_); + img_end_ = config->llm_config_.value("img_end", img_end_); + img_pad_ = config->llm_config_.value("img_pad", img_pad_); + } + ~Lvlm() { visual_module_.reset(); } + virtual void load() override; +private: + int img_size_ = 448, imgpad_len_ = 256, img_start_ = 151857, img_end_ = 151858, img_pad_ = 151859; + std::shared_ptr visual_module_; + MNN::Express::VARP visual_embedding(const std::vector& input_ids); + std::vector url_encode(const std::string& url); + virtual std::vector tokenizer(const std::string& query) override; + virtual MNN::Express::VARP embedding(const std::vector& input_ids) override; +}; // Llm start Llm* Llm::createLLM(const std::string& config_path) { @@ -48,6 +72,14 @@ static MNNForwardType backend_type_convert(const std::string& type_str) { return MNN_FORWARD_AUTO; } +std::string Llm::dump_config() { + return config_->config_.dump(); +} + +bool Llm::set_config(const std::string& content) { + return config_->config_.merge(content.c_str()); +} + void Llm::init_runtime() { ScheduleConfig config; BackendConfig cpuBackendConfig; @@ -64,6 +96,9 @@ void Llm::init_runtime() { runtime_manager_.reset(Executor::RuntimeManager::createRuntimeManager(config)); runtime_manager_->setHint(MNN::Interpreter::MEM_ALLOCATOR_TYPE, 0); + runtime_manager_->setHint(MNN::Interpreter::DYNAMIC_QUANT_OPTIONS, 1); // 1: per batch quant, 2: per tensor quant + runtime_manager_->setHint(MNN::Interpreter::KVCACHE_QUANT_OPTIONS, config_->quant_kv()); // 0: no quant, 1: quant key, 2: quant value, 3: quant kv + #if DEBUG_MODE==1 runtime_manager_->setMode(MNN::Interpreter::Session_Debug); _initTimeTrace(); @@ -104,8 +139,8 @@ void Llm::load() { MNN_PRINT("load %s ... ", model_path.c_str()); runtime_manager_->setExternalFile(config_->llm_weight()); modules_[0].reset(Module::load( - {"input_ids", "attention_mask", "position_ids", "past_key_values"}, - {"logits", "presents"}, model_path.c_str(), runtime_manager_, &module_config)); + {"input_ids", "attention_mask", "position_ids", "past_key_values"}, + {"logits", "presents"}, model_path.c_str(), runtime_manager_, &module_config)); MNN_PRINT("Done!\n"); } else { // load split models @@ -117,8 +152,8 @@ void Llm::load() { std::string model_path = config_->block_model(i); MNN_PRINT("load %s ... 
", model_path.c_str()); modules_[i].reset(Module::load( - {"inputs_embeds", "attention_mask", "position_ids", "past_key_values"}, - {"hidden_states", "presents"}, model_path.c_str(), runtime_manager_, &module_config)); + {"inputs_embeds", "attention_mask", "position_ids", "past_key_values"}, + {"hidden_states", "presents"}, model_path.c_str(), runtime_manager_, &module_config)); MNN_PRINT("Done!\n"); } } @@ -261,10 +296,14 @@ void Llm::chat() { } } +void Llm::reset() { + history_ids_.clear(); + all_seq_len_ = 0; +} + void Llm::generate_init() { // init status gen_seq_len_ = 0; - all_seq_len_ = 0; prefill_us_ = 0; decode_us_ = 0; past_key_values_.clear(); @@ -275,6 +314,10 @@ void Llm::generate_init() { past_key_values_.push_back(_Input(key_value_shape_, NCHW)); } } + if (!config_->reuse_kv()) { + all_seq_len_ = 0; + history_ids_.clear(); + } } std::vector Llm::generate(const std::vector& input_ids, int max_new_tokens) { @@ -306,15 +349,14 @@ std::vector Llm::generate(const std::vector& input_ids, int max_new_to std::string Llm::generate(const std::vector& input_ids, std::ostream* os, const char* end_with) { prompt_len_ = static_cast(input_ids.size()); - std::vector all_ids = input_ids; + history_ids_.insert(history_ids_.end(), input_ids.begin(), input_ids.end()); // push to history_ids_ auto st = std::chrono::system_clock::now(); modules_ = prefill_modules_; auto logits = forward(input_ids); if (nullptr == logits.get()) { return ""; } - int token = sample(logits, all_ids); - all_ids.push_back(token); + int token = sample(logits, history_ids_); auto et = std::chrono::system_clock::now(); modules_ = decode_modules_; std::string output_str = decode(token); @@ -322,6 +364,7 @@ std::string Llm::generate(const std::vector& input_ids, std::ostream* os, c *os << output_str << std::flush; while (gen_seq_len_ < config_->max_new_tokens()) { st = std::chrono::system_clock::now(); + history_ids_.push_back(token); logits = forward({token}); if (nullptr == logits.get()) { return ""; @@ -329,14 +372,13 @@ std::string Llm::generate(const std::vector& input_ids, std::ostream* os, c if (logits->getInfo()->size == 0) { return ""; } - token = sample(logits, all_ids); + token = sample(logits, history_ids_); et = std::chrono::system_clock::now(); decode_us_ += std::chrono::duration_cast(et - st).count(); if (is_stop(token)) { *os << end_with << std::flush; break; } - all_ids.push_back(token); auto word = decode(token); *os << word << std::flush; output_str += word; @@ -356,7 +398,11 @@ std::vector Llm::tokenizer(const std::string& user_content) { std::string Llm::response(const std::string& user_content, std::ostream* os, const char* end_with) { generate_init(); if (!end_with) { end_with = "\n"; } - auto input_ids = tokenizer(user_content); + auto prompt = apply_prompt_template(user_content); + if (config_->reuse_kv() && all_seq_len_ > 0) { + prompt = "<|im_end|>\n" + prompt; + } + auto input_ids = tokenizer_->encode(prompt); return generate(input_ids, os, end_with); } @@ -365,7 +411,12 @@ std::string Llm::response(const std::vector& chat_prompts, std::ostr generate_init(); if (!end_with) { end_with = "\n"; } auto prompt = apply_chat_template(chat_prompts); + if (config_->reuse_kv() && all_seq_len_ > 0) { + prompt = "<|im_end|>\n" + prompt; + } + std::cout << "# prompt : " << prompt << std::endl; auto input_ids = tokenizer_->encode(prompt); + printf("input_ids (%lu): ", input_ids.size()); for (auto id : input_ids) printf("%d, ", id); printf("\n"); return generate(input_ids, os, end_with); } @@ -462,29 +513,34 @@ 
std::string Llm::decode(int id) { } VARP Llm::gen_attention_mask(int seq_len) { + int kv_seq_len = all_seq_len_ + seq_len; + if (seq_len == 1) { + kv_seq_len = seq_len; + } if (config_->attention_mask() == "float") { if (needNewVar(attention_mask_, 2, seq_len)) { - attention_mask_ = _Input({1, 1, seq_len, seq_len}, NCHW, halide_type_of()); + attention_mask_ = _Input({1, 1, seq_len, kv_seq_len}, NCHW, halide_type_of()); } else { return attention_mask_; } auto ptr = attention_mask_->writeMap(); for (int i = 0; i < seq_len; i++) { - for (int j = 0; j < seq_len; j++) { - ptr[seq_len * i + j] = (j > i) * std::numeric_limits::lowest(); + for (int j = 0; j < kv_seq_len; j++) { + int row = i + all_seq_len_; + ptr[kv_seq_len * i + j] = (j > row) * std::numeric_limits::lowest(); } } return attention_mask_; } else { if (needNewVar(attention_mask_, 2, seq_len)) { - attention_mask_ = _Input({1, 1, seq_len, seq_len}, NCHW, halide_type_of()); + attention_mask_ = _Input({1, 1, seq_len, kv_seq_len}, NCHW, halide_type_of()); } else { return attention_mask_; } auto ptr = attention_mask_->writeMap(); if (config_->attention_mask() == "glm") { // chatglm - for (int i = 0; i < seq_len * seq_len; i++) { + for (int i = 0; i < seq_len * kv_seq_len; i++) { ptr[i] = 0; } if (seq_len > 1) { @@ -495,8 +551,9 @@ VARP Llm::gen_attention_mask(int seq_len) { } else { bool is_glm2 = config_->attention_mask() == "glm2"; for (int i = 0; i < seq_len; i++) { - for (int j = 0; j < seq_len; j++) { - ptr[seq_len * i + j] = is_glm2 ? j > i : j <= i; + for (int j = 0; j < kv_seq_len; j++) { + int row = i + all_seq_len_; + ptr[seq_len * i + j] = is_glm2 ? j > row : j <= row; } } } @@ -533,7 +590,7 @@ VARP Llm::gen_position_ids(int seq_len) { ptr[0] = is_glm2 ? gen_seq_len_ : all_seq_len_; } else { for (int i = 0; i < seq_len; i++) { - ptr[i] = i; + ptr[i] = i + all_seq_len_; } } return position_ids_; @@ -671,6 +728,10 @@ Embedding* Embedding::createEmbedding(const std::string& config_path) { return embedding; } +Embedding::Embedding(std::shared_ptr config) : Llm(config) {} + +int Embedding::dim() const { return config_->hidden_size(); } + void Embedding::load() { init_runtime(); printf("load tokenizer\n"); @@ -686,8 +747,8 @@ void Embedding::load() { MNN_PRINT("load %s ... ", model_path.c_str()); modules_.resize(1); modules_[0].reset(Module::load( - {"input_ids", "attention_mask", "position_ids"}, - {"sentence_embeddings"}, model_path.c_str(), runtime_manager_, &module_config)); + {"input_ids", "attention_mask", "position_ids"}, + {"sentence_embeddings"}, model_path.c_str(), runtime_manager_, &module_config)); MNN_PRINT("Done!\n"); } @@ -730,3 +791,5 @@ VARP Embedding::gen_position_ids(int seq_len) { return position_ids; } // Embedding end +} +} diff --git a/transformers/llm/engine/include/llm.hpp b/transformers/llm/engine/src/llmconfig.hpp similarity index 58% rename from transformers/llm/engine/include/llm.hpp rename to transformers/llm/engine/src/llmconfig.hpp index 71c3f00fd..71ef7291f 100644 --- a/transformers/llm/engine/include/llm.hpp +++ b/transformers/llm/engine/src/llmconfig.hpp @@ -1,58 +1,20 @@ // -// llm.hpp +// llmconfig.hpp // -// Created by MNN on 2023/08/25. +// Created by MNN on 2024/07/19. 
// ZhaodeWang // -#ifndef LLM_hpp -#define LLM_hpp - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "tokenizer.hpp" #include "rapidjson/document.h" +#include +#include -using namespace MNN; -using namespace Express; -using namespace rapidjson; -class Tokenizer; -class Pipeline; - -// Llm start -// llm stream buffer with callback -class LlmStreamBuffer : public std::streambuf { -public: - using CallBack = std::function;; - LlmStreamBuffer(CallBack callback) : callback_(callback) {} - -protected: - virtual std::streamsize xsputn(const char* s, std::streamsize n) override { - if (callback_) { - callback_(s, n); - } - return n; - } - -private: - CallBack callback_ = nullptr; -}; +namespace MNN { +namespace Transformer { static inline bool has_suffix(const std::string& str, const std::string& suffix) { return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; } static inline std::string base_dir(const std::string& path) { @@ -73,26 +35,69 @@ static inline std::string file_name(const std::string& path) { } } +bool merge_json(rapidjson::Value& destination, const rapidjson::Value& source, + rapidjson::Document::AllocatorType& allocator) { + if (!source.IsObject() || !destination.IsObject()) { + return false; + } + + for (auto it = source.MemberBegin(); it != source.MemberEnd(); ++it) { + const char* key = it->name.GetString(); + if (destination.HasMember(key)) { + if (destination[key].IsObject() && it->value.IsObject()) { + // Recursively merge the two JSON objects + merge_json(destination[key], it->value, allocator); + } else { + // Overwrite the value in the destination + destination[key].CopyFrom(it->value, allocator); + } + } else { + // Add the value to the destination + rapidjson::Value newKey(key, allocator); + rapidjson::Value newValue; + newValue.CopyFrom(it->value, allocator); + destination.AddMember(newKey, newValue, allocator); + } + } + return true; +} + class rapid_json_wrapper { public: - Document document; + rapidjson::Document document; rapid_json_wrapper() {} - rapid_json_wrapper(Document doc) : document(std::move(doc)) {} + rapid_json_wrapper(rapidjson::Document doc) : document(std::move(doc)) {} static rapid_json_wrapper parse(const std::ifstream& ifile) { std::ostringstream ostr; ostr << ifile.rdbuf(); - Document document; + rapidjson::Document document; document.Parse(ostr.str().c_str()); rapid_json_wrapper json_wrapper(std::move(document)); return json_wrapper; } static rapid_json_wrapper parse(const char* str) { - Document document; + rapidjson::Document document; document.Parse(str); rapid_json_wrapper json_wrapper(std::move(document)); return json_wrapper; } - + bool merge(const char* str) { + rapidjson::Document input_doc; + input_doc.Parse(str); + if (input_doc.HasParseError()) { + return false; + } + // merge + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + return merge_json(document, input_doc, allocator); + } + std::string dump() { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + document.Accept(writer); + return buffer.GetString(); + } + // read value int value(const char* key, const int& default_value) const { if (document.HasMember(key)) { const auto& value = document[key]; @@ -218,6 +223,14 @@ class LlmConfig { int max_new_tokens() const { return config_.value("max_new_tokens", 512); } + + bool 
reuse_kv() const { + return config_.value("reuse_kv", false); + } + + int quant_kv() const { + return config_.value("quant_kv", 0); + } // generate config end > // < backend config start @@ -272,90 +285,5 @@ class LlmConfig { } // llm model config end > }; - -class MNN_PUBLIC Llm { - using PromptItem = std::pair; // -public: - Llm(std::shared_ptr config) : config_(config) {} - virtual ~Llm(); - static Llm* createLLM(const std::string& config_path); - void chat(); - void trace(bool start); - virtual void load(); - VARP forward(const std::vector& input_ids); - int sample(VARP logits, const std::vector& pre_ids); - std::string apply_prompt_template(const std::string& user_content) const; - std::string apply_chat_template(const std::vector& chat_prompts) const; - std::string response(const std::string& user_content, std::ostream* os = &std::cout, const char* end_with = nullptr); - std::string response(const std::vector& chat_prompts, std::ostream* os = &std::cout, const char* end_with = nullptr); - void generate_init(); - std::string generate(const std::vector& input_ids, std::ostream* os, const char* end_with); - std::vector generate(const std::vector& input_ids, int max_new_tokens = -1); - void print_speed(); - friend class Pipeline; -public: - // forward info - int prompt_len_ = 0; - int gen_seq_len_ = 0; - int all_seq_len_ = 0; - // time - int64_t prefill_us_ = 0; - int64_t decode_us_ = 0; - bool is_single_ = true; - std::shared_ptr config_; - std::unique_ptr tokenizer_; -protected: - std::vector key_value_shape_ = {}; - std::vector past_key_values_; - VARP inputs_embeds_, attention_mask_, position_ids_; - std::shared_ptr runtime_manager_; - std::vector> modules_; - std::vector> decode_modules_; - std::vector> prefill_modules_; - void init_runtime(); - std::string decode(int id); - bool is_stop(int token_id); - virtual std::vector tokenizer(const std::string& query); - virtual VARP embedding(const std::vector& input_ids); - virtual VARP gen_attention_mask(int seq_len); - virtual VARP gen_position_ids(int seq_len); -}; - -class Lvlm : public Llm { -public: - Lvlm(std::shared_ptr config) : Llm(config) { - img_size_ = config->llm_config_.value("img_size", img_size_); - imgpad_len_ = config->llm_config_.value("imgpad_len", imgpad_len_); - img_start_ = config->llm_config_.value("img_start", img_start_); - img_end_ = config->llm_config_.value("img_end", img_end_); - img_pad_ = config->llm_config_.value("img_pad", img_pad_); - } - ~Lvlm() { visual_module_.reset(); } - virtual void load() override; -private: - int img_size_ = 448, imgpad_len_ = 256, img_start_ = 151857, img_end_ = 151858, img_pad_ = 151859; - std::shared_ptr visual_module_; - VARP visual_embedding(const std::vector& input_ids); - std::vector url_encode(const std::string& url); - virtual std::vector tokenizer(const std::string& query) override; - virtual VARP embedding(const std::vector& input_ids) override; -}; -// Llm end - -// Embedding start -class Embedding : public Llm { -public: - Embedding(std::shared_ptr config) : Llm(config) {} - static Embedding* createEmbedding(const std::string& config_path); - static float dist(VARP var0, VARP var1); - virtual void load() override; - VARP embedding(const std::string& txt); - int dim() { return config_->hidden_size(); } -private: - virtual std::vector tokenizer(const std::string& query) override; - virtual VARP gen_attention_mask(int seq_len) override; - virtual VARP gen_position_ids(int seq_len) override; -}; -// Embedding end - -#endif // LLM_hpp +} // Transformer +} // MNN \ No 
newline at end of file diff --git a/transformers/llm/engine/src/tokenizer.cpp b/transformers/llm/engine/src/tokenizer.cpp index f0350adc9..6330d8885 100644 --- a/transformers/llm/engine/src/tokenizer.cpp +++ b/transformers/llm/engine/src/tokenizer.cpp @@ -15,19 +15,21 @@ #include #include #include +namespace MNN { +namespace Transformer { // base64 static const std::string base64_chars = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; +"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +"abcdefghijklmnopqrstuvwxyz" +"0123456789+/"; static inline bool is_base64(unsigned char c) { return (isalnum(c) || (c == '+') || (c == '/')); } static inline size_t one_char_len(const char *src) { - return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"[(*src & 0xFF) >> 4]; + return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"[(*src & 0xFF) >> 4]; } static std::string base64_decode(const std::string& str) { @@ -343,13 +345,13 @@ Sentencepiece::EncodeResult Sentencepiece::bpe_encode(string_view_ normalized, f if (skip_merge()) continue; // Replaces symbols with `top` rule. symbols[top->left].piece = string_view_( - symbols[top->left].piece.data(), - symbols[top->left].piece.size() + symbols[top->right].piece.size()); + symbols[top->left].piece.data(), + symbols[top->left].piece.size() + symbols[top->right].piece.size()); // Updates prev/next pointers. symbols[top->left].next = symbols[top->right].next; if (symbols[top->right].next >= 0) { - symbols[symbols[top->right].next].prev = top->left; + symbols[symbols[top->right].next].prev = top->left; } symbols[top->right].piece = string_view_(""); @@ -585,11 +587,11 @@ std::string wstring_to_utf8(const std::wstring& str) { void byte_encode_token(const std::string& token, const std::unordered_map& b2u, std::wstring* result) { - result->resize(0); - for (char c : token) { - wchar_t wc = b2u.at(uint8_t(c)); - result->push_back(wc); - } + result->resize(0); + for (char c : token) { + wchar_t wc = b2u.at(uint8_t(c)); + result->push_back(wc); + } } bool HuggingfaceTokenizer::load_vocab(std::ifstream& tok_file) { @@ -611,10 +613,10 @@ bool HuggingfaceTokenizer::load_vocab(std::ifstream& tok_file) { std::getline(tok_file, line); int d = line.find(" "); bpe_ranks_.insert({{utf8_to_wstring(line.substr(0, d)), - utf8_to_wstring(line.substr(d + 1))}, i}); + utf8_to_wstring(line.substr(d + 1))}, i}); } // bytes_to_unicode - auto _insert_range = [=](int start, int end) { + auto _insert_range = [=](int start, int end) { for (int c = start; c <= end; c++) { b2u_.insert({uint8_t(c), wchar_t(c)}); } @@ -654,13 +656,13 @@ void HuggingfaceTokenizer::bpe(const std::wstring& token, const BPERanks& bpe_ra std::set merged; // records indices in pairs that were merged. auto _left = [](int i, std::set& merged) { for (int j = i - 1; j >= -1; j--) { - if (merged.find(j) == merged.end()) return j; + if (merged.find(j) == merged.end()) return j; } return -1; }; auto _right = [](int i, int cap, std::set& merged) { for (int j = i + 1; j < cap; j++) { - if (merged.find(j) == merged.end()) return j; + if (merged.find(j) == merged.end()) return j; } return cap; }; @@ -673,15 +675,15 @@ void HuggingfaceTokenizer::bpe(const std::wstring& token, const BPERanks& bpe_ra int to_merge = -1; // indices into pairs. for (int i = 0; i < pairs.size(); ++i) { - if (merged.find(i) == merged.end()) { // pair i is not merged. - auto iter = bpe_ranks.find(pairs[i]); - int score = iter != bpe_ranks.end() ? 
iter->second : INT_MAX; - if (score < min_score) { - min_score = score; - to_merge = i; + if (merged.find(i) == merged.end()) { // pair i is not merged. + auto iter = bpe_ranks.find(pairs[i]); + int score = iter != bpe_ranks.end() ? iter->second : INT_MAX; + if (score < min_score) { + min_score = score; + to_merge = i; + } } } - } if (to_merge == -1) break; @@ -747,3 +749,5 @@ std::string HuggingfaceTokenizer::decode(int id) { } return r; } +} +} diff --git a/transformers/llm/engine/include/tokenizer.hpp b/transformers/llm/engine/src/tokenizer.hpp similarity index 98% rename from transformers/llm/engine/include/tokenizer.hpp rename to transformers/llm/engine/src/tokenizer.hpp index e30cd980e..77ceeda5d 100644 --- a/transformers/llm/engine/include/tokenizer.hpp +++ b/transformers/llm/engine/src/tokenizer.hpp @@ -15,8 +15,6 @@ #include // #include #include - -// std::string_view impl in c++11 start class string_view_ { public: string_view_() : data_(nullptr), size_(0) {} @@ -46,6 +44,7 @@ class string_view_ { const char* data_; std::size_t size_ = 0; }; +// std::string_view impl in c++11 end namespace std { template<> @@ -60,7 +59,9 @@ namespace std { } }; } -// std::string_view impl in c++11 end +namespace MNN { +namespace Transformer { +// std::string_view impl in c++11 start class Tokenizer { public: @@ -183,5 +184,7 @@ using BPERanks = std::unordered_map, int, std::unordered_map encoder_; std::vector decoder_; }; +}; +}; -#endif // TOKENIZER_hpp \ No newline at end of file +#endif // TOKENIZER_hpp