diff --git a/include/llama.h b/include/llama.h
index f23355a6bc9593..baa48f5603f8f7 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #ifndef LLAMA_H
 #define LLAMA_H
 
@@ -1238,3 +1240,5 @@ llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_
 #endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
+
+#endif // defined(__arm64__)
diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index b123d733100ce8..aa4d654dc6d95f 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-grammar.h"
 
 #include "llama-vocab.h"
@@ -537,3 +539,5 @@ void llama_grammar_accept_token_impl(struct llama_grammar * grammar, const struc
 
     smpl->t_sample_us += ggml_time_us() - t_start_sample_us;
 }
+
+#endif // defined(__arm64__)
diff --git a/src/llama-grammar.h b/src/llama-grammar.h
index 695ea0632bb84c..302b6bfe89ff69 100644
--- a/src/llama-grammar.h
+++ b/src/llama-grammar.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include "llama-impl.h"
@@ -37,3 +39,5 @@ void llama_grammar_accept_token_impl(
           const struct llama_vocab * vocab,
        const struct llama_sampling * smpl,
                        llama_token   token);
+
+#endif // defined(__arm64__)
diff --git a/src/llama-impl.h b/src/llama-impl.h
index dcc8c1c15a1b1e..fc911a03e53021 100644
--- a/src/llama-impl.h
+++ b/src/llama-impl.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #define LLAMA_API_INTERNAL
@@ -24,3 +26,5 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
 #define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
 #define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
 #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+
+#endif // defined(__arm64__)
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 8910f6d6542e91..76fdf3811dea49 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-sampling.h"
 
 #include <algorithm>
@@ -633,3 +635,5 @@ llama_token llama_sample_token_with_rng_impl(struct llama_sampling * smpl, llama
 llama_token llama_sample_token_impl(struct llama_sampling * smpl, llama_token_data_array * candidates) {
     return llama_sample_token_with_rng_impl(smpl, candidates, smpl->rng);
 }
+
+#endif // defined(__arm64__)
diff --git a/src/llama-sampling.h b/src/llama-sampling.h
index f7f8e3ef706bc8..1965e847a5d073 100644
--- a/src/llama-sampling.h
+++ b/src/llama-sampling.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include "llama-impl.h"
@@ -54,3 +56,4 @@ llama_token llama_sample_token_greedy_impl     (struct llama_sampling * smpl, ll
 llama_token llama_sample_token_with_rng_impl   (struct llama_sampling * smpl, llama_token_data_array * candidates, std::mt19937 & rng);
 llama_token llama_sample_token_impl            (struct llama_sampling * smpl, llama_token_data_array * candidates);
 
+#endif // defined(__arm64__)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 133094904c2d20..c01839e6fe3ff7 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-vocab.h"
 
 #include "unicode.h"
@@ -1719,3 +1721,5 @@ int32_t llama_detokenize_impl(
 
     return total <= text_len_max ? total : -total;
 }
+
+#endif // defined(__arm64__)
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 30b565d55dad5b..911bbedffe4065 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include "llama-impl.h"
@@ -128,3 +130,5 @@ int32_t llama_detokenize_impl(
                          int32_t   text_len_max,
                             bool   remove_special,
                             bool   unparse_special);
+
+#endif // defined(__arm64__)
diff --git a/src/llama.cpp b/src/llama.cpp
index e6f303d31b3bff..f99002f694d34f 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-impl.h"
 #include "llama-vocab.h"
 #include "llama-grammar.h"
@@ -19160,3 +19162,5 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
     fputs(text, stderr);
     fflush(stderr);
 }
+
+#endif // defined(__arm64__)
diff --git a/src/unicode-data.cpp b/src/unicode-data.cpp
index 02bdf782380fe7..bed427306ceb43 100644
--- a/src/unicode-data.cpp
+++ b/src/unicode-data.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 // generated with scripts/gen-unicode-data.py
 
 #include "unicode-data.h"
@@ -7030,3 +7032,5 @@ const std::vector<range_nfd> unicode_ranges_nfd = {  // start, last, nfd
 {0x02FA1C, 0x02FA1C, 0x009F3B},
 {0x02FA1D, 0x02FA1D, 0x02A600},
 };
+
+#endif // defined(__arm64__)
diff --git a/src/unicode-data.h b/src/unicode-data.h
index e27fe1770710a3..1199bac807dc9e 100644
--- a/src/unicode-data.h
+++ b/src/unicode-data.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include <cstdint>
@@ -18,3 +20,5 @@ extern const std::unordered_set<uint32_t> unicode_set_whitespace;
 extern const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase;
 extern const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase;
 extern const std::vector<range_nfd> unicode_ranges_nfd;
+
+#endif // defined(__arm64__)
diff --git a/src/unicode.cpp b/src/unicode.cpp
index 46650bff06d15e..09671bcf2aaa40 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #if defined(_MSC_VER)
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
@@ -816,3 +818,5 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
 
     return unicode_byte_encoding_process(bpe_words);
 }
+
+#endif // defined(__arm64__)
diff --git a/src/unicode.h b/src/unicode.h
index 008532a242ab8d..7f7d619923fa90 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include <cstdint>
@@ -65,3 +67,5 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
 uint32_t unicode_tolower(uint32_t cp);
 
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
+
+#endif // defined(__arm64__)