diff --git a/include/llama.h b/include/llama.h
index ccb48f73cef5cb..d93388e8bf1f59 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #ifndef LLAMA_H
 #define LLAMA_H
 
@@ -1251,3 +1253,5 @@ extern "C" {
 #endif
 
 #endif // LLAMA_H
+
+#endif
\ No newline at end of file
diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index 74e9f64b393b2f..7378ee27be43c5 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-grammar.h"
 
 #include "llama-vocab.h"
@@ -1136,3 +1138,5 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
     grammar.partial_utf8 = decoded.second;
     GGML_ASSERT(!grammar.stacks.empty());
 }
+
+#endif
\ No newline at end of file
diff --git a/src/llama-grammar.h b/src/llama-grammar.h
index f529ce351e4167..9311d300c7ef61 100644
--- a/src/llama-grammar.h
+++ b/src/llama-grammar.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include "llama-impl.h"
@@ -142,3 +144,5 @@ void llama_grammar_apply_impl(
 void llama_grammar_accept_impl(
               struct llama_grammar & grammar,
                        llama_token   token);
+
+#endif
\ No newline at end of file
diff --git a/src/llama-impl.h b/src/llama-impl.h
index 70f16b61c12e07..fee8ba1afdb97e 100644
--- a/src/llama-impl.h
+++ b/src/llama-impl.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include "llama.h"
@@ -179,3 +181,5 @@ struct ring_buffer {
     size_t pos = 0;
     std::vector<T> data;
 };
+
+#endif
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index c2cfe0a77ad842..2cb25261e14a76 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-sampling.h"
 
 #include "llama-vocab.h"
@@ -2342,3 +2344,5 @@ void llama_perf_sampler_reset(struct llama_sampler * chain) {
 
     ctx->t_sample_us = ctx->n_sample = 0;
 }
+
+#endif
\ No newline at end of file
diff --git a/src/llama-sampling.h b/src/llama-sampling.h
index 919f6fdfcefb81..95966da32b193e 100644
--- a/src/llama-sampling.h
+++ b/src/llama-sampling.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 // TODO: rename llama-sampling.h/.cpp to llama-sampler.h/.cpp ?
@@ -46,3 +48,5 @@ struct llama_sampler * llama_sampler_init_dry_testing(
                          int32_t   dry_allowed_length,
                          int32_t   dry_penalty_last_n,
   const std::vector<std::vector<llama_token>>& seq_breakers);
+
+#endif
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index d1dc96276c2a27..bb4370965fe163 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-vocab.h"
 
 #include "unicode.h"
@@ -1982,3 +1984,5 @@ std::string llama_detokenize(const struct llama_vocab & vocab, const std::vector
     // NOTE: the original tokenizer decodes bytes after collecting the pieces.
     return text;
 }
+
+#endif
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 4bb16d2e4299f7..528f9ae961e1af 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include "llama-impl.h"
@@ -168,3 +170,5 @@ std::string llama_detokenize(
         const struct llama_vocab & vocab,
   const std::vector<llama_token> & tokens,
                             bool   special);
+
+#endif
diff --git a/src/llama.cpp b/src/llama.cpp
index 6719edb38808f5..2e545b6b1c9377 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #include "llama-impl.h"
 #include "llama-vocab.h"
 #include "llama-sampling.h"
@@ -22035,3 +22037,5 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
     fputs(text, stderr);
     fflush(stderr);
 }
+
+#endif
\ No newline at end of file
diff --git a/src/unicode-data.cpp b/src/unicode-data.cpp
index 04dcd7fcfbcea9..834f2f059c5d35 100644
--- a/src/unicode-data.cpp
+++ b/src/unicode-data.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 // generated with scripts/gen-unicode-data.py
 
 #include "unicode-data.h"
@@ -7032,3 +7034,5 @@ const std::initializer_list<range_nfd> unicode_ranges_nfd = {  // start, last, n
 {0x02FA1C, 0x02FA1C, 0x009F3B},
 {0x02FA1D, 0x02FA1D, 0x02A600},
 };
+
+#endif
diff --git a/src/unicode-data.h b/src/unicode-data.h
index f6973ebd2e3506..0b40f10cff6a00 100644
--- a/src/unicode-data.h
+++ b/src/unicode-data.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include <cstdint>
@@ -18,3 +20,5 @@ extern const std::unordered_set<uint32_t> unicode_set_whitespace;
 extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
 extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
 extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
+
+#endif
diff --git a/src/unicode.cpp b/src/unicode.cpp
index 50b35bbbc918ce..5c883462a16854 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #if defined(_MSC_VER)
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
@@ -824,3 +826,5 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
 
     return unicode_byte_encoding_process(bpe_words);
 }
+
+#endif
\ No newline at end of file
diff --git a/src/unicode.h b/src/unicode.h
index 008532a242ab8d..7f7d619923fa90 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -1,3 +1,5 @@
+#if defined(__arm64__)
+
 #pragma once
 
 #include <cstdint>
@@ -65,3 +67,5 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
 uint32_t unicode_tolower(uint32_t cp);
 
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
+
+#endif
\ No newline at end of file