diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift
index 9a62c5bb7ebfe1..2bc5fce7dfb6ee 100644
--- a/examples/batched.swift/Sources/main.swift
+++ b/examples/batched.swift/Sources/main.swift
@@ -214,7 +214,7 @@ let t_main_end = ggml_time_us()
 
 print("decoded \(n_decode) tokens in \(String(format: "%.2f", Double(t_main_end - t_main_start) / 1_000_000.0)) s, speed: \(String(format: "%.2f", Double(n_decode) / (Double(t_main_end - t_main_start) / 1_000_000.0))) t/s\n")
 
-llama_print_timings(context, smpl, nil)
+llama_print_timings(context, smpl)
 
 private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
     let utf8Count = text.utf8.count
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index b29e086ccd617a..8d69acae2d6d5f 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -51,7 +51,6 @@ static std::vector<std::string> k_prompts = {
 struct client {
     ~client() {
         if (ctx_sampling) {
-            llama_sampling_free(ctx_sampling->smpl);
             llama_sampling_free(ctx_sampling);
         }
     }
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index fbf60132633fa0..be5ab0fcf2c2b9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -663,7 +663,6 @@ struct server_context {
         // Clear any sampling context
         for (server_slot & slot : slots) {
             if (slot.ctx_sampling != nullptr) {
-                llama_sampling_free(slot.ctx_sampling->smpl);
                 llama_sampling_free(slot.ctx_sampling);
             }
         }
@@ -1088,7 +1087,6 @@ struct server_context {
 
         {
             if (slot.ctx_sampling != nullptr) {
-                llama_sampling_free(slot.ctx_sampling->smpl);
                 llama_sampling_free(slot.ctx_sampling);
             }
 
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 013ea0d97b0e91..dcf56dafa2fa8d 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -600,7 +600,6 @@ int main(int argc, char ** argv) {
     llama_sampling_free(ctx_sampling);
 
     for (int s = 0; s < n_seq_dft; ++s) {
-        llama_sampling_free(drafts[s].ctx_sampling->smpl);
         llama_sampling_free(drafts[s].ctx_sampling);
     }