Skip to content

Commit

Permalink
llama : move sampling code into llama-sampling
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Jul 16, 2024
1 parent 1666f92 commit a3ee316
Show file tree
Hide file tree
Showing 7 changed files with 754 additions and 699 deletions.
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,7 @@ OBJ_GGML += \

# Object files for the llama library; $(LIB_LLAMA) lists $(OBJ_LLAMA) as a prerequisite.
OBJ_LLAMA = \
src/llama.o \
src/llama-sampling.o \
src/unicode.o \
src/unicode-data.o

Expand Down Expand Up @@ -1047,6 +1048,7 @@ src/unicode-data.o: \

src/llama.o: \
src/llama.cpp \
src/llama-impl.h \
src/unicode.h \
include/llama.h \
ggml/include/ggml-cuda.h \
Expand All @@ -1056,6 +1058,13 @@ src/llama.o: \
ggml/include/ggml-backend.h
$(CXX) $(CXXFLAGS) -c $< -o $@

# Compile the sampling translation unit. $< expands to the first prerequisite
# (src/llama-sampling.cpp) and $@ to the target; the headers are listed so that
# a change to any of them triggers a rebuild of this object.
src/llama-sampling.o: \
src/llama-sampling.cpp \
src/llama-sampling.h \
src/llama-impl.h \
include/llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@

$(LIB_LLAMA): \
$(OBJ_LLAMA) \
$(LIB_GGML)
Expand Down
12 changes: 6 additions & 6 deletions include/llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -1081,12 +1081,6 @@ extern "C" {
llama_token_data_array * candidates,
float temp);

/// @details Apply constraints from grammar
/// @param ctx The llama context; how it is used is not visible from this declaration.
/// @param candidates The candidate token array the grammar constraints are applied to. NOTE(review): passed as a non-const pointer, so presumably updated in place — confirm against the implementation.
/// @param grammar The grammar supplying the constraints; passed as pointer-to-const.
LLAMA_API void llama_sample_grammar(
struct llama_context * ctx,
llama_token_data_array * candidates,
const struct llama_grammar * grammar);

/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
/// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
Expand Down Expand Up @@ -1124,6 +1118,12 @@ extern "C" {
struct llama_context * ctx,
llama_token_data_array * candidates);

/// @details Apply constraints from grammar
/// @param ctx The llama context; how it is used is not visible from this declaration.
/// @param candidates The candidate token array the grammar constraints are applied to. NOTE(review): passed as a non-const pointer, so presumably updated in place — confirm against the implementation.
/// @param grammar The grammar supplying the constraints; passed as pointer-to-const.
LLAMA_API void llama_sample_grammar(
struct llama_context * ctx,
llama_token_data_array * candidates,
const struct llama_grammar * grammar);

/// @details Accepts the sampled token into the grammar
LLAMA_API void llama_grammar_accept_token(
struct llama_context * ctx,
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ endif()
add_library(llama
../include/llama.h
llama.cpp
llama-sampling.cpp
unicode.h
unicode.cpp
unicode-data.cpp
Expand Down
50 changes: 50 additions & 0 deletions src/llama-impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once

#define LLAMA_API_INTERNAL
#include "llama.h"

#include <array>
#include <set>
#include <map>
#include <cstdint>
#include <random>

// Pull in POSIX headers only where they are available.
// __has_include is tested with #ifdef first so that preprocessors which do not
// provide it skip the whole section instead of failing on the #if below.
#ifdef __has_include
#if __has_include(<unistd.h>)
#include <unistd.h>
// <unistd.h> must come first: per POSIX it is the header that defines the
// _POSIX_* option macros checked below.
#if defined(_POSIX_MAPPED_FILES)
#include <sys/mman.h>
#include <fcntl.h>
#endif
#if defined(_POSIX_MEMLOCK_RANGE)
#include <sys/resource.h>
#endif
#endif
#endif

// bump if necessary
// Compile-time upper bounds shared by the library sources that include this header.
// NOTE(review): where each limit is enforced is not visible here; the trailing
// "DeepSeekV2" note presumably names the model that required the current expert count — confirm.
#define LLAMA_MAX_NODES 8192
#define LLAMA_MAX_LAYERS 256
#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2

// LLAMA_ATTRIBUTE_FORMAT(fmt_index, first_vararg_index)
//
// Marks a function as printf-like so GCC-compatible compilers type-check the
// format string against the variadic arguments.
//   - GCC-compatible + MinGW : uses the gnu_printf archetype
//   - other GCC-compatible   : uses the printf archetype
//   - anything else          : expands to nothing
#if defined(__GNUC__) && defined(__MINGW32__)
#    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#elif defined(__GNUC__)
#    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#else
#    define LLAMA_ATTRIBUTE_FORMAT(...)
#endif

//
// logging
//

// printf-style logging entry point. The (2, 3) annotation tells GCC-compatible
// compilers that `format` is argument 2 and the variadic values start at
// argument 3, so mismatched format specifiers are diagnosed at compile time.
LLAMA_ATTRIBUTE_FORMAT(2, 3)
void llama_log_internal (ggml_log_level level, const char * format, ...);
// Default log callback taking an already-formatted `text` plus an opaque
// `user_data` pointer. Only declared here; its behavior is defined elsewhere.
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);

// Convenience wrappers: each forwards its arguments (format string first) to
// llama_log_internal() with the corresponding fixed GGML_LOG_LEVEL_* value.
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
Loading

0 comments on commit a3ee316

Please sign in to comment.