Skip to content

Commit

Permalink
attempt to reduce the impact of a worst-case scenario
Browse files Browse the repository at this point in the history
  • Loading branch information
Xarbirus committed Mar 13, 2024
1 parent bcebd7d commit 283ab2f
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9061,6 +9061,11 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
// number of cells moved
uint32_t n_moves = 0;

// each move requires 6*n_layer tensors (see build_defrag)
// - source view, destination view, copy operation
// - x2 for keys and values
const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);

// determine which KV cells to move where
//
// cell i moves to ids[i]
Expand All @@ -9087,15 +9092,6 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
nh++;
}

// each move requires 6*n_layer tensors (see build_defrag)
// - source view, destination view, copy operation
// - x2 for keys and values
//
if (6*(n_moves + nh)*n_layer >= LLAMA_MAX_NODES) {
// the graph is too big, we cannot move more cells
break;
}

uint32_t nf = 0;
uint32_t is = n_kv - 1;

Expand Down Expand Up @@ -9125,11 +9121,19 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
// are we moving a contiguous block of memory?
bool cont = false;

// should we stop searching for the next move?
bool stop = false;

// go back and move the nf cells to the hole
for (; i1 < n_kv; ++i1) {
auto & cell1 = kv_self.cells[i1];

if (cell1.is_empty() || ids[i1] != n_kv) {
if (n_moves == max_moves) {
stop = true;
break;
}

cont = false;
continue;
}
Expand All @@ -9156,6 +9160,10 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
}
}

if (stop || n_moves == max_moves) {
break;
}

//LLAMA_LOG_INFO("(tmp log) KV defrag: move [%u, %u) to [%u, %u)\n", is, i1 + 1, i0, i0 + nh);

i0 += nh - 1;
Expand Down

0 comments on commit 283ab2f

Please sign in to comment.