diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile index 7516c8313c1d6..b7992f47b8f92 100644 --- a/.devops/main-intel.Dockerfile +++ b/.devops/main-intel.Dockerfile @@ -1,15 +1,7 @@ -ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04 +ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \ - echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \ - chmod 644 /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \ - rm /etc/apt/sources.list.d/intel-graphics.list && \ - wget -O- https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null && \ - echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \ - chmod 644 /usr/share/keyrings/intel-graphics.gpg - ARG LLAMA_SYCL_F16=OFF RUN apt-get update && \ apt-get install -y git diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile index 13d00b7371744..c5adcb6da0408 100644 --- a/.devops/server-intel.Dockerfile +++ b/.devops/server-intel.Dockerfile @@ -1,15 +1,7 @@ -ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04 +ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \ - echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \ - chmod 644 /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \ - rm /etc/apt/sources.list.d/intel-graphics.list && \ - wget -O- https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null && \ - echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \ - chmod 644 /usr/share/keyrings/intel-graphics.gpg - ARG LLAMA_SYCL_F16=OFF RUN apt-get update && \ apt-get install -y git libcurl4-openssl-dev @@ -27,14 +19,6 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \ - echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \ - chmod 644 /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \ - rm /etc/apt/sources.list.d/intel-graphics.list && \ - wget -O- https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null && \ - echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \ - chmod 644 /usr/share/keyrings/intel-graphics.gpg - RUN apt-get update && \ apt-get install -y libcurl4-openssl-dev diff --git a/README.md b/README.md index ecb9d00db7957..8c065aace09be 100644 --- a/README.md +++ b/README.md @@ -576,7 +576,9 @@ Building the program with BLAS support may lead to some performance improvements vulkaninfo ``` - Alternatively your package manager might be able to provide the appropiate libraries. For example for Ubuntu 22.04 you can install `libvulkan-dev` instead. + Alternatively your package manager might be able to provide the appropriate libraries. + For example for Ubuntu 22.04 you can install `libvulkan-dev` instead. + For Fedora 40, you can install `vulkan-devel`, `glslc` and `glslang` packages. Then, build llama.cpp using the cmake command below: diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 80714fa58360b..919078f2bd920 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -147,7 +147,7 @@ struct server_slot { int32_t n_prompt_tokens = 0; int32_t n_prompt_tokens_processed = 0; - std::string prompt; + json prompt; // can be either a string, array of strings or array of token ids // when a task is submitted, we first tokenize the prompt and store it here std::vector prompt_tokens; @@ -822,8 +822,13 @@ struct server_context { continue; } + // skip the slot if it does not contains prompt + if (!slot.prompt.is_string()) { + continue; + } + // current slot's prompt - std::string slot_prompt = slot.prompt; + std::string slot_prompt = slot.prompt.get(); // length of the current slot's prompt int slot_prompt_len = slot_prompt.size(); @@ -957,12 +962,12 @@ struct server_context { return false; } - if (prompt->is_string()) { - slot.prompt = prompt->get(); - } else if (prompt->is_array() && prompt->size() == 1 && prompt->at(0).is_string()) { - slot.prompt = prompt->at(0).get(); + if ((prompt->is_string()) || + (prompt->is_array() && prompt->size() == 1 && prompt->at(0).is_string()) || + (prompt->is_array() && !prompt->empty() && prompt->at(0).is_number_integer())) { + slot.prompt = *prompt; } else { - send_error(task, "\"prompt\" must be a string or an array of strings", ERROR_TYPE_INVALID_REQUEST); + send_error(task, "\"prompt\" must be a string or an array of integers", ERROR_TYPE_INVALID_REQUEST); return false; } } diff --git a/ggml-cuda.cu b/ggml-cuda.cu index af10f21a0a92a..c6bc3f64c90de 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -2740,7 +2740,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_TANH: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml-cuda/unary.cu b/ggml-cuda/unary.cu index ac03d5c6fce54..a5ff96320f23f 100644 --- a/ggml-cuda/unary.cu +++ b/ggml-cuda/unary.cu @@ -148,6 +148,8 @@ void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -160,6 +162,8 @@ void ggml_cuda_op_silu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -172,6 +176,8 @@ void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst) float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -184,6 +190,8 @@ void ggml_cuda_op_tanh(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -196,6 +204,8 @@ void ggml_cuda_op_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -208,6 +218,8 @@ void ggml_cuda_op_sigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -220,6 +232,8 @@ void ggml_cuda_op_hardsigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -232,6 +246,8 @@ void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst) float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -244,6 +260,8 @@ void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -259,6 +277,8 @@ void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp index 5592741be4255..18c6f4a104f36 100644 --- a/ggml-kompute.cpp +++ b/ggml-kompute.cpp @@ -1340,7 +1340,7 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) { case GGML_UNARY_OP_RELU: case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_SILU: - return true; + return ggml_is_contiguous(op->src[0]); default: ; } diff --git a/ggml-metal.m b/ggml-metal.m index 946f11813dcf9..b5c287347e7c9 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -744,7 +744,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_SILU: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 42fc0df203537..e7d260bd4ebe3 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -17190,7 +17190,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_TANH: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 06ba23313955b..5b92804915658 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -6439,7 +6439,7 @@ GGML_CALL static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_RELU: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml.c b/ggml.c index 1fc77743bc7b9..2ea1d76773893 100644 --- a/ggml.c +++ b/ggml.c @@ -3212,35 +3212,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) { return tensor->nb[0] > tensor->nb[1]; } -GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) { - static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); +static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) { + size_t next_nb = ggml_type_size(tensor->type); + if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) { + return false; + } + next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + if (tensor->ne[i] != 1) { + if (i > n) { + if (tensor->nb[i] != next_nb) { + return false; + } + next_nb *= tensor->ne[i]; + } else { + // this dimension does not need to be contiguous + next_nb = tensor->ne[i]*tensor->nb[i]; + } + } + } + return true; +} - return - tensor->nb[0] == ggml_type_size(tensor->type) && - tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && - tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; +GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) { + return ggml_is_contiguous_0(tensor); } GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) { - return ggml_is_contiguous(tensor); + return ggml_is_contiguous_n(tensor, 0); } GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) { - static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); - - return - tensor->nb[0] == ggml_type_size(tensor->type) && - tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; + return ggml_is_contiguous_n(tensor, 1); } GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) { - static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); - - return - tensor->nb[0] == ggml_type_size(tensor->type) && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; + return ggml_is_contiguous_n(tensor, 2); } GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) { @@ -3272,20 +3279,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return - (t0->ne[0] == t1->ne[0] ) && - (t0->ne[1] == t1->ne[1] ) && - (t0->ne[2] == t1->ne[2] ) && - (t0->ne[3] == t1->ne[3] ); + (t0->ne[0] == t1->ne[0]) && + (t0->ne[1] == t1->ne[1]) && + (t0->ne[2] == t1->ne[2]) && + (t0->ne[3] == t1->ne[3]); } bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return - (t0->nb[0] == t1->nb[0] ) && - (t0->nb[1] == t1->nb[1] ) && - (t0->nb[2] == t1->nb[2] ) && - (t0->nb[3] == t1->nb[3] ); + (t0->nb[0] == t1->nb[0]) && + (t0->nb[1] == t1->nb[1]) && + (t0->nb[2] == t1->nb[2]) && + (t0->nb[3] == t1->nb[3]); } // check if t1 can be represented as a repeatition of t0 @@ -4078,32 +4085,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) { switch (tensor->type) { case GGML_TYPE_I8: { - GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); return ((int8_t *)(tensor->data))[i]; } case GGML_TYPE_I16: { - GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); return ((int16_t *)(tensor->data))[i]; } case GGML_TYPE_I32: { - GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); return ((int32_t *)(tensor->data))[i]; } case GGML_TYPE_F16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); } case GGML_TYPE_BF16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t)); return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]); } case GGML_TYPE_F32: { - GGML_ASSERT(tensor->nb[0] == sizeof(float)); return ((float *)(tensor->data))[i]; } default: @@ -4125,32 +4126,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) { switch (tensor->type) { case GGML_TYPE_I8: { - GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); ((int8_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I16: { - GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); ((int16_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I32: { - GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); ((int32_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_F16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); } break; case GGML_TYPE_BF16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t)); ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value); } break; case GGML_TYPE_F32: { - GGML_ASSERT(tensor->nb[0] == sizeof(float)); ((float *)(tensor->data))[i] = value; } break; default: @@ -7343,13 +7338,15 @@ struct ggml_tensor * ggml_add_rel_pos_inplace( return ggml_add_rel_pos_impl(ctx, a, pw, ph, true); } -// gmml_unary +// ggml_unary static struct ggml_tensor * ggml_unary_impl( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op, bool inplace) { + GGML_ASSERT(ggml_is_contiguous_1(a)); + bool is_node = false; if (!inplace && (a->grad)) { @@ -11014,6 +11011,8 @@ static void ggml_compute_forward_abs_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11023,9 +11022,6 @@ static void ggml_compute_forward_abs_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_abs_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11060,6 +11056,8 @@ static void ggml_compute_forward_sgn_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11069,9 +11067,6 @@ static void ggml_compute_forward_sgn_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_sgn_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11106,6 +11101,8 @@ static void ggml_compute_forward_neg_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11115,9 +11112,6 @@ static void ggml_compute_forward_neg_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_neg_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11152,6 +11146,8 @@ static void ggml_compute_forward_step_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11161,9 +11157,6 @@ static void ggml_compute_forward_step_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_step_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11198,6 +11191,8 @@ static void ggml_compute_forward_tanh_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11207,9 +11202,6 @@ static void ggml_compute_forward_tanh_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_tanh_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11244,6 +11236,8 @@ static void ggml_compute_forward_elu_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11253,9 +11247,6 @@ static void ggml_compute_forward_elu_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_elu_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11290,6 +11281,8 @@ static void ggml_compute_forward_relu_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11299,9 +11292,6 @@ static void ggml_compute_forward_relu_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_relu_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11336,6 +11326,8 @@ static void ggml_compute_forward_sigmoid_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11345,9 +11337,6 @@ static void ggml_compute_forward_sigmoid_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_sigmoid_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11381,9 +11370,9 @@ static void ggml_compute_forward_gelu_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11444,9 +11433,9 @@ static void ggml_compute_forward_gelu_quick_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11507,9 +11496,9 @@ static void ggml_compute_forward_silu_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11570,6 +11559,8 @@ static void ggml_compute_forward_leaky_relu_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11619,11 +11610,11 @@ static void ggml_compute_forward_silu_back_f32( const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * grad = dst->src[1]; - GGML_ASSERT(ggml_is_contiguous_1(grad)); - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); - GGML_ASSERT(ggml_are_same_shape(src0, grad)); + assert(ggml_is_contiguous_1(grad)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); + assert(ggml_are_same_shape(src0, grad)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11685,6 +11676,8 @@ static void ggml_compute_forward_hardswish_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11694,9 +11687,6 @@ static void ggml_compute_forward_hardswish_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_hardswish_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11728,6 +11718,8 @@ static void ggml_compute_forward_hardsigmoid_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11737,9 +11729,6 @@ static void ggml_compute_forward_hardsigmoid_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_hardsigmoid_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -16686,7 +16675,10 @@ static void ggml_compute_forward_map_unary_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -16695,9 +16687,6 @@ static void ggml_compute_forward_map_unary_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert( dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { fun(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -16735,6 +16724,9 @@ static void ggml_compute_forward_map_binary_f32( const struct ggml_tensor * src1 = dst->src[1]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(src1)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -16744,10 +16736,6 @@ static void ggml_compute_forward_map_binary_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert( dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - assert(src1->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { fun(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index ce406a8af867a..2b48e623e3476 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -642,20 +642,29 @@ struct test_case { struct test_unary : public test_case { const ggml_unary_op op; const ggml_type type; - const std::array ne; + const std::array ne_a; + int v; // view (1 : non-contiguous a) std::string vars() override { - return VARS_TO_STR2(type, ne); + return VARS_TO_STR3(type, ne_a, v); } test_unary(ggml_unary_op op, ggml_type type = GGML_TYPE_F32, - std::array ne = {128, 10, 10, 10}) - : op(op), type(type), ne(ne) {} + std::array ne_a = {128, 10, 10, 10}, + int v = 0) + : op(op), type(type), ne_a(ne_a), v(v) {} ggml_tensor * build_graph(ggml_context * ctx) override { - ggml_tensor * in = ggml_new_tensor(ctx, type, 4, ne.data()); - ggml_tensor * out = ggml_unary(ctx, in, op); + ggml_tensor * a; + if (v & 1) { + auto ne = ne_a; ne[0] *= 3; + a = ggml_new_tensor(ctx, type, 4, ne.data()); + a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0); + } else { + a = ggml_new_tensor(ctx, type, 4, ne_a.data()); + } + ggml_tensor * out = ggml_unary(ctx, a, op); return out; } @@ -2016,9 +2025,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op }; // unary ops - for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) { - test_cases.emplace_back(new test_unary((ggml_unary_op) op)); - test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 7, 13, 19, 23 })); + for (int v : {0, 1}) { + for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) { + test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 128, 10, 10, 10 }, v)); + test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 7, 13, 19, 23 }, v)); + } } test_cases.emplace_back(new test_get_rows(GGML_TYPE_F32, 1, 8, 2, 1, false));