How to use ggml_mul_mat? #1000

Ucag · 2024-10-30T19:28:14Z

Ucag
Oct 30, 2024

I'm going to develop a new operator which supports 6dim matrix matmul. But I can not get the right result as it in the pytorch.

Python code:

from csv import writer
import torch
import numpy as np
from gguf import GGUFWriter

# 6 dim tensor
A = torch.arange(1, 37, dtype=torch.float32).reshape(3, 1, 2, 1, 2, 3)
B = torch.arange(36, 0, step=-1,dtype=torch.float32).reshape(3, 1, 2, 1, 3, 2)

ground_truth = torch.matmul(A, B)


def dim4_matmul(a, b):
    if a.dim() != 4:
        raise ValueError("Input tensors must have 4 dimensions")
    if b.dim() != 4:
        raise ValueError("Input tensors must have 4 dimensions")
    return torch.matmul(a, b)


def dim6_matmul_using_dim4(a, b):
    a_reshape = a.reshape(
        a.shape[0] * a.shape[1] * a.shape[2], a.shape[3], a.shape[4], a.shape[5]
    )
    b_reshape = b.reshape(
        b.shape[0] * b.shape[1] * b.shape[2], b.shape[3], b.shape[4], b.shape[5]
    )
    result = dim4_matmul(a_reshape, b_reshape)
    return result.reshape(
        a.shape[0], a.shape[1], a.shape[2], a.shape[3], a.shape[4], b.shape[5]
    )


result = dim6_matmul_using_dim4(A, B)
print(result.shape)
assert torch.equal(result, ground_truth)

# write data to gguf file
writer = GGUFWriter("model.gguf", "llama")
A_reshape = A.reshape(
    A.shape[0] * A.shape[1] * A.shape[2], A.shape[3], A.shape[4], A.shape[5]
)
B_reshape = B.reshape(
    B.shape[0] * B.shape[1] * B.shape[2], B.shape[3], B.shape[4], B.shape[5]
)
result_reshape = result.reshape(
    result.shape[0] * result.shape[1] * result.shape[2],
    result.shape[3],
    result.shape[4],
    result.shape[5],
)
# np_b = np.moveaxis(B_reshape.numpy(),2,3) # ggml mul mat requires .T on the second matrix
np_b = B_reshape.mT.numpy()# ggml mul mat requires .T on the second matrix
writer.add_tensor("A", A_reshape.numpy(), A_reshape.numpy().shape)
writer.add_tensor("B", np_b, np_b.shape)
writer.add_tensor("ground_truth", result_reshape.numpy(), result_reshape.numpy().shape)
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()

c++ code

#include <iostream>
#include <ggml.h>

class model
{
public:
    ggml_context *ctx_weight;
    ggml_context *ctx_compute;
    ggml_tensor *A;
    ggml_tensor *B;
    ggml_tensor *ground_truth;

    model()
    {
        ctx_weight = ggml_init({
            .mem_buffer = nullptr,
            .mem_size = 100 * 1024 * 1024,
            .no_alloc = false,
        });
        ctx_compute = ggml_init({
            .mem_buffer = nullptr,
            .mem_size = 1 * 1024 * 1024 * 1024,
            .no_alloc = false,
        });
        A = nullptr;
        B = nullptr;
        ground_truth = nullptr;
    }
    ~model()
    {
        ggml_free(ctx_weight);
        ggml_free(ctx_compute);
    }
};

model load_model_from_file(const char *model_path)
{
    model m;
    auto *ctx = gguf_init_from_file(model_path, {
                                                    .ctx = &m.ctx_weight,
                                                    .no_alloc = false,
                                                });
    m.A = ggml_get_tensor(m.ctx_weight, "A");
    m.B = ggml_get_tensor(m.ctx_weight, "B");
    m.ground_truth = ggml_get_tensor(m.ctx_weight, "ground_truth");
    return m;
}

void print_tensor_shape(ggml_tensor *t)
{
    for (int i = 0; i < GGML_MAX_DIMS; i++)
    {
        std::cout << t->ne[i] << " ";
    }
    std::cout << std::endl;
}
void print_tensor_data(ggml_tensor* t)
{
    float *d = (float *)t->data;
    for (int i = 0; i < t->ne[3]; i++)
    {
        for (int j = 0; j < t->ne[2]; j++)
        {
            for (int k = 0; k < t->ne[1]; k++)
            {
                for (int l = 0; l < t->ne[0]; l++)
                {
                    std::cout << d[i * t->ne[2] * t->ne[1] * t->ne[0] + j * t->ne[1] * t->ne[0] + k * t->ne[0] + l] << " ";
                }
            }
        }
    }
    std::cout << std::endl;
}
int main(int argc, char **argv)
{
    auto m = load_model_from_file("model.gguf");
    std::cout << "A shape:" << std::endl;
    print_tensor_shape(m.A);
    std::cout << "B shape:" << std::endl;
    print_tensor_shape(m.B);
    std::cout << "ground truth shape:" << std::endl;
    print_tensor_shape(m.ground_truth);
    auto *C = ggml_mul_mat(m.ctx_compute, m.A, m.B);
    auto *gf = ggml_new_graph(m.ctx_compute);
    ggml_build_forward_expand(gf, C);
    ggml_graph_compute_with_ctx(m.ctx_compute, gf, 1);
    int n_node = ggml_graph_n_nodes(gf);
    auto *output_node = ggml_graph_nodes(gf)[n_node - 1];
    std::cout << "output node ele:" << ggml_nelements(output_node) << std::endl;
    std::cout << "ground truth ele:" << ggml_nelements(m.ground_truth) << std::endl;
    std::cout << "output shape:" << std::endl;
    print_tensor_shape(output_node);
    GGML_ASSERT(ggml_are_same_shape(output_node, m.ground_truth));
    std::cout << "output data:" << std::endl;
    print_tensor_data(output_node);
    std::cout << "ground truth data:" << std::endl;
    print_tensor_data(m.ground_truth);
    return 0;
}

What I got print in c++ code is

A shape:
3 2 1 6 
B shape:
3 2 1 6 
ground truth shape:
2 2 1 6 
output node ele:24
ground truth ele:24
output shape:
2 2 1 6 
output data:
200 506 194 491 668 920 644 887 920 1118 878 1067 956 1100 896 1031 776 866 698 779 380 416 284 311 
ground truth data:
200 194 506 491 668 644 920 887 920 878 1118 1067 956 896 1100 1031 776 698 866 779 380 284 416 311

I can not figure out the reason why it calculates the different value than ground_truth. Is there anything I miss?

Answered by slaren

Oct 31, 2024

ggml expects the second operand to be tranposed, and the returned matrix is also transposed. Check the last part of https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md

View full answer

slaren · 2024-10-31T23:51:39Z

slaren
Oct 31, 2024
Collaborator

ggml expects the second operand to be tranposed, and the returned matrix is also transposed. Check the last part of https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md

1 reply

Ucag Nov 1, 2024
Author

Thank you a lot. Love u 3000.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

How to use ggml_mul_mat? #1000

{{title}}

Replies: 1 comment 1 reply

{{title}}

{{title}}

Select a reply

How to use ggml_mul_mat? #1000

Ucag Oct 30, 2024

Replies: 1 comment · 1 reply

slaren Oct 31, 2024 Collaborator

Ucag Nov 1, 2024 Author

Ucag
Oct 30, 2024

Replies: 1 comment 1 reply

slaren
Oct 31, 2024
Collaborator

Ucag Nov 1, 2024
Author