-
I'm going to develop a new operator which supports 6dim matrix matmul. But I can not get the right result as it in the pytorch. Python code: from csv import writer
import torch
import numpy as np
from gguf import GGUFWriter
# 6 dim tensor
A = torch.arange(1, 37, dtype=torch.float32).reshape(3, 1, 2, 1, 2, 3)
B = torch.arange(36, 0, step=-1,dtype=torch.float32).reshape(3, 1, 2, 1, 3, 2)
ground_truth = torch.matmul(A, B)
def dim4_matmul(a, b):
if a.dim() != 4:
raise ValueError("Input tensors must have 4 dimensions")
if b.dim() != 4:
raise ValueError("Input tensors must have 4 dimensions")
return torch.matmul(a, b)
def dim6_matmul_using_dim4(a, b):
a_reshape = a.reshape(
a.shape[0] * a.shape[1] * a.shape[2], a.shape[3], a.shape[4], a.shape[5]
)
b_reshape = b.reshape(
b.shape[0] * b.shape[1] * b.shape[2], b.shape[3], b.shape[4], b.shape[5]
)
result = dim4_matmul(a_reshape, b_reshape)
return result.reshape(
a.shape[0], a.shape[1], a.shape[2], a.shape[3], a.shape[4], b.shape[5]
)
result = dim6_matmul_using_dim4(A, B)
print(result.shape)
assert torch.equal(result, ground_truth)
# write data to gguf file
writer = GGUFWriter("model.gguf", "llama")
A_reshape = A.reshape(
A.shape[0] * A.shape[1] * A.shape[2], A.shape[3], A.shape[4], A.shape[5]
)
B_reshape = B.reshape(
B.shape[0] * B.shape[1] * B.shape[2], B.shape[3], B.shape[4], B.shape[5]
)
result_reshape = result.reshape(
result.shape[0] * result.shape[1] * result.shape[2],
result.shape[3],
result.shape[4],
result.shape[5],
)
# np_b = np.moveaxis(B_reshape.numpy(),2,3) # ggml mul mat requires .T on the second matrix
np_b = B_reshape.mT.numpy()# ggml mul mat requires .T on the second matrix
writer.add_tensor("A", A_reshape.numpy(), A_reshape.numpy().shape)
writer.add_tensor("B", np_b, np_b.shape)
writer.add_tensor("ground_truth", result_reshape.numpy(), result_reshape.numpy().shape)
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close() c++ code #include <iostream>
#include <ggml.h>
class model
{
public:
ggml_context *ctx_weight;
ggml_context *ctx_compute;
ggml_tensor *A;
ggml_tensor *B;
ggml_tensor *ground_truth;
model()
{
ctx_weight = ggml_init({
.mem_buffer = nullptr,
.mem_size = 100 * 1024 * 1024,
.no_alloc = false,
});
ctx_compute = ggml_init({
.mem_buffer = nullptr,
.mem_size = 1 * 1024 * 1024 * 1024,
.no_alloc = false,
});
A = nullptr;
B = nullptr;
ground_truth = nullptr;
}
~model()
{
ggml_free(ctx_weight);
ggml_free(ctx_compute);
}
};
model load_model_from_file(const char *model_path)
{
model m;
auto *ctx = gguf_init_from_file(model_path, {
.ctx = &m.ctx_weight,
.no_alloc = false,
});
m.A = ggml_get_tensor(m.ctx_weight, "A");
m.B = ggml_get_tensor(m.ctx_weight, "B");
m.ground_truth = ggml_get_tensor(m.ctx_weight, "ground_truth");
return m;
}
void print_tensor_shape(ggml_tensor *t)
{
for (int i = 0; i < GGML_MAX_DIMS; i++)
{
std::cout << t->ne[i] << " ";
}
std::cout << std::endl;
}
void print_tensor_data(ggml_tensor* t)
{
float *d = (float *)t->data;
for (int i = 0; i < t->ne[3]; i++)
{
for (int j = 0; j < t->ne[2]; j++)
{
for (int k = 0; k < t->ne[1]; k++)
{
for (int l = 0; l < t->ne[0]; l++)
{
std::cout << d[i * t->ne[2] * t->ne[1] * t->ne[0] + j * t->ne[1] * t->ne[0] + k * t->ne[0] + l] << " ";
}
}
}
}
std::cout << std::endl;
}
int main(int argc, char **argv)
{
auto m = load_model_from_file("model.gguf");
std::cout << "A shape:" << std::endl;
print_tensor_shape(m.A);
std::cout << "B shape:" << std::endl;
print_tensor_shape(m.B);
std::cout << "ground truth shape:" << std::endl;
print_tensor_shape(m.ground_truth);
auto *C = ggml_mul_mat(m.ctx_compute, m.A, m.B);
auto *gf = ggml_new_graph(m.ctx_compute);
ggml_build_forward_expand(gf, C);
ggml_graph_compute_with_ctx(m.ctx_compute, gf, 1);
int n_node = ggml_graph_n_nodes(gf);
auto *output_node = ggml_graph_nodes(gf)[n_node - 1];
std::cout << "output node ele:" << ggml_nelements(output_node) << std::endl;
std::cout << "ground truth ele:" << ggml_nelements(m.ground_truth) << std::endl;
std::cout << "output shape:" << std::endl;
print_tensor_shape(output_node);
GGML_ASSERT(ggml_are_same_shape(output_node, m.ground_truth));
std::cout << "output data:" << std::endl;
print_tensor_data(output_node);
std::cout << "ground truth data:" << std::endl;
print_tensor_data(m.ground_truth);
return 0;
}
What I got print in c++ code is A shape:
3 2 1 6
B shape:
3 2 1 6
ground truth shape:
2 2 1 6
output node ele:24
ground truth ele:24
output shape:
2 2 1 6
output data:
200 506 194 491 668 920 644 887 920 1118 878 1067 956 1100 896 1031 776 866 698 779 380 416 284 311
ground truth data:
200 194 506 491 668 644 920 887 920 878 1118 1067 956 896 1100 1031 776 698 866 779 380 284 416 311 I can not figure out the reason why it calculates the different value than ground_truth. Is there anything I miss? |
Beta Was this translation helpful? Give feedback.
Answered by
slaren
Oct 31, 2024
Replies: 1 comment 1 reply
-
ggml expects the second operand to be tranposed, and the returned matrix is also transposed. Check the last part of https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md |
Beta Was this translation helpful? Give feedback.
1 reply
Answer selected by
Ucag
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ggml expects the second operand to be tranposed, and the returned matrix is also transposed. Check the last part of https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md