Skip to content

Commit

Permalink
v0.0.8 (#2)
Browse files Browse the repository at this point in the history
* Start updating to recent ggml-sys-bleedingedge.

* Continue half-hearted update to current GGML.
  • Loading branch information
KerfuffleV2 authored Sep 2, 2023
1 parent b11fbc9 commit 6f18440
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 46 deletions.
7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty-ggml"
version = "0.0.7"
version = "0.0.8"
description = "Idiomatic bindings for the GGML library (pre-alpha)"
repository = "https://github.com/KerfuffleV2/rusty-ggml"
keywords = ["deep-learning", "machine-learning", "tensor", "ggml", "ml"]
Expand All @@ -13,14 +13,15 @@ default = ["ggml-sys-bleedingedge/use_cmake"]
no_k_quants = ["ggml-sys-bleedingedge/no_k_quants"]
no_accelerate = ["ggml-sys-bleedingedge/no_accelerate"]
cublas = ["ggml-sys-bleedingedge/cublas"]
hipblas = ["ggml-sys-bleedingedge/hipblas"]
clblast = ["ggml-sys-bleedingedge/clblast"]
openblas = ["ggml-sys-bleedingedge/openblas"]
metal = ["ggml-sys-bleedingedge/metal"]

[dependencies]
ggml-sys-bleedingedge = "=2306220059.0.0"
ggml-sys-bleedingedge = "=2309021811.0.0"
anyhow = "1"
thiserror = "1"
num-traits = "0.2"
num-derive="0.3"
num-derive="0.4"
bytemuck = { version = "1", features = ["extern_crate_alloc"] }
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ See:

Not suitable for general use. Consider this to be pre-alpha code.

Example usage: https://github.com/KerfuffleV2/smolrsrwkv/blob/600718ebee029aa684c4a6abbe035d21283a446c/smolrwkv/src/ggml/graph.rs
**`v0.0.8` Warning**: Keeping this in sync with recent GGML changes has lagged. It compiles and seems to work but there might be weird stuff I haven't caught.

**Note**: There are special considerations when using GPU features like `cublas`, `hipblas`. See the `ggml-sys-bleedingedge` repo or crate documentation for more information.

Example usage: https://github.com/KerfuffleV2/smolrsrwkv/blob/189915ec68b28d057b440f75803d3d056e150733/smolrwkv/src/ggml/graph.rs

## Related

For your token sampling needs see https://github.com/KerfuffleV2/llm-samplers ( https://crates.io/crates/llm-samplers )
19 changes: 12 additions & 7 deletions src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ pub(crate) struct IContext {
pub(crate) failed: Option<Arc<anyhow::Error>>,
}

// FIXME: YOLO? It's an internal struct and only lives in an Arc.
unsafe impl Send for IContext {}

impl Drop for IContext {
// Since `IContext` lives inside an `Arc` this will only happen
// when the very last instance of the `Arc` is dropped.
Expand Down Expand Up @@ -405,8 +408,9 @@ impl GContext {
/// Runs the supplied graph using this context.
pub fn compute(&self, graph: &mut GGraph) -> Result<()> {
ensure!(!self.no_alloc, GContextError::NoAlloc);
let n_threads = graph.n_threads;
self.with_icontext_infallible(|ictx| unsafe {
gg::ggml_graph_compute(ictx.gptr(), &mut graph.0)
gg::ggml_graph_compute_with_ctx(ictx.gptr(), &mut graph.graph, n_threads as i32)
})
}

Expand All @@ -416,15 +420,16 @@ impl GContext {
}
}

#[repr(transparent)]
pub struct GGraph(gg::ggml_cgraph);
pub struct GGraph {
n_threads: usize,
graph: gg::ggml_cgraph,
}

impl GGraph {
/// Create a new computation graph with the specified number of threads.
pub fn new(n_threads: usize) -> Self {
let mut graph = unsafe { std::mem::zeroed::<gg::ggml_cgraph>() };
graph.n_threads = n_threads as i32;
Self(graph)
let graph = unsafe { std::mem::zeroed::<gg::ggml_cgraph>() };
Self { n_threads, graph }
}

/// Register a tensor to be processed when the graph is computed.
Expand All @@ -439,7 +444,7 @@ impl GGraph {
tensor
.as_ref()
.with_tensor_infallible(|_ctx, _ictx, tptr| unsafe {
gg::ggml_build_forward_expand(&mut self.0, tptr)
gg::ggml_build_forward_expand(&mut self.graph, tptr)
})
}
}
18 changes: 5 additions & 13 deletions src/gtensor/binary_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,12 @@ where
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
pub fn conv_1d<
const STRIDE: usize,
const RDIMS: usize,
const ODIMS: usize,
T: AsRef<GTensor<RDIMS>>,
>(
pub fn conv_1d<const RDIMS: usize, const ODIMS: usize, T: AsRef<GTensor<RDIMS>>>(
&self,
rhs: T,
s0: usize,
p0: usize,
d0: usize,
) -> Self
where
Dim<RDIMS>: DimValid,
Expand All @@ -220,8 +218,6 @@ where
DimPair<DIMS, 4>: DimLt,
DimPair<RDIMS, 2>: DimGtE,
DimPair<ODIMS, 2>: DimEq,
DimPair<STRIDE, 1>: DimGtE,
DimPair<STRIDE, 3>: DimLt,
{
let rmd = rhs.as_ref().md.clone();
self.new_binary(rhs, |ctx, ictx, ltptr, rtptr| {
Expand All @@ -234,11 +230,7 @@ where
let mr = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, GType::F32, shp)
.fit_or_die()?;
Ok((mr, unsafe {
if STRIDE == 1 {
gg::ggml_conv_1d_s1_ph(ictx.gptr(), ltptr, rtptr)
} else {
gg::ggml_conv_1d_s2_ph(ictx.gptr(), ltptr, rtptr)
}
gg::ggml_conv_1d(ictx.gptr(), ltptr, rtptr, s0 as i32, p0 as i32, d0 as i32)
}))
})
}
Expand Down
9 changes: 8 additions & 1 deletion src/gtensor/matmul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,14 @@ macro_rules! mk_gmulmatinstances {
GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, shp)
.fit_or_die()?;
unsafe {
Ok((mr, gg::ggml_mul_mat(ictx.gptr(), ltptr, rtptr)))
let t = gg::ggml_mul_mat(ictx.gptr(), ltptr, rtptr);
// FIXME: Horrible hack to pretend mul_mat has the old non-broadcasting behavior.
let real_dims = (*t).ne.iter().take_while(|i| **i != 1).collect::<Vec<_>>().len();
if real_dims != $o {
Err(GTensorError::InvalidOperation)?;
}
(*t).n_dims = $o;
Ok((mr, t))
}
})
}
Expand Down
6 changes: 5 additions & 1 deletion src/gtensor/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ where
pub(crate) fn from_ptr(tp: NonNull<gg::ggml_tensor>) -> Self {
let (tr, tp) = (unsafe { tp.as_ref() }, tp.as_ptr());
let (op, typ, shape) = {
assert_eq!(DIMS, tr.n_dims as usize, "Unexpected number of dimensions!");
assert_eq!(
DIMS, tr.n_dims as usize,
"Unexpected number of dimensions {:?}!",
tr.ne
);
let mut shp = [0; DIMS];
shp.iter_mut()
.zip(tr.ne[0..DIMS].iter())
Expand Down
96 changes: 76 additions & 20 deletions src/gtensor/unary_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,24 +112,6 @@ where
/// ```
[neg, ggml_neg],

/// Perform LayerNorm operation on tensor `A`.
/// Returns a new tensor.
///
/// `a.norm()`
///
/// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md)
/// for more information and comparison with the [GTensor::rms_norm] function.
[rms_norm, ggml_rms_norm],

/// Perform RMSNorm operation on tensor `A`.
/// Returns a new tensor.
///
/// `a.rms_norm()`
///
/// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md)
/// for more information and comparison with the [GTensor::norm] function.
[norm, ggml_norm],

/// Elementwise step operation on tensor `A`.
/// Returns a new tensor.
///
Expand Down Expand Up @@ -229,6 +211,38 @@ where
[soft_max, ggml_soft_max],
}

/// Perform LayerNorm operation on tensor `A`.
/// Returns a new tensor.
///
/// `a.norm()`
///
/// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md)
/// for more information and comparison with the [GTensor::rms_norm] function.
pub fn norm(&self, eps: f32) -> Self {
self.new_unary(|ctx, ictx, tptr| {
let mr =
GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, self.md.shape)
.fit_or_die()?;
unsafe { Ok((mr, gg::ggml_norm(ictx.gptr(), tptr, eps))) }
})
}

/// Perform RMSNorm operation on tensor `A`.
/// Returns a new tensor.
///
/// `a.rms_norm()`
///
/// See [this helpful explanation](https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/README.md)
/// for more information and comparison with the [GTensor::norm] function.
pub fn rms_norm(&self, eps: f32) -> Self {
self.new_unary(|ctx, ictx, tptr| {
let mr =
GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, self.md.shape)
.fit_or_die()?;
unsafe { Ok((mr, gg::ggml_rms_norm(ictx.gptr(), tptr, eps))) }
})
}

/// Elementwise `mean` of tensor `A`.
/// Returns a new tensor.
///
Expand Down Expand Up @@ -384,7 +398,40 @@ where
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
pub fn rope(self, n_past: usize, n_dims: usize, mode: usize) -> Self {
pub fn rope(self, n_past: usize, n_dims: usize, mode: usize, n_ctx: usize) -> Self {
self.new_unary(|ctx, ictx, tptr| {
// Creates a view plus a i32 tensor with three items.
let mr1 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, []);
let mr2 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, GType::I32, [3]);
let mr = (mr1 + mr2).fit_or_die()?;
unsafe {
Ok((
mr,
gg::ggml_rope(
ictx.gptr(),
tptr,
n_past as i32,
n_dims as i32,
mode as i32,
n_ctx as i32,
),
))
}
})
}

/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
/// # !!!! FIXME !!!!
pub fn rope_custom(
self,
n_past: usize,
n_dims: usize,
mode: usize,
n_ctx: usize,
freq_base: f32,
freq_scale: f32,
) -> Self {
self.new_unary(|ctx, ictx, tptr| {
// Creates a view plus a i32 tensor with three items.
let mr1 = GMemoryRequest::estimate_tensor_request_ictx(ctx, ictx, self.md.typ, []);
Expand All @@ -393,7 +440,16 @@ where
unsafe {
Ok((
mr,
gg::ggml_rope(ictx.gptr(), tptr, n_past as i32, n_dims as i32, mode as i32),
gg::ggml_rope_custom(
ictx.gptr(),
tptr,
n_past as i32,
n_dims as i32,
mode as i32,
n_ctx as i32,
freq_base,
freq_scale,
),
))
}
})
Expand Down

0 comments on commit 6f18440

Please sign in to comment.