From a1e33fa8ca96559563b20b0fcb1112548aefefe0 Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Fri, 16 Oct 2020 20:04:47 +0200
Subject: [PATCH 01/10] Implement LocatedSpan::get_line().

Add a function to get the full input line containing the (start point
of the) LocatedSpan.

As suggested in #53.
---
 src/lib.rs | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index f9259b5..6718a44 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -277,6 +277,45 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
         (column, &before_self[self.offset - (column - 1)..])
     }
 
+    /// Return the line that contains this LocatedSpan.
+    ///
+    /// The `get_column` and `get_utf8_column` functions returns
+    /// indexes that corresponds to the line returned by this function.
+    ///
+    /// ```
+    /// # extern crate nom_locate;
+    /// # extern crate nom;
+    /// # use nom_locate::LocatedSpan;
+    /// # use nom::{Slice, FindSubstring};
+    /// #
+    /// # fn main() {
+    /// let program = LocatedSpan::new(
+    ///     "Hello World!\
+    ///     \nThis is a multi-line input\
+    ///     \nthat ends after this line.\n");
+    /// let multi = program.find_substring("multi").unwrap();
+    ///
+    /// assert_eq!(
+    ///     program.slice(multi..).get_line(),
+    ///     Some("This is a multi-line input".as_ref()),
+    /// );
+    /// # }
+    /// ```
+    pub fn get_line(&self) -> Option<&[u8]> {
+        let self_bytes = self.fragment.as_bytes();
+        let self_ptr = self_bytes.as_ptr();
+        let offset = self.get_column() - 1;
+        let the_line = unsafe {
+            assert!(
+                offset <= isize::max_value() as usize,
+                "offset is too big"
+            );
+            let line_start_ptr = self_ptr.offset(-(offset as isize));
+            slice::from_raw_parts(line_start_ptr, offset + self_bytes.len())
+        };
+        the_line.split(|c| *c == b'\n').next()
+    }
+
     /// Return the column index, assuming 1 byte = 1 column.
     ///
     /// Use it for ascii text, or use get_utf8_column for UTF8.

From c4df14551364c8f395cb1cc8f9049b072e65c141 Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Fri, 16 Oct 2020 22:47:43 +0200
Subject: [PATCH 02/10] Add some tests.

---
 src/tests.rs | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/src/tests.rs b/src/tests.rs
index 537b53e..8e5ad39 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -414,3 +414,96 @@ fn it_should_display_hex() {
         "00000000\t61 62 63    \tabc\n".to_owned()
     );
 }
+
+#[test]
+fn line_of_empty_span_is_empty() {
+    assert_eq!(
+        StrSpan::new("").get_line(),
+        Some("".as_ref()),
+    );
+}
+
+#[test]
+fn line_of_single_line_start_is_whole() {
+    assert_eq!(
+        StrSpan::new("A single line").get_line(),
+        Some("A single line".as_ref()),
+    );
+}
+#[test]
+fn line_of_single_line_end_is_whole() {
+    let data = "A single line";
+    assert_eq!(
+        StrSpan::new(data).slice(data.len()..).get_line(),
+        Some("A single line".as_ref()),
+    );
+}
+
+#[test]
+fn line_of_start_is_first() {
+    assert_eq!(
+        StrSpan::new(
+            "One line of text\
+             \nFollowed by a second\
+             \nand a third\n"
+        ).get_line(),
+        Some("One line of text".as_ref()),
+    );
+}
+
+#[test]
+fn line_of_nl_is_before() {
+    let data =
+        "One line of text\
+         \nFollowed by a second\
+         \nand a third\n";
+    assert_eq!(
+        StrSpan::new(data).slice(data.find('\n').unwrap()..).get_line(),
+        Some("One line of text".as_ref()),
+    );
+}
+
+#[test]
+fn line_of_end_after_nl_is_empty() {
+    let data =
+        "One line of text\
+         \nFollowed by a second\
+         \nand a third\n";
+    assert_eq!(
+        StrSpan::new(data).slice(data.len()..).get_line(),
+        Some("".as_ref()),
+    );
+}
+
+#[test]
+fn line_of_end_no_nl_is_last() {
+    let data =
+        "One line of text\
+         \nFollowed by a second\
+         \nand a third";
+    assert_eq!(
+        StrSpan::new(data).slice(data.len()..).get_line(),
+        Some("and a third".as_ref()),
+    );
+}
+
+#[test]
+fn line_for_non_ascii_chars() {
+    // I don't really know if this Oriya text makes sense.
+    let data = StrSpan::new(
+        "Några rader text på Svenska.\
+         \nFörra raden var först, den här är i mitten\
+         \noch här är sista raden.\n");
+    let s = data.slice(data.find_substring("först").unwrap()..);
+    assert_eq!(
+        format!(
+            "{line_no:3}: {line_text}\n    {0:>lpos$}^- The match\n",
+            "",
+            line_no = s.location_line(),
+            line_text = core::str::from_utf8(s.get_line().unwrap()).unwrap(),
+            lpos = s.get_utf8_column(),
+        ),
+        "  2: Förra raden var först, den här är i mitten\
+       \n                     ^- The match\n",
+    );
+}

From 65cf3e1e3bb747eb44a42bab62ac15a019dc543c Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Fri, 16 Oct 2020 22:59:48 +0200
Subject: [PATCH 03/10] Remove bogus comment.

---
 src/tests.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tests.rs b/src/tests.rs
index 8e5ad39..b64db46 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -489,7 +489,6 @@ fn line_of_end_no_nl_is_last() {
 
 #[test]
 fn line_for_non_ascii_chars() {
-    // I don't really know if this Oriya text makes sense.
     let data = StrSpan::new(
         "Några rader text på Svenska.\
          \nFörra raden var först, den här är i mitten\

From 942d0c96fdd9d7eac487c772e48702d7deb323a1 Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Fri, 16 Oct 2020 23:10:32 +0200
Subject: [PATCH 04/10] No need for get_line() to return Option.

---
 src/lib.rs   |  9 ++++++---
 src/tests.rs | 16 ++++++++--------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 6718a44..2120b00 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -297,11 +297,11 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
     ///
     /// assert_eq!(
     ///     program.slice(multi..).get_line(),
-    ///     Some("This is a multi-line input".as_ref()),
+    ///     "This is a multi-line input".as_bytes(),
     /// );
     /// # }
     /// ```
-    pub fn get_line(&self) -> Option<&[u8]> {
+    pub fn get_line(&self) -> &[u8] {
         let self_bytes = self.fragment.as_bytes();
         let self_ptr = self_bytes.as_ptr();
         let offset = self.get_column() - 1;
@@ -313,7 +313,10 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
             let line_start_ptr = self_ptr.offset(-(offset as isize));
             slice::from_raw_parts(line_start_ptr, offset + self_bytes.len())
         };
-        the_line.split(|c| *c == b'\n').next()
+        match memchr::memchr(b'\n', the_line) {
+            None => the_line,
+            Some(pos) => &the_line[..pos],
+        }
     }
 
     /// Return the column index, assuming 1 byte = 1 column.
diff --git a/src/tests.rs b/src/tests.rs
index b64db46..e0ded8d 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -419,7 +419,7 @@ fn it_should_display_hex() {
 fn line_of_empty_span_is_empty() {
     assert_eq!(
         StrSpan::new("").get_line(),
-        Some("".as_ref()),
+        "".as_bytes(),
     );
 }
 
@@ -427,7 +427,7 @@ fn line_of_empty_span_is_empty() {
 fn line_of_single_line_start_is_whole() {
     assert_eq!(
         StrSpan::new("A single line").get_line(),
-        Some("A single line".as_ref()),
+        "A single line".as_bytes(),
     );
 }
 #[test]
@@ -435,7 +435,7 @@ fn line_of_single_line_end_is_whole() {
     let data = "A single line";
     assert_eq!(
         StrSpan::new(data).slice(data.len()..).get_line(),
-        Some("A single line".as_ref()),
+        "A single line".as_bytes(),
     );
 }
 
@@ -447,7 +447,7 @@ fn line_of_start_is_first() {
              \nFollowed by a second\
              \nand a third\n"
         ).get_line(),
-        Some("One line of text".as_ref()),
+        "One line of text".as_bytes(),
     );
 }
 
@@ -459,7 +459,7 @@ fn line_of_nl_is_before() {
          \nand a third\n";
     assert_eq!(
         StrSpan::new(data).slice(data.find('\n').unwrap()..).get_line(),
-        Some("One line of text".as_ref()),
+        "One line of text".as_bytes(),
     );
 }
 
@@ -471,7 +471,7 @@ fn line_of_end_after_nl_is_empty() {
          \nand a third\n";
     assert_eq!(
         StrSpan::new(data).slice(data.len()..).get_line(),
-        Some("".as_ref()),
+        "".as_bytes(),
     );
 }
 
@@ -483,7 +483,7 @@ fn line_of_end_no_nl_is_last() {
          \nand a third";
     assert_eq!(
         StrSpan::new(data).slice(data.len()..).get_line(),
-        Some("and a third".as_ref()),
+        "and a third".as_bytes(),
     );
 }
 
@@ -499,7 +499,7 @@ fn line_for_non_ascii_chars() {
             "{line_no:3}: {line_text}\n    {0:>lpos$}^- The match\n",
             "",
             line_no = s.location_line(),
-            line_text = core::str::from_utf8(s.get_line().unwrap()).unwrap(),
+            line_text = core::str::from_utf8(s.get_line()).unwrap(),
             lpos = s.get_utf8_column(),
         ),
         "  2: Förra raden var först, den här är i mitten\

From 060fd1fcfccbb8a784698567ef93cea7f5a830e4 Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Sat, 17 Oct 2020 09:55:22 +0200
Subject: [PATCH 05/10] The test that uses `format!` requires std.

---
 src/tests.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tests.rs b/src/tests.rs
index e0ded8d..fad947b 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -487,6 +487,7 @@ fn line_of_end_no_nl_is_last() {
     );
 }
 
+#[cfg(feature = "std")]
 #[test]
 fn line_for_non_ascii_chars() {
     let data = StrSpan::new(

From 1810ec310e4f0fc942fd2c5c668ca853e51c4bba Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Sat, 17 Oct 2020 10:07:09 +0200
Subject: [PATCH 06/10] Some rustfmt.

---
 src/tests.rs | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/tests.rs b/src/tests.rs
index fad947b..afa133b 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -417,10 +417,7 @@ fn it_should_display_hex() {
 
 #[test]
 fn line_of_empty_span_is_empty() {
-    assert_eq!(
-        StrSpan::new("").get_line(),
-        "".as_bytes(),
-    );
+    assert_eq!(StrSpan::new("").get_line(), "".as_bytes());
 }
 
 #[test]
@@ -446,27 +443,28 @@ fn line_of_start_is_first() {
             "One line of text\
              \nFollowed by a second\
              \nand a third\n"
-        ).get_line(),
+        )
+        .get_line(),
         "One line of text".as_bytes(),
     );
 }
 
 #[test]
 fn line_of_nl_is_before() {
-    let data =
-        "One line of text\
+    let data = "One line of text\
          \nFollowed by a second\
          \nand a third\n";
     assert_eq!(
-        StrSpan::new(data).slice(data.find('\n').unwrap()..).get_line(),
+        StrSpan::new(data)
+            .slice(data.find('\n').unwrap()..)
+            .get_line(),
         "One line of text".as_bytes(),
     );
 }
 
 #[test]
 fn line_of_end_after_nl_is_empty() {
-    let data =
-        "One line of text\
+    let data = "One line of text\
          \nFollowed by a second\
          \nand a third\n";
     assert_eq!(
@@ -477,8 +475,7 @@ fn line_of_end_after_nl_is_empty() {
 
 #[test]
 fn line_of_end_no_nl_is_last() {
-    let data =
-        "One line of text\
+    let data = "One line of text\
          \nFollowed by a second\
          \nand a third";
     assert_eq!(
@@ -493,7 +490,8 @@ fn line_for_non_ascii_chars() {
     let data = StrSpan::new(
         "Några rader text på Svenska.\
          \nFörra raden var först, den här är i mitten\
-         \noch här är sista raden.\n");
+         \noch här är sista raden.\n",
+    );
     let s = data.slice(data.find_substring("först").unwrap()..);
     assert_eq!(
         format!(

From 84ca913cbca63a9468acdc51f59afc29443c652a Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Sun, 18 Oct 2020 00:28:27 +0200
Subject: [PATCH 07/10] Refactor two similar unsafe blocks to one.

---
 src/lib.rs | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 2120b00..b46c0f5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -257,17 +257,24 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
         &self.fragment
     }
 
-    fn get_columns_and_bytes_before(&self) -> (usize, &[u8]) {
+    fn get_unoffsetted_slice(&self) -> &[u8] {
         let self_bytes = self.fragment.as_bytes();
         let self_ptr = self_bytes.as_ptr();
-        let before_self = unsafe {
+        unsafe {
             assert!(
                 self.offset <= isize::max_value() as usize,
                 "offset is too big"
             );
             let orig_input_ptr = self_ptr.offset(-(self.offset as isize));
-            slice::from_raw_parts(orig_input_ptr, self.offset)
-        };
+            slice::from_raw_parts(
+                orig_input_ptr,
+                self.offset + self_bytes.len(),
+            )
+        }
+    }
+
+    fn get_columns_and_bytes_before(&self) -> (usize, &[u8]) {
+        let before_self = &self.get_unoffsetted_slice()[..self.offset];
 
         let column = match memchr::memrchr(b'\n', before_self) {
             None => self.offset + 1,
@@ -302,20 +309,11 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
     /// # }
     /// ```
     pub fn get_line(&self) -> &[u8] {
-        let self_bytes = self.fragment.as_bytes();
-        let self_ptr = self_bytes.as_ptr();
-        let offset = self.get_column() - 1;
-        let the_line = unsafe {
-            assert!(
-                offset <= isize::max_value() as usize,
-                "offset is too big"
-            );
-            let line_start_ptr = self_ptr.offset(-(offset as isize));
-            slice::from_raw_parts(line_start_ptr, offset + self_bytes.len())
-        };
-        match memchr::memchr(b'\n', the_line) {
+        let column0 = self.get_column() - 1;
+        let the_line = &self.get_unoffsetted_slice()[self.offset - column0..];
+        match memchr::memchr(b'\n', &the_line[column0..]) {
             None => the_line,
-            Some(pos) => &the_line[..pos],
+            Some(pos) => &the_line[..column0 + pos],
         }
     }
 

From 6fb916a99affce0b8b8ca68572ce7c538af4437b Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Sun, 18 Oct 2020 13:59:05 +0200
Subject: [PATCH 08/10] Add some disclaimer comments / docs.

---
 src/lib.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index b46c0f5..81396d7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -257,6 +257,10 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
         &self.fragment
     }
 
+    // Attempt to get the "original" data slice back, by extending
+    // self.fragment backwards by self.offset.
+    // Note that any bytes truncated from after self.fragment will not
+    // be recovered.
     fn get_unoffsetted_slice(&self) -> &[u8] {
         let self_bytes = self.fragment.as_bytes();
         let self_ptr = self_bytes.as_ptr();
@@ -289,6 +293,10 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
     /// The `get_column` and `get_utf8_column` functions returns
     /// indexes that corresponds to the line returned by this function.
     ///
+    /// Note that if this LocatedSpan ends before the end of the
+    /// original data, the result of calling `get_line()` will not
+    /// include any data from after the LocatedSpan.
+    ///
     /// ```
     /// # extern crate nom_locate;
     /// # extern crate nom;

From 60ea6cba480cd1a751d04dcb4538bb1ca01d5de8 Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Sun, 18 Oct 2020 19:12:44 +0200
Subject: [PATCH 09/10] Rename get_line to get_line_beginning.

---
 src/lib.rs   |  8 ++++----
 src/tests.rs | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 81396d7..d668b08 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -294,8 +294,8 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
     /// indexes that corresponds to the line returned by this function.
     ///
     /// Note that if this LocatedSpan ends before the end of the
-    /// original data, the result of calling `get_line()` will not
-    /// include any data from after the LocatedSpan.
+    /// original data, the result of calling `get_line_beginning()`
+    /// will not include any data from after the LocatedSpan.
     ///
     /// ```
     /// # extern crate nom_locate;
@@ -311,12 +311,12 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
     /// let multi = program.find_substring("multi").unwrap();
     ///
     /// assert_eq!(
-    ///     program.slice(multi..).get_line(),
+    ///     program.slice(multi..).get_line_beginning(),
     ///     "This is a multi-line input".as_bytes(),
     /// );
     /// # }
     /// ```
-    pub fn get_line(&self) -> &[u8] {
+    pub fn get_line_beginning(&self) -> &[u8] {
         let column0 = self.get_column() - 1;
         let the_line = &self.get_unoffsetted_slice()[self.offset - column0..];
         match memchr::memchr(b'\n', &the_line[column0..]) {
diff --git a/src/tests.rs b/src/tests.rs
index afa133b..913b860 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -417,13 +417,13 @@ fn it_should_display_hex() {
 
 #[test]
 fn line_of_empty_span_is_empty() {
-    assert_eq!(StrSpan::new("").get_line(), "".as_bytes());
+    assert_eq!(StrSpan::new("").get_line_beginning(), "".as_bytes());
 }
 
 #[test]
 fn line_of_single_line_start_is_whole() {
     assert_eq!(
-        StrSpan::new("A single line").get_line(),
+        StrSpan::new("A single line").get_line_beginning(),
         "A single line".as_bytes(),
     );
 }
@@ -431,7 +431,7 @@ fn line_of_single_line_start_is_whole() {
 fn line_of_single_line_end_is_whole() {
     let data = "A single line";
     assert_eq!(
-        StrSpan::new(data).slice(data.len()..).get_line(),
+        StrSpan::new(data).slice(data.len()..).get_line_beginning(),
         "A single line".as_bytes(),
     );
 }
@@ -444,7 +444,7 @@ fn line_of_start_is_first() {
              \nFollowed by a second\
              \nand a third\n"
         )
-        .get_line(),
+        .get_line_beginning(),
         "One line of text".as_bytes(),
     );
 }
@@ -457,7 +457,7 @@ fn line_of_nl_is_before() {
     assert_eq!(
         StrSpan::new(data)
             .slice(data.find('\n').unwrap()..)
-            .get_line(),
+            .get_line_beginning(),
         "One line of text".as_bytes(),
     );
 }
@@ -468,7 +468,7 @@ fn line_of_end_after_nl_is_empty() {
          \nFollowed by a second\
          \nand a third\n";
     assert_eq!(
-        StrSpan::new(data).slice(data.len()..).get_line(),
+        StrSpan::new(data).slice(data.len()..).get_line_beginning(),
         "".as_bytes(),
     );
 }
@@ -479,7 +479,7 @@ fn line_of_end_no_nl_is_last() {
          \nFollowed by a second\
          \nand a third";
     assert_eq!(
-        StrSpan::new(data).slice(data.len()..).get_line(),
+        StrSpan::new(data).slice(data.len()..).get_line_beginning(),
         "and a third".as_bytes(),
     );
 }
@@ -498,7 +498,7 @@ fn line_for_non_ascii_chars() {
             "{line_no:3}: {line_text}\n    {0:>lpos$}^- The match\n",
             "",
             line_no = s.location_line(),
-            line_text = core::str::from_utf8(s.get_line()).unwrap(),
+            line_text = core::str::from_utf8(s.get_line_beginning()).unwrap(),
             lpos = s.get_utf8_column(),
         ),
         "  2: Förra raden var först, den här är i mitten\

From de713cb5e84d3bf3cb460bdd315a46a51c04cb52 Mon Sep 17 00:00:00 2001
From: Rasmus Kaj <kaj@kth.se>
Date: Sun, 18 Oct 2020 19:21:18 +0200
Subject: [PATCH 10/10] Add line_begining_may_ot_be_entire_len test.

This test documents how `get_line_beginning()` differs from a
hypothetical `get_line()` method.
---
 src/tests.rs | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/tests.rs b/src/tests.rs
index 913b860..c07b273 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -484,6 +484,21 @@ fn line_of_end_no_nl_is_last() {
     );
 }
 
+/// This test documents how `get_line_beginning()` differs from
+/// a hypothetical `get_line()` method.
+#[test]
+fn line_begining_may_ot_be_entire_len() {
+    let data = "One line of text\
+         \nFollowed by a second\
+         \nand a third";
+    let by = "by";
+    let pos = data.find_substring(by).unwrap();
+    assert_eq!(
+        StrSpan::new(data).slice(pos..pos+by.len()).get_line_beginning(),
+        "Followed by".as_bytes(),
+    );
+}
+
 #[cfg(feature = "std")]
 #[test]
 fn line_for_non_ascii_chars() {