From b5d2dcd8c7a51635d8bb11a69827c789d07e200e Mon Sep 17 00:00:00 2001 From: Kalil de Lima Date: Sat, 3 Oct 2020 22:43:51 -0300 Subject: [PATCH 1/2] add support for markdown text --- lib/reading_time.ex | 44 ++++++++++++++++++++++++++++++++++++-- test/reading_time_test.exs | 32 +++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/lib/reading_time.ex b/lib/reading_time.ex index a186113..07a8e35 100644 --- a/lib/reading_time.ex +++ b/lib/reading_time.ex @@ -5,11 +5,50 @@ defmodule ReadingTime do @split_pattern [" ", "\n", "\r", "\t"] @words_per_minute 200 + @markdown_words [ + # ignore headers + ~r/#+/, + ~r/(==)+/, + ~r/(--)+/, + # ignore boxed text + ~r/>+/, + # ignore unordered lists + ~r/(-)+/, + ~r/(\*)+/, + ~r/(\+)+/, + # ignore horizontal rules + ~r/----*/, + ~r/\*\*(\*)+/, + ~r/___+/, + ] + + defp markdown_word?(word) do + Enum.any?( + @markdown_words, + fn md_word -> + Regex.match?(md_word, word) + end + ) + end + + defp ignore_format_words(words, :text) do + words + end + + defp ignore_format_words(words, :markdown) do + words + |> Enum.filter( + fn word -> + !markdown_word?(word) + end + ) + end @spec time( String.t(), words_per_minute: non_neg_integer(), - split_pattern: nonempty_list(String.t()) + split_pattern: nonempty_list(String.t()), + format: atom() ) :: number @doc """ Returns the time in minutes for a given string. 
@@ -26,10 +65,11 @@ defmodule ReadingTime do def time(string, opts \\ []) do words_per_minute = Keyword.get(opts, :words_per_minute, @words_per_minute) split_pattern = Keyword.get(opts, :split_pattern, @split_pattern) - + text_format = Keyword.get(opts, :text_format, :text) words = string |> String.split(split_pattern, trim: true) + |> ignore_format_words(text_format) |> length minutes = diff --git a/test/reading_time_test.exs b/test/reading_time_test.exs index d0327c8..bcad520 100644 --- a/test/reading_time_test.exs +++ b/test/reading_time_test.exs @@ -46,4 +46,36 @@ defmodule ReadingTimeTest do ) * long_text_multiplier end end + + property "Markdown formatters get ignored" do + text = " + # This is a header1. + + ## This is a header2. + + This is also a header1. + == + + This is also a header2. + -- + + > This is a blockquote + >> This is a nested blockquote + >> # This is a header1 inside a blockquote + + --- + + - List1 + + List2 + * List3 + + *** + + ___ + + " + actual_words = 37 + assert ReadingTime.time(text, words_per_minute: 1, text_format: :markdown) == actual_words + end + end From f85ee77c84681bb34c2bb0b714657a85c6953ddd Mon Sep 17 00:00:00 2001 From: Kalil de Lima Date: Sun, 4 Oct 2020 17:37:40 -0300 Subject: [PATCH 2/2] use a stricter regex --- lib/reading_time.ex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/reading_time.ex b/lib/reading_time.ex index 07a8e35..35142ed 100644 --- a/lib/reading_time.ex +++ b/lib/reading_time.ex @@ -13,11 +13,11 @@ defmodule ReadingTime do # ignore boxed text ~r/>+/, # ignore unordered lists - ~r/(-)+/, - ~r/(\*)+/, - ~r/(\+)+/, + ~r/-/, + ~r/(\*)/, + ~r/(\+)/, # ignore horizontal rules - ~r/----*/, + ~r/---+/, ~r/\*\*(\*)+/, ~r/___+/, ]