CI: check for whitespace violations #242

Merged · 2 commits · Jun 20, 2023
15 changes: 15 additions & 0 deletions .github/workflows/fix-whitespace.yml
@@ -0,0 +1,15 @@
name: Whitespace
on:
  push:
  pull_request:

jobs:
  check-whitespace:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout sources
        uses: actions/checkout@v3

      - name: Check for whitespace violations
        uses: andreasabel/fix-whitespace-action@v1
32 changes: 32 additions & 0 deletions fix-whitespace.yaml
@@ -0,0 +1,32 @@
# Files checked by `fix-whitespace` for whitespace violations.
#
# - Trailing whitespace.
# - Trailing empty lines.
# - Final line not ending in a newline character.
# - Tabs.

excluded-dirs:
  - .git
  - .stack-work
  - dist
  - dist-newstyle
  - old-docbook

included-files:
  # Some file types make use of tabs which we do not convert just now (2023-06-20),
  # so these are commented out.
  - .gitignore
  # - .mailmap
  - LICENSE
  # - "*.bat"
  - "*.c"
  - "*.cabal"
  - "*.hs"
  - "*.md"
  - "*.project"
  - "*.py"
  - "*.rst"
  - "*.sh"
  - "*.txt"
  # - "*.x"
  # - "*.y"
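
For orientation, the four checks listed at the top of this file amount to a very small scanner over each file's text. Below is a minimal Haskell sketch of those checks — illustrative only, not the `fix-whitespace` implementation; `Violation` and `violations` are invented names.

```haskell
import Data.Char (isSpace)
import Data.List (isSuffixOf)

-- A simplified model of the violations listed above.
data Violation
  = TrailingWhitespace Int   -- line number ending in spaces or tabs
  | TrailingEmptyLines       -- blank line(s) at the end of the file
  | NoFinalNewline           -- file does not end in a newline
  | Tab Int                  -- line number containing a tab
  deriving Show

-- Check one file's contents; line numbers are 1-based.
violations :: String -> [Violation]
violations contents =
     [ TrailingWhitespace i | (i, l) <- numbered, not (null l), isSpace (last l) ]
  ++ [ Tab i                | (i, l) <- numbered, '\t' `elem` l ]
  ++ [ NoFinalNewline       | not (null contents), not ("\n" `isSuffixOf` contents) ]
  ++ [ TrailingEmptyLines   | not (null ls), all isSpace (last ls) ]
  where
    ls       = lines contents
    numbered = zip [1 ..] ls
```

For example, `violations "foo \t\nbar"` reports trailing whitespace and a tab on line 1 plus a missing final newline.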
26 changes: 13 additions & 13 deletions src/DFA.hs
@@ -1,12 +1,12 @@
-- -----------------------------------------------------------------------------
--
-- DFA.hs, part of Alex
--
-- (c) Chris Dornan 1995-2000, Simon Marlow 2003
--
-- This module generates a DFA from a scanner by first converting it
-- to an NFA and then converting the NFA with the subset construction.
--
-- See the chapter on `Finite Automata and Lexical Analysis' in the
-- dragon book for an excellent overview of the algorithms in this
-- module.
@@ -29,39 +29,39 @@ import Data.Maybe ( fromJust )

-- (This section should logically belong to the DFA module but it has been
-- placed here to make this module self-contained.)
--
-- `DFA' provides an alternative to `Scanner' (described in the RExp module);
-- it can be used directly to scan text efficiently. Additionally it has an
-- extra place holder for holding action functions for generating
-- application-specific tokens. When this place holder is not being used, the
-- unit type will be used.
--
-- Each state in the automaton consist of a list of `Accept' values, descending
-- in priority, and an array mapping characters to new states. As the array
-- may only cover a sub-range of the characters, a default state number is
-- given in the third field. By convention, all transitions to the -1 state
-- represent invalid transitions.
--
-- A list of accept states is provided for as the original specification may
-- have been ambiguous, in which case the highest priority token should be
-- taken (the one appearing earliest in the specification); this can not be
-- calculated when the DFA is generated in all cases as some of the tokens may
-- be associated with leading or trailing context or start codes.
--
-- `scan_token' (see above) can deal with unconditional accept states more
-- efficiently than those associated with context; to save it testing each time
-- whether the list of accept states contains an unconditional state, the flag
-- in the first field of `St' is set to true whenever the list contains an
-- unconditional state.
--
-- The `Accept' structure contains the priority of the token being accepted
-- (lower numbers => higher priorities), the name of the token, a place holder
-- that can be used for storing the `action' function for constructing the
-- token from the input text and the scanner's state, a list of start codes
-- (listing the start codes that the scanner must be in for the token to be
-- accepted; empty => no restriction), the leading and trailing context (both
-- `Nothing' if there is none).
--
-- The leading context consists simply of a character predicate that will
-- return true if the last character read is acceptable. The trailing context
-- consists of an alternative starting state within the DFA; if this `sub-dfa'
@@ -185,10 +185,10 @@ in_pdfa ss (DFA _ mp) = ss `Map.member` mp

mk_int_dfa:: NFA -> DFA StateSet a -> DFA SNum a
mk_int_dfa nfa (DFA start_states mp)
= DFA [0 .. length start_states-1]
(Map.fromList [ (lookup' st, cnv pds) | (st, pds) <- Map.toAscList mp ])
where
mp' = Map.fromList (zip (start_states ++
(map fst . Map.toAscList) (foldr Map.delete mp start_states)) [0..])

lookup' = fromJust . flip Map.lookup mp'
@@ -200,9 +200,9 @@ mk_int_dfa nfa (DFA start_states mp)

accs' = map cnv_acc accs
cnv_acc (Acc p a lctx rctx) = Acc p a lctx rctx'
where rctx' =
case rctx of
RightContextRExp s ->
RightContextRExp (lookup' (mk_ss nfa [s]))
other -> other

@@ -221,7 +221,7 @@ mk_int_dfa nfa (DFA start_states mp)
-- some initial segment of the array may be omitted) or the value that 255 maps
-- to (in which case a final segment of the array may be omitted), hence the
-- calculation of `(df,bds)'.
--
-- Note that empty arrays are avoided as they can cause severe problems for
-- some popular Haskell compilers.

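The long comment in this file describes the runtime DFA representation: each state carries its accept values in priority order, a transition array covering a sub-range of characters, a default state for everything else, and a flag for unconditional accepts. A hypothetical sketch of those shapes — names and fields are illustrative, not Alex's actual definitions (the real ones, e.g. `dfa_start_states` used in DFAMin.hs below, differ):

```haskell
import Data.Array (Array)
import qualified Data.Map as Map

type SNum = Int                            -- state number; -1 marks an invalid transition

-- One DFA state, as the comments above describe it.
data State a = State
  { stHasUncond :: Bool                    -- True if some accept below is unconditional
  , stAccepts   :: [Accept a]              -- highest-priority token first
  , stDefault   :: SNum                    -- target for characters outside the array
  , stTrans     :: Array Char SNum         -- transitions for a sub-range of characters
  }

-- What it means to accept a token in a state.
data Accept a = Accept
  { accPriority   :: Int                   -- lower number => higher priority
  , accAction     :: a                     -- place holder for the token-building action
  , accStartCodes :: [Int]                 -- empty => no restriction
  , accLeftCtx    :: Maybe (Char -> Bool)  -- predicate on the previous character
  , accRightCtx   :: Maybe SNum            -- start state of the trailing-context sub-DFA
  }

-- A DFA is its start states plus a map from state number to state.
data DFA a = DFA
  { dfaStarts :: [SNum]
  , dfaStates :: Map.Map SNum (State a)
  }
```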
2 changes: 1 addition & 1 deletion src/DFAMin.hs
@@ -62,7 +62,7 @@ import qualified Data.List as List
--
-- % The second for loop that iterates over R mutates Q,
-- % but it does not affect the third for loop that iterates over Q.
-- % Because once X refines Y into Y1 and Y2, Y1 and Y2 can't be more refined by X.

minimizeDFA :: forall a. Ord a => DFA Int a -> DFA Int a
minimizeDFA dfa@(DFA { dfa_start_states = starts,
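The comment above paraphrases the partition-refinement argument behind Hopcroft-style minimisation: once a splitter X has divided a class Y into Y1 and Y2, X cannot divide either part again. As a hedged illustration of that single refinement step (not Alex's code; `refineBy` is an invented name):

```haskell
import qualified Data.Set as Set
import           Data.Set (Set)

-- Split class y against splitter x.  Returns Nothing when x does not
-- refine y; once y has been split into (y1, y2), splitting either part
-- against the same x again can only return Nothing.
refineBy :: Ord s => Set s -> Set s -> Maybe (Set s, Set s)
refineBy x y
  | Set.null y1 || Set.null y2 = Nothing
  | otherwise                  = Just (y1, y2)
  where
    y1 = y `Set.intersection` x
    y2 = y `Set.difference`   x
```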
2 changes: 1 addition & 1 deletion src/DFS.hs
@@ -91,7 +91,7 @@ scc:: Graph -> GForest
scc g = dff' (reverse (top_sort (reverse_graph g))) g

top_sort:: Graph -> [Int]
top_sort = postorder . dff


-- `dff' computes the depth-first forest. It works by unrolling the
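`scc` here is the classic two-pass construction: a depth-first forest of the graph, taken in reverse topological order of the reversed graph. For experimenting, the same result is available off the shelf from `containers`; the toy graph below is purely illustrative.

```haskell
import Data.Graph (buildG, scc)
import Data.Tree  (flatten)

-- A small directed graph with one cycle 0 -> 1 -> 2 -> 0 and an extra edge 2 -> 3.
components :: [[Int]]
components = map flatten (scc (buildG (0, 3) [(0,1), (1,2), (2,0), (2,3)]))
-- one component containing 0, 1 and 2, and a singleton component [3]
```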
42 changes: 21 additions & 21 deletions src/Data/LICENSE.txt
@@ -1,30 +1,30 @@
Copyright (c) 2005, Paul Johnson
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.

    * Neither the name of the Ranged Sets project nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6 changes: 3 additions & 3 deletions src/Info.hs
@@ -1,5 +1,5 @@
-- -----------------------------------------------------------------------------
--
-- Info.hs, part of Alex
--
-- (c) Simon Marlow 2003
@@ -23,7 +23,7 @@ infoDFA _ func_nm dfa
= str "Scanner : " . str func_nm . nl
. str "States : " . shows (length dfa_list) . nl
. nl . infoDFA'
where
dfa_list = Map.toAscList (dfa_states dfa)

infoDFA' = interleave_shows nl (map infoStateN dfa_list)
@@ -47,7 +47,7 @@ infoDFA _ func_nm dfa
Nothing -> id
Just code -> str " { " . str code . str " }")
. nl

infoTransition (char',state)
= str (ljustify 8 (show char'))
. str " -> "
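`infoDFA` builds its report from ShowS-style combinators (`str`, `nl`, `interleave_shows`, `ljustify` come from Alex's utility code). A hedged sketch of what such combinators typically look like — the real definitions may differ:

```haskell
import Data.List (intersperse)

-- ShowS is String -> String; composing with (.) builds output without
-- quadratic string concatenation.
str :: String -> ShowS
str = showString

nl :: ShowS
nl = showChar '\n'

ljustify :: Int -> String -> String
ljustify n s = s ++ replicate (max 0 (n - length s)) ' '

interleave_shows :: ShowS -> [ShowS] -> ShowS
interleave_shows sep = foldr (.) id . intersperse sep

-- e.g. (str "Scanner : " . str "lexer" . nl) "" == "Scanner : lexer\n"
```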
2 changes: 1 addition & 1 deletion src/Map.hs
@@ -60,7 +60,7 @@ elems = eltsFM
fromList :: Ord k => [(k,a)] -> Map k a
fromList = listToFM

fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromListWith c = addListToFM_C (flip c) emptyFM

toAscList :: Map k a -> [(k,a)]
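This module is a thin compatibility shim over the old `FiniteMap` API, and `fromListWith` is intended to behave like the `Data.Map` function of the same name (hence the `flip`). A quick usage illustration, assuming the shim matches `Data.Map` semantics:

```haskell
import qualified Data.Map as Map

-- With Data.Map.fromListWith the combining function receives the new
-- value first and the accumulated value second; (+) is commutative so
-- the order does not matter here.
counts :: Map.Map String Int
counts = Map.fromListWith (+) [("a", 1), ("b", 2), ("a", 3)]
-- counts == Map.fromList [("a", 4), ("b", 2)]
```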
32 changes: 16 additions & 16 deletions src/NFA.hs
@@ -1,13 +1,13 @@
-- -----------------------------------------------------------------------------
--
-- NFA.hs, part of Alex
--
-- (c) Chris Dornan 1995-2000, Simon Marlow 2003
--
-- The `scanner2nfa' takes a `Scanner' (see the `RExp' module) and
-- generates its equivalent nondeterministic finite automaton. NFAs
-- are turned into DFAs in the DFA module.
--
-- See the chapter on `Finite Automata and Lexical Analysis' in the
-- dragon book for an excellent overview of the algorithms in this
-- module.
@@ -61,7 +61,7 @@ instance Show NState where
-- (listing the start codes that the scanner must be in for the token to be
-- accepted; empty => no restriction), the leading and trailing context (both
-- `Nothing' if there is none).
--
-- The leading context consists simply of a character predicate that will
-- return true if the last character read is acceptable. The trailing context
-- consists of an alternative starting state within the DFA; if this `sub-dfa'
@@ -84,13 +84,13 @@ scanner2nfa enc Scanner{scannerTokens = toks} startcodes
-- make a start state for each start code (these will be
-- numbered from zero).
start_states <- sequence (replicate (length startcodes) newState)

-- construct the NFA for each token
tok_states <- zipWithM do_token toks [0..]

-- make an epsilon edge from each start state to each
-- token that is acceptable in that state
zipWithM_ (tok_transitions (zip toks tok_states))
startcodes start_states

where
@@ -104,7 +104,7 @@ scanner2nfa enc Scanner{scannerTokens = toks} startcodes
return NoRightContext
RightContextCode code' ->
return (RightContextCode code')
RightContextRExp re' -> do
r_b <- newState
r_e <- newState
rexp2nfa r_b r_e re'
@@ -123,7 +123,7 @@ scanner2nfa enc Scanner{scannerTokens = toks} startcodes
-- NFA creation from a regular expression

-- rexp2nfa B E R generates an NFA that begins in state B, recognises
-- R, and ends in state E only if R has been recognised.

rexp2nfa :: SNum -> SNum -> RExp -> NFAM ()
rexp2nfa b e Eps = epsilonEdge b e
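
The comment above states the invariant for `rexp2nfa`: build an automaton fragment that runs from a given begin state to a given end state. A self-contained toy version of that construction style (an invented `Regex` type and plain edge lists, not Alex's `RExp` or the `NFAM` monad):

```haskell
-- A toy regular-expression type and a Thompson-style construction.
data Regex = Empty | Lit Char | Seq Regex Regex | Alt Regex Regex | Star Regex

-- (from, label, to); Nothing is an epsilon edge.
type Trans = [(Int, Maybe Char, Int)]

-- build b e r next: add edges so that state e is reachable from state b
-- exactly when r matches; `next` is the first unused state number.
build :: Int -> Int -> Regex -> Int -> (Trans, Int)
build b e r next = case r of
  Empty     -> ([(b, Nothing, e)], next)
  Lit c     -> ([(b, Just c, e)], next)
  Seq r1 r2 -> let m           = next
                   (t1, next1) = build b m r1 (next + 1)
                   (t2, next2) = build m e r2 next1
               in  (t1 ++ t2, next2)
  Alt r1 r2 -> let (t1, next1) = build b e r1 next
                   (t2, next2) = build b e r2 next1
               in  (t1 ++ t2, next2)
  Star r1   -> let m           = next
                   (t1, next1) = build m m r1 (next + 1)
               in  ((b, Nothing, m) : (m, Nothing, e) : t1, next1)
```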
@@ -176,7 +176,7 @@ instance Monad NFAM where

runNFA :: Encoding -> NFAM () -> NFA
runNFA e m = case unN m 0 Map.empty e of
(s, nfa_map, ()) -> -- trace ("runNfa.." ++ show (Map.toAscList nfa_map)) $
e_close (array (0,s-1) (Map.toAscList nfa_map))

e_close:: Array Int NState -> NFA
@@ -202,8 +202,8 @@ anyBytes from n to = do
bytesEdge :: SNum -> [Byte] -> [Byte] -> SNum -> NFAM ()
bytesEdge from [] [] to = epsilonEdge from to
bytesEdge from [x] [y] to = byteEdge from (byteSetRange x y) to -- (OPTIMISATION)
bytesEdge from (x:xs) (y:ys) to
| x == y = do
s <- newState
byteEdge from (byteSetSingleton x) s
bytesEdge s xs ys to
@@ -216,33 +216,33 @@ bytesEdge from (x:xs) (y:ys) to
byteEdge from (byteSetSingleton y) t
bytesEdge t (fmap (const 0x00) xs) ys to

when ((x+1) <= (y-1)) $ do
u <- newState
byteEdge from (byteSetRange (x+1) (y-1)) u
anyBytes u (length xs) to
bytesEdge _ _ _ _ = undefined -- hide compiler warning

charEdge :: SNum -> CharSet -> SNum -> NFAM ()
charEdge from charset to = do
-- trace ("charEdge: " ++ (show $ charset) ++ " => " ++ show (byteRanges charset)) $
e <- getEncoding
forM_ (byteRanges e charset) $ \(xs,ys) -> do
bytesEdge from xs ys to



byteEdge :: SNum -> ByteSet -> SNum -> NFAM ()
byteEdge from charset to = N $ \s n _ -> (s, addEdge n, ())
where
addEdge n =
case Map.lookup from n of
Nothing ->
Map.insert from (NSt [] [] [(charset,to)]) n
Just (NSt acc eps trans) ->
Map.insert from (NSt acc eps ((charset,to):trans)) n

epsilonEdge :: SNum -> SNum -> NFAM ()
epsilonEdge from to
| from == to = return ()
| otherwise = N $ \s n _ -> let n' = addEdge n in n' `seq` (s, n', ())
where
@@ -254,7 +254,7 @@ epsilonEdge from to
accept :: SNum -> Accept Code -> NFAM ()
accept state new_acc = N $ \s n _ -> (s, addAccept n, ())
where
addAccept n =
case Map.lookup state n of
Nothing ->
Map.insert state (NSt [new_acc] [] []) n
2 changes: 1 addition & 1 deletion src/Set.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE CPP #-}
module Set ( Set, member, empty, insert ) where

import Data.Set

#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 603
member :: Ord a => a -> Set a -> Bool
2 changes: 1 addition & 1 deletion src/UTF8.hs
@@ -8,7 +8,7 @@ import Data.Char
-- Could also be imported:

import Codec.Binary.UTF8.Light as UTF8

encode :: Char -> [Word8]
encode c = head (UTF8.encodeUTF8' [UTF8.c2w c])

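`encode` delegates to `Codec.Binary.UTF8.Light`. For reference, here is a self-contained sketch of the same Char-to-bytes mapping (standard UTF-8, surrogate handling omitted); `encodeChar` is an illustrative name, not part of this module.

```haskell
import Data.Bits (shiftR, (.&.), (.|.))
import Data.Char (ord)
import Data.Word (Word8)

-- Standard UTF-8 encoding of a single code point (1 to 4 bytes).
encodeChar :: Char -> [Word8]
encodeChar c
  | n < 0x80    = [ fromIntegral n ]
  | n < 0x800   = [ 0xC0 .|. byte 6,  tail6 0 ]
  | n < 0x10000 = [ 0xE0 .|. byte 12, tail6 6,  tail6 0 ]
  | otherwise   = [ 0xF0 .|. byte 18, tail6 12, tail6 6, tail6 0 ]
  where
    n       = ord c
    byte k  = fromIntegral (n `shiftR` k)                  -- leading-byte payload
    tail6 k = 0x80 .|. fromIntegral ((n `shiftR` k) .&. 0x3F)  -- continuation byte

-- e.g. encodeChar 'é' == [0xC3, 0xA9]
```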