Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Optional bootstrap #174

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ env:
before_install:
- sudo add-apt-repository -y ppa:hvr/ghc
- sudo apt-get update
- sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH
- sudo apt-get install happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/happy/1.19.5/bin:$PATH

install:
- cabal update
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## Unreleased

* No longer include pre-generated Parser.hs in the Hackage upload, as
Happy's new ability to bootstrap from parser combinators is a better
way to make building easy and less trustful.

* Allow arbitary repetitions in regexps.
Previously, the `r{n,m}` and related forms were restricted to single
digit numbers `n` and `m`.
Expand Down
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ sdist ::
echo "Error: Tree is not clean"; \
exit 1; \
fi
$(HAPPY) $(HAPPY_OPTS) src/Parser.y -o src/Parser.hs
$(ALEX) $(ALEX_OPTS) src/Scan.x -o src/Scan.hs
mv src/Parser.y src/Parser.y.boot
mv src/Scan.x src/Scan.x.boot
$(CABAL) v2-run gen-alex-sdist
$(CABAL) v2-sdist
@if [ ! -f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \
Expand All @@ -42,7 +38,14 @@ sdist-test-only ::
rm -rf "${SDIST_DIR}/alex-$(ALEX_VER)/"
tar -xf "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" -C ${SDIST_DIR}/
echo "packages: ." > "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project"
cd "${SDIST_DIR}/alex-$(ALEX_VER)/" && cabal v2-test --enable-tests all
echo "tests: True" >> "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project"
cd "${SDIST_DIR}/alex-$(ALEX_VER)/" \
&& cabal v2-build all --flag -bootstrap \
&& cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \
&& cabal v2-test all -j --flag -bootstrap \
&& export PATH=./bootstrap-root:$$PATH \
&& cabal v2-build all --flag +bootstrap \
&& cabal v2-test all -j --flag +bootstrap
@echo ""
@echo "Success! ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz is ready for distribution!"
@echo ""
28 changes: 26 additions & 2 deletions alex.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ extra-source-files:
examples/words.x
examples/words_monad.x
examples/words_posn.x
src/Parser.y.boot
src/Scan.x.boot
src/ghc_hooks.c
templates/GenericTemplate.hs
templates/wrappers.hs
Expand Down Expand Up @@ -125,6 +123,11 @@ extra-source-files:
tests/issue_119.x
tests/issue_141.x

flag bootstrap
description: Optimize the implementation of happy using a pre-built alex
manual: True
default: False

source-repository head
type: git
location: https://github.com/simonmar/alex.git
Expand All @@ -137,6 +140,11 @@ executable alex
, array
, containers
, directory
-- min bounds for Except and ExceptT
, transformers >=0.4 && <0.6
, mtl >= 2.2.1

build-tools: happy

default-language: Haskell98
default-extensions: CPP
Expand All @@ -157,16 +165,32 @@ executable alex
Paths_alex
Parser
ParseMonad
ParseMonad.Class
Scan
Set
Sort
Token
Util
UTF8
Data.Ranged
Data.Ranged.Boundaries
Data.Ranged.RangedSet
Data.Ranged.Ranges

if flag(bootstrap)
-- TODO put this back when Cabal can use it's qualified goals to better
-- understand bootstrapping, see
-- https://github.com/haskell/cabal/issues/7189
--build-tools: alex
cpp-options: -DALEX_BOOTSTRAP
other-modules:
ParseMonad.Bootstrapped
Scan.Bootstrapped
else
other-modules:
ParseMonad.Oracle
Scan.Oracle

test-suite tests
type: exitcode-stdio-1.0
main-is: test.hs
Expand Down
3 changes: 1 addition & 2 deletions src/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ import Info
import Map ( Map )
import qualified Map hiding ( Map )
import Output
import ParseMonad ( runP, Warning(..) )
import ParseMonad ( runP, AlexPosn(..), Warning(..) )
import Parser
import Scan
import Util ( hline )
import Paths_alex ( version, getDataDir )

Expand Down
176 changes: 56 additions & 120 deletions src/ParseMonad.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
-- -----------------------------------------------------------------------------
--
-- ParseMonad.hs, part of Alex
Expand All @@ -7,182 +10,115 @@
-- ----------------------------------------------------------------------------}

module ParseMonad (
AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
AlexPosn(..), alexStartPos,
module ParseMonad.Class,
Warning(..), warnIfNullable,
P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
setStartCode, getStartCode, getInput, setInput,
P, P', PBase, runP, raiseP, failP,
lookupSMac, lookupRMac, newSMac, newRMac,
) where

import AbsSyn hiding ( StartCode )
import CharSet ( CharSet )
import Map ( Map )
import qualified Map hiding ( Map )
import UTF8
import ParseMonad.Class

#if __GLASGOW_HASKELL__ < 710
import Control.Applicative ( Applicative(..) )
#endif
import Control.Monad ( liftM, ap, when )
import Data.Word (Word8)
-- -----------------------------------------------------------------------------
-- The input type
--import Codec.Binary.UTF8.Light as UTF8

type Byte = Word8

type AlexInput = (AlexPosn, -- current position,
Char, -- previous char
[Byte],
String) -- current input string

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,c,_,_) = c


alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar (_,_,[],[]) = Nothing
alexGetChar (p,_,[],(c:s)) = let p' = alexMove p c in p' `seq`
Just (c, (p', c, [], s))
alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning

alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (_,_,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
(b:bs) = UTF8.encode c
in p' `seq` Just (b, (p', c, bs, s))

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of charaters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.

data AlexPosn = AlexPn !Int !Int !Int
deriving (Eq,Show)
import Control.Applicative
import Control.Monad.State ( StateT(..), get, modify )
import Control.Monad.Trans ( MonadTrans, lift )
import Control.Monad ( MonadPlus, when )

alexStartPos :: AlexPosn
alexStartPos = AlexPn 0 1 1

alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1
alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
#if ALEX_BOOTSTRAP
import ParseMonad.Bootstrapped (PBase)
#else
import ParseMonad.Oracle (PBase)
#endif

-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad
-- Alex parsing monad transformerx

data Warning
= WarnNullableRExp
{ _warnPos :: AlexPosn -- ^ The position of the code following the regex.
, _warnText :: String -- ^ Warning text.
}

type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

data PState = PState
{ warnings :: [Warning] -- ^ Stack of warnings, top = last warning.
, smac_env :: Map String CharSet
, rmac_env :: Map String RExp
, startcode :: Int
, input :: AlexInput
}

newtype P a = P { unP :: PState -> Either ParseError (PState,a) }
newtype P' m a = P { unP :: StateT PState m a }
deriving (Functor, Applicative, Alternative, Monad, MonadTrans, MonadPlus)

instance Functor P where
fmap = liftM

instance Applicative P where
pure a = P $ \env -> Right (env,a)
(<*>) = ap

instance Monad P where
(P m) >>= k = P $ \env -> case m env of
Left err -> Left err
Right (env',ok) -> unP (k ok) env'
return = pure
type P = P' PBase

-- | Run the parser on given input.
runP :: String
runP :: MonadBasicParse m
=> String
-- ^ Input string.
-> (Map String CharSet, Map String RExp)
-- ^ Character set and regex definitions.
-> P a
-> P' m a
-- ^ Parsing computation.
-> Either ParseError ([Warning], a)
-- ^ List of warnings in first-to-last order, result.
runP str (senv,renv) (P p)
= case p initial_state of
Left err -> Left err
Right (s, a) -> Right (reverse (warnings s), a)
runP str (senv,renv) p = runPBase str $ do
(a, s) <- runStateT (unP p) initial_state
return (reverse (warnings s), a)
where
initial_state = PState
{ warnings = []
, smac_env = senv
, rmac_env = renv
, startcode = 0
, input = (alexStartPos, '\n', [], str)
}

failP :: String -> P a
failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
raiseP :: MonadBasicParse m => ParseError -> P' m a
raiseP = P . lift . raisePBase

failP :: MonadBasicParse m => String -> P' m a
failP = P . lift . failPBase

-- Macros are expanded during parsing, to simplify the abstract
-- syntax. The parsing monad passes around two environments mapping
-- macro names to sets and regexps respectively.

lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac (posn,smac)
= P $ \s@PState{ smac_env = senv } ->
case Map.lookup smac senv of
Just ok -> Right (s,ok)
Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)

lookupRMac :: String -> P RExp
lookupRMac rmac
= P $ \s@PState{ rmac_env = renv } ->
case Map.lookup rmac renv of
Just ok -> Right (s,ok)
Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac)

newSMac :: String -> CharSet -> P ()
lookupSMac :: MonadBasicParse m => (AlexPosn,String) -> P' m CharSet
lookupSMac (posn, smac) = do
PState{ smac_env = senv } <- P get
case Map.lookup smac senv of
Just ok -> return ok
Nothing -> raiseP (Just posn, "unknown set macro: $" ++ smac)

lookupRMac :: MonadBasicParse m => String -> P' m RExp
lookupRMac rmac = do
PState{ rmac_env = renv } <- P get
case Map.lookup rmac renv of
Just ok -> return ok
Nothing -> raiseP (Nothing, "unknown regex macro: %" ++ rmac)

newSMac :: Monad m => String -> CharSet -> P' m ()
newSMac smac set
= P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())
= P $ modify $ \s -> s { smac_env = Map.insert smac set (smac_env s) }

newRMac :: String -> RExp -> P ()
newRMac :: Monad m => String -> RExp -> P' m ()
newRMac rmac rexp
= P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())

setStartCode :: StartCode -> P ()
setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ())

getStartCode :: P StartCode
getStartCode = P $ \s -> Right (s, startcode s)

getInput :: P AlexInput
getInput = P $ \s -> Right (s, input s)

setInput :: AlexInput -> P ()
setInput inp = P $ \s -> Right (s{ input = inp }, ())
= P $ modify $ \s -> s { rmac_env = Map.insert rmac rexp (rmac_env s) }

-- | Add a warning if given regular expression is nullable
-- unless the user wrote the regex 'Eps'.
warnIfNullable
:: RExp -- ^ Regular expression.
:: Monad m
=> RExp -- ^ Regular expression.
-> AlexPosn -- ^ Position associated to regular expression.
-> P ()
-> P' m ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable Eps _ = return ()
warnIfNullable r pos = when (nullable r) $ P $ \ s ->
Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
warnIfNullable r pos = P $
when (nullable r) $ modify $ \ s -> s {
warnings = WarnNullableRExp pos w : warnings s
}
where
w = unwords
[ "Regular expression"
Expand Down
Loading