-
Notifications
You must be signed in to change notification settings - Fork 2
/
Main.hs
190 lines (161 loc) · 7.71 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
-- rv-2-rka
-- xdusek21
-- Daniel Dušek
import Options.Applicative
import Data.Semigroup ((<>))
import Control.Monad
import System.IO
import Data.Either
import Data.List (delete)
import System.Exit (exitWith)
{- My custom modules -}
import Types
-- Extra mile: Module that allows output FSM to be displayed in PDF
import FSMDrawer
{-----------------------------------------------------------
COMMANDLINE ENTRY POINT & ARGUMENTS PROCESSING
------------------------------------------------------------}
main :: IO ()
main = do
processInput =<< execParser opts
where
opts = info (parameterDefinitions <**> helper)
( fullDesc
<> progDesc "Reads regular expression in reverse polish notation (postfix) from input file and based on -r|-t switch displays or transforms it to finite state machine."
<> header "Postfix regex to FSM convertor." )
parameterDefinitions :: Parser Arguments
parameterDefinitions = Arguments
<$> (parseRepresent <|> parseTransform <|> parseTransformDraw)
<*> argument str (metavar "FILE" <> value "")
where
parseRepresent = flag' Represent
( long "represent"
<> short 'r'
<> help "Converts regular expression from input to internal representation and then prints it back to standard output."
)
parseTransform = flag' Transform
( long "transform"
<> short 't'
<> help "Converts regular expression from input to finite state machine on output."
)
parseTransformDraw = flag' TransformDraw
( long "trasform-draw"
<> short 'd'
<> help "Does the basic transformation but instead of printing string on output, generates image of final state machine."
)
{-- Based on chosen flag calls for corresponding processing function --}
processInput :: Arguments -> IO ()
processInput (Arguments a file) = case a of
Represent -> demonstrateRegexRepresentation file
Transform -> transformRV2FSM file
TransformDraw -> transformRV2Image file
{-----------------------------------------------------------
INPUT PARSING & OUTPUT FORMATTING FUNCTIONS
------------------------------------------------------------}
{--
Content serving IO functions for both -r and -t switches
Note: I designed these functions to be the only ones interacting with IO, hence the $-madness on their last lines
--}
demonstrateRegexRepresentation :: String -> IO ()
demonstrateRegexRepresentation file = do
content <- ensureProperInput file
if lines content == [] then putStr ""
else putStr $ reverse $ representTree' $ head $ map readRPNRegex $ lines content
transformRV2FSM :: String -> IO ()
transformRV2FSM file = do
content <- ensureProperInput file
if lines content == [] then putStr $ show $ FSM [1] [] [] 1 []
else putStr $ rv2rka' $ head $ map readRPNRegex $ lines content
transformRV2Image :: String -> IO ()
transformRV2Image file = do
content <- ensureProperInput file
if lines content == [] then putStr (convertToTex $ FSM [1] [] [] 1 [] )
else putStr ( rka2Image' ( head ( map readRPNRegex (lines content) ) ) )
ensureProperInput :: String -> IO String
ensureProperInput file = case file == "" of
True -> hGetContents stdin
False -> readFile file
{--
Input regex parsing functions
Inspired by: * http://stackoverflow.com/questions/36277160/haskell-reverse-polish-notation-regular-expression-to-expression-tree
* Also from Ing. Marek Kidon consultations
Extra mile: Function is actually capable of determining (and reacting) on invalid regex on input (i.e. for ab++ prints information about invalid input)
--}
readRPNRegex :: String -> Either String Tree
readRPNRegex s = case foldM parseCharacter' [] s of
Right [e] -> Right e
Left e -> Left e
_ -> Left regexNotValid
where
parseCharacter' (r:l:s) '.' = Right $ (BinaryOperation '.' l r):s
parseCharacter' (r:l:s) '+' = Right $ (BinaryOperation '+' l r):s
parseCharacter' (r:s) '*' = Right $ (Star '*' r):s
parseCharacter' s c
| c == '.' || c == '+' || c == '*' = Left $ regexNotValid
| True = Right $ (Character c):s
{--
Regex tree representation functions (-r switch)
--}
representTree' :: Either String Tree -> String
representTree' (Left e) = reverse e
representTree' (Right t) = representTree t
representTree :: Tree -> String
representTree (Character c) = [c]
representTree (Star c tree) = c : (representTree tree)
representTree (BinaryOperation c leftTree rightTree) = c : (representTree rightTree) ++ (representTree leftTree)
{--
FSM transformation functions (-t switch)
--}
rv2rka' :: Either String Tree -> String
rv2rka' (Left e) = e
rv2rka' (Right t) = show $ rv2rka t
rv2rka :: Tree -> FSM
rv2rka (Character a) = FSM [1,2] [] [TTransition 1 (TransitionLabel a) 2] 1 [2] -- Only basic automaton for 'a'
rv2rka (BinaryOperation '+' leftTree rightTree) = constructUnion (rv2rka leftTree) (rv2rka rightTree)
rv2rka (BinaryOperation '.' leftTree rightTree) = constructConcat (rv2rka leftTree) (rv2rka rightTree)
rv2rka (Star '*' tree) = constructIteration $ rv2rka tree
rv2rka t = FSM [1] [] [] 1 []
{-----------------------------------------------------------
REGEX TO FSM ALGORITHM FUNCTIONS
Algorithm I followed is described in README documentation(s)
------------------------------------------------------------}
{--
Reusable FSM construction helper functions
--}
shiftTransition :: Int -> TTransition -> TTransition
shiftTransition shift (TTransition f s t) = (TTransition (f+shift) s (t+shift))
{--
Functions for Union, Concatenation and Iteration of FSMs
--}
constructUnion :: FSM -> FSM -> FSM
constructUnion (FSM aS _ aT aIS aFS) (FSM bS _ bT bIS bFS) = FSM ([1] ++ generateNewStates) [] generateNewTransitions 1 [newFinalState]
where
bStateShift = 1 + length aS
newFinalState = 2 + (length $ aS ++ bS)
generateNewStates = map (+1) aS ++ map (+bStateShift) bS ++ [newFinalState]
generateNewTransitions = map (shiftTransition 1) aT
++ map (shiftTransition bStateShift) bT
++ generateEpsTransitions
generateEpsTransitions = [TTransition 1 Epsilon (aIS + 1), TTransition 1 Epsilon (bIS + bStateShift)]
++ map (\s -> TTransition s Epsilon newFinalState) (map (+1) aFS ++ map (+bStateShift) bFS)
constructConcat :: FSM -> FSM -> FSM
constructConcat (FSM aS _ aT aIS aFS) (FSM bS _ bT bIS bFS) = FSM (generateNewStates) [] generateNewTransitions 1 [newFinalState]
where
bStateShift = 1 + length aS
generateNewStates = aS ++ [bStateShift] ++ map (+bStateShift) bS
newFinalState = last generateNewStates
generateNewTransitions = [TTransition (length aS) Epsilon (1 + length aS), TTransition (1 + length aS) Epsilon (2 + length aS)]
++ aT
++ map (shiftTransition bStateShift) bT
constructIteration :: FSM -> FSM
constructIteration (FSM aS _ aT aIS aFS) = FSM ([1] ++ generateNewStates) [] generateNewTransitions 1 [newFinalState]
where
generateNewStates = map (+1) aS ++ [newFinalState]
newFinalState = 2 + length (aS)
generateNewTransitions = [TTransition 1 Epsilon 2, TTransition newFinalState Epsilon 1, TTransition 1 Epsilon newFinalState]
++ map (\s -> TTransition s Epsilon newFinalState) (map (+1) aFS)
++ map (shiftTransition 1) aT
{-- Extension: FSM drawing --}
rka2Image' :: Either String Tree -> String
rka2Image' (Left e) = e
rka2Image' (Right t) = convertToTex $ rv2rka t