forked from jbowens/jBBCode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParser.php
293 lines (251 loc) · 10.4 KB
/
Parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
<?php
namespace JBBCode;
use JBBCode\CodeDefinition;
require_once('ElementNode.php');
require_once('TextNode.php');
require_once('DocumentElement.php');
require_once('CodeDefinition.php');
require_once('TokenManager.php');
require_once('NodeVisitor.php');
/**
 * @author Jackson Owens
 *
 * Parser is the main parser class that constructs and stores the parse tree. Through this class
 * new bbcode definitions can be added, and documents may be parsed and converted to html/bbcode/plaintext, etc.
 */
class Parser {

    /* The root element of the parse tree */
    protected $treeRoot;

    /* The list of bbcodes (CodeDefinition instances) to be used by the parser. */
    protected $bbcodes;

    /**
     * Constructs an instance of the BBCode parser with an empty parse tree and no registered bbcodes.
     */
    public function __construct() {
        $this->reset();
        $this->bbcodes = array();
    }

    /**
     * Adds a simple (text-replacement only) bbcode definition
     *
     * @param string  $tagName      the tag name of the code (for example the b in [b])
     * @param string  $replace      the html to use, with {param} and optionally {option} for replacements
     * @param boolean $useOption    whether or not this bbcode uses the secondary {option} replacement
     * @param boolean $parseContent whether or not to parse the content within these elements
     * @param integer $nestLimit    an optional limit of the number of elements of this kind that can be nested within
     *                              each other before the parser stops parsing them.
     */
    public function addBBCode($tagName, $replace, $useOption = false, $parseContent = true, $nestLimit = -1) {
        $code = new CodeDefinition();
        $code->setTagName($tagName);
        $code->setUseOption( $useOption );
        $code->setParseContent( $parseContent );
        $code->setNestLimit( $nestLimit );
        $code->setReplacementText($replace);
        $this->bbcodes[] = $code;
    }

    /**
     * Adds a complex bbcode definition. You may subclass the CodeDefinition class, instantiate a definition of your new
     * class and add it to the parser through this method.
     *
     * @param CodeDefinition $definition the bbcode definition to add
     */
    public function addCodeDefinition( CodeDefinition $definition )
    {
        $this->bbcodes[] = $definition;
    }

    /**
     * Returns the entire parse tree as text. Only {param} content is returned. BBCode markup will be ignored.
     *
     * @return string a text representation of the parse tree
     */
    public function getAsText() {
        return $this->treeRoot->getAsText();
    }

    /**
     * Returns the entire parse tree as bbcode. This will be identical to the inputted string, except unclosed tags
     * will be closed.
     *
     * @return string a bbcode representation of the parse tree
     */
    public function getAsBBCode() {
        return $this->treeRoot->getAsBBCode();
    }

    /**
     * Returns the entire parse tree as HTML. All BBCode replacements will be made. This is generally the method
     * you will want to use to retrieve the parsed bbcode.
     *
     * @return string a parsed html string
     */
    public function getAsHTML() {
        return $this->treeRoot->getAsHTML();
    }

    /**
     * Accepts the given NodeVisitor at the root.
     *
     * @param NodeVisitor $nodeVisitor the visitor to hand to the root of the parse tree
     */
    public function accept(NodeVisitor $nodeVisitor) {
        $this->treeRoot->accept($nodeVisitor);
    }

    /**
     * Constructs the parse tree from a string of bbcode markup. Any previously built tree is discarded.
     *
     * @param string $str the bbcode markup to parse
     */
    public function parse( $str ) {
        $this->reset();
        $parent = $this->treeRoot;
        $tokenManager = new TokenManager( $str );
        $nodeid = 1;
        $inTag = false;

        while( $tokenManager->hasCurrent() ) {
            // tokens are either "[", "]" or a string that contains neither a opening bracket nor a closing bracket
            if( $inTag ) {
                // this token should be a tag name
                // explode by = in case there's an attribute
                $pieces = explode('=', $tokenManager->getCurrent(), 2);
                $hasOption = isset($pieces[1]);

                // a leading slash marks a closing tag
                $closing = ( substr($pieces[0], 0, 1) == "/" );
                $tagName = $closing ? substr($pieces[0], 1) : $pieces[0];

                // a closing tag never carries an option, so it may also close the option-taking variant of the code
                $known = $this->codeExists( $tagName, $hasOption )
                    || ( $closing && $this->codeExists( $tagName, true ) );

                if( $known && $tokenManager->hasNext() && $tokenManager->next() == "]" ) {
                    if( $closing ) {
                        $closestParent = $parent->closestParentOfType( $tagName );
                        if( $closestParent != null && $closestParent->hasParent() ) {
                            // closing an element implicitly closes every element still open inside it,
                            // so release the nesting counter of each of them, not just the innermost one
                            $this->releaseCounters( $parent, $closestParent );
                            // ... then move to the closed element's parent
                            $parent = $closestParent->getParent();
                            $tokenManager->advance();
                            $tokenManager->advance();
                            $inTag = false;
                            continue;
                        }
                        // no matching open element: fall through and treat the bracket as plain text
                    } else {
                        // new element
                        $code = $this->getCode( $tagName, $hasOption );
                        $code->incrementCounter();

                        $el = new ElementNode();
                        $el->setNestDepth( $code->getCounter() );
                        $el->setCodeDefinition( $code );
                        $el->setTagName( $tagName );
                        $el->setNodeId( $nodeid++ );
                        if( $hasOption ) {
                            $el->setAttribute( $pieces[1] );
                        }

                        $parent->addChild( $el );
                        $parent = $el;
                        $tokenManager->advance();
                        $tokenManager->advance();
                        $inTag = false;
                        continue;
                    }
                }

                // the opening bracket that sent us in here was really just plain text
                $node = new TextNode( "[" );
                $node->setNodeId( $nodeid++ );
                $parent->addChild( $node );
                $inTag = false;
                // treat this token as regular text, and let the next if...else structure handle it as regular text
            }

            if( $tokenManager->getCurrent() == "[" ) {
                $inTag = true;
            } else {
                $node = new TextNode( $tokenManager->getCurrent() );
                $node->setNodeId( $nodeid++ );
                $parent->addChild( $node );
            }
            $tokenManager->advance();
        }

        // the input ended right after an opening bracket: keep that bracket as plain text instead of dropping it
        if( $inTag ) {
            $node = new TextNode( "[" );
            $node->setNodeId( $nodeid++ );
            $parent->addChild( $node );
        }

        // Elements left unclosed at the end of the input still hold an incremented counter on their definition.
        // The definitions outlive this parse tree, so release those counters to keep the next parse() call
        // from starting with stale nesting counts.
        $this->releaseCounters( $parent );
    }

    /**
     * Decrements the code definition counter of $from and of each of its ancestors, walking upwards until
     * $upTo has been processed (inclusive) or a node without a parent is reached. The parentless node itself
     * is not touched.
     *
     * NOTE(review): assumes the parentless node is the document root and that closestParentOfType() returns the
     * very node instance that is linked into the tree - confirm against DocumentElement / ElementNode.
     *
     * @param object      $from the innermost node to start from
     * @param object|null $upTo the outermost node whose counter should be released, or null to walk up to the root
     */
    protected function releaseCounters( $from, $upTo = null ) {
        for( $node = $from; $node !== null && $node->hasParent(); $node = $node->getParent() ) {
            $node->getCodeDefinition()->decrementCounter();
            if( $node === $upTo ) {
                break;
            }
        }
    }

    /**
     * Removes any elements that are nested beyond their nest limit from the parse tree.
     */
    public function removeOverNestedElements() {
        foreach( $this->treeRoot->getChildren() as $child ) {
            $this->removeOverNested( $child );
        }
    }

    /**
     * Recursive version of removeOverNestedElements().
     *
     * @param Node $el a node to clean up (including the entire subtree)
     */
    protected function removeOverNested( Node $el ) {
        if( $el->isTextNode() ) {
            return;
        }

        if( $el->beyondDefinitionLimit() ) {
            // dropping the element also drops its whole subtree, so there is no need to descend into it
            $el->getParent()->removeChild( $el );
            return;
        }

        foreach( $el->getChildren() as $child ) {
            $this->removeOverNested( $child );
        }
    }

    /**
     * Removes the old parse tree if one exists. Registered bbcodes are kept.
     */
    protected function reset() {
        // remove any old tree information
        $this->treeRoot = new DocumentElement();
    }

    /**
     * Determines whether a bbcode exists based on its tag name and whether or not it uses an option
     *
     * @param string  $tagName    the bbcode tag name to check
     * @param boolean $usesOption whether or not the bbcode accepts an option
     *
     * @return boolean true if the code exists, false otherwise
     */
    public function codeExists( $tagName, $usesOption = false ) {
        return $this->getCode( $tagName, $usesOption ) !== null;
    }

    /**
     * Returns the CodeDefinition of a bbcode with the matching tag name and usesOption parameter.
     * The given tag name is lower-cased before it is compared with the registered tag names.
     *
     * @param string  $tagName    the tag name of the bbcode being searched for
     * @param boolean $usesOption whether or not the bbcode accepts an option
     *
     * @return CodeDefinition|null the first matching definition if the bbcode exists, null otherwise
     */
    public function getCode( $tagName, $usesOption = false ) {
        $needle = strtolower( $tagName );
        foreach( $this->bbcodes as $code ) {
            // strict string comparison: with == numeric-looking tag names such as "1e3" and "1000" would match
            if( $needle === (string) $code->getTagName() && $code->usesOption() == $usesOption ) {
                return $code;
            }
        }
        return null;
    }

    /**
     * Adds a set of default, standard bbcode definitions commonly used across the web.
     */
    public function loadDefaultCodes() {
        $this->addBBCode("b", "<strong>{param}</strong>");
        $this->addBBCode("i", "<em>{param}</em>");
        $this->addBBCode("u", "<u>{param}</u>");
        $this->addBBCode("url", "<a href=\"{param}\">{param}</a>");
        $this->addBBCode("url", "<a href=\"{option}\">{param}</a>", true);
        $this->addBBCode("img", "<img src=\"{param}\" alt=\"a user uploaded image\" />");
        $this->addBBCode("img", "<img src=\"{param}\" alt=\"{option}\" />", true);
        $this->addBBCode("color", "<span style=\"color: {option}\">{param}</span>", true);
    }

    /**
     * FOR DEBUG ONLY. This method prints the entire parse tree in a human-readable format and kills script execution.
     */
    public function printTree() {
        die("<pre>".htmlentities(print_r($this->treeRoot, true))."</pre>");
    }
}