diff --git a/CHANGES b/CHANGES
index 72bed4a8a5..d46cd834e4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,10 @@
jsoup changelog
+*** Release 1.2.4 [PENDING]
+ * Removed dependency on Apache Commons-lang. Jsoup now has no external dependencies.
+
+ * Further speed optimisations for parsing and output generation.
+
*** Release 1.2.3 [2010-Aug-04]
* Added support for automatic input character set detection and decoding. Jsoup now automatically detects the encoding
character set when parsing HTML from a File or URL. The parser checks the content-type header, then the
diff --git a/pom.xml b/pom.xml
index ad2921eff9..74deb18009 100644
--- a/pom.xml
+++ b/pom.xml
@@ -128,14 +128,9 @@
4.5
test
-
-
-
- commons-lang
- commons-lang
- 2.4
-
+
+
diff --git a/src/main/java/org/jsoup/DataUtil.java b/src/main/java/org/jsoup/DataUtil.java
index 2d2e469893..21a93d5197 100644
--- a/src/main/java/org/jsoup/DataUtil.java
+++ b/src/main/java/org/jsoup/DataUtil.java
@@ -1,12 +1,12 @@
package org.jsoup;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.*;
-import java.net.URL;
import java.net.HttpURLConnection;
+import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
diff --git a/src/main/java/org/jsoup/examples/ListLinks.java b/src/main/java/org/jsoup/examples/ListLinks.java
index ae44bbc2d5..43e8aa6c46 100644
--- a/src/main/java/org/jsoup/examples/ListLinks.java
+++ b/src/main/java/org/jsoup/examples/ListLinks.java
@@ -1,13 +1,13 @@
package org.jsoup.examples;
-import org.apache.commons.lang.Validate;
+import org.jsoup.Jsoup;
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
-import java.net.URL;
import java.io.IOException;
+import java.net.URL;
/**
* Example program to list links from a URL.
diff --git a/src/main/java/org/jsoup/helper/StringUtil.java b/src/main/java/org/jsoup/helper/StringUtil.java
new file mode 100644
index 0000000000..b49a1053fd
--- /dev/null
+++ b/src/main/java/org/jsoup/helper/StringUtil.java
@@ -0,0 +1,96 @@
+package org.jsoup.helper;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * A minimal String utility class. Designed for internal jsoup use only.
+ */
+public final class StringUtil {
+    // memoised padding: padding[n] is a string of n spaces, for n = 0..10
+    private static final String[] padding = {"", " ", "  ", "   ", "    ", "     ", "      ", "       ", "        ", "         ", "          "};
+
+    /**
+     * Join a collection of strings by a separator
+     * @param strings collection of string objects
+     * @param sep string to place between strings
+     * @return joined string; empty string if the collection is empty
+     */
+    public static String join(Collection<String> strings, String sep) {
+        return join(strings.iterator(), sep);
+    }
+
+    /**
+     * Join the strings supplied by an iterator by a separator
+     * @param strings iterator of string objects
+     * @param sep string to place between strings
+     * @return joined string; empty string if the iterator has no elements
+     */
+    public static String join(Iterator<String> strings, String sep) {
+        if (!strings.hasNext())
+            return "";
+
+        String start = strings.next();
+        if (!strings.hasNext()) // only one, avoid builder
+            return start;
+
+        StringBuilder sb = new StringBuilder(64).append(start);
+        while (strings.hasNext()) {
+            sb.append(sep);
+            sb.append(strings.next());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Returns space padding
+     * @param width amount of padding desired; must be >= 0, otherwise an IllegalArgumentException is thrown
+     * @return string of spaces * width
+     */
+    public static String padding(int width) {
+        if (width < 0)
+            throw new IllegalArgumentException("width must be >= 0");
+
+        if (width < padding.length) // small widths come from the memoised table
+            return padding[width];
+
+        char[] out = new char[width];
+        for (int i = 0; i < width; i++)
+            out[i] = ' ';
+        return String.valueOf(out);
+    }
+
+    /**
+     * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
+     * @param string string to test
+     * @return true if the string is null, empty, or contains only whitespace characters
+     */
+    public static boolean isBlank(String string) {
+        if (string == null || string.length() == 0)
+            return true;
+
+        int l = string.length();
+        for (int i = 0; i < l; i++) {
+            if (!Character.isWhitespace(string.charAt(i))) // i is a char index, so test the char at i
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * Tests if a string is numeric, i.e. contains only digit characters
+     * @param string string to test
+     * @return true if only digit chars, false if empty or null or contains non-digit chars
+     */
+    public static boolean isNumeric(String string) {
+        if (string == null || string.length() == 0)
+            return false;
+
+        int l = string.length();
+        for (int i = 0; i < l; i++) {
+            if (!Character.isDigit(string.charAt(i))) // i is a char index, so test the char at i
+                return false;
+        }
+        return true;
+    }
+}
diff --git a/src/main/java/org/jsoup/helper/Validate.java b/src/main/java/org/jsoup/helper/Validate.java
new file mode 100644
index 0000000000..6d91ffc234
--- /dev/null
+++ b/src/main/java/org/jsoup/helper/Validate.java
@@ -0,0 +1,85 @@
+package org.jsoup.helper;
+
+import java.util.Collection;
+
+/**
+ * Simple validation methods. Designed for jsoup internal use.
+ */
+public final class Validate {
+    /**
+     * Validates that the object is not null; throws IllegalArgumentException if it is
+     * @param obj object to test
+     */
+    public static void notNull(Object obj) {
+        if (obj == null)
+            throw new IllegalArgumentException("Object must not be null");
+    }
+
+    /**
+     * Validates that the object is not null; throws IllegalArgumentException if it is
+     * @param obj object to test
+     * @param msg message to output if validation fails
+     */
+    public static void notNull(Object obj, String msg) {
+        if (obj == null)
+            throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the value is true; throws IllegalArgumentException if it is false
+     * @param val value to test
+     */
+    public static void isTrue(boolean val) {
+        if (!val)
+            throw new IllegalArgumentException("Must be true");
+    }
+
+    /**
+     * Validates that the value is true; throws IllegalArgumentException if it is false
+     * @param val value to test
+     * @param msg message to output if validation fails
+     */
+    public static void isTrue(boolean val, String msg) {
+        if (!val)
+            throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the array is not null and contains no null elements
+     * @param objects the array to test
+     */
+    public static void noNullElements(Object[] objects) {
+        noNullElements(objects, "Array must not contain any null objects");
+    }
+
+    /**
+     * Validates that the array is not null and contains no null elements
+     * @param objects the array to test
+     * @param msg message to output if a null element is found
+     */
+    public static void noNullElements(Object[] objects, String msg) {
+        notNull(objects, "Array must not be null"); // fail as a validation error, not an NPE from the loop
+        for (Object obj : objects)
+            if (obj == null)
+                throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the string is not null or empty; throws IllegalArgumentException otherwise
+     * @param string the string to test
+     */
+    public static void notEmpty(String string) {
+        if (string == null || string.length() == 0)
+            throw new IllegalArgumentException("String must not be empty");
+    }
+
+    /**
+     * Validates that the string is not null or empty; throws IllegalArgumentException otherwise
+     * @param string the string to test
+     * @param msg message to output if validation fails
+     */
+    public static void notEmpty(String string, String msg) {
+        if (string == null || string.length() == 0)
+            throw new IllegalArgumentException(msg);
+    }
+}
diff --git a/src/main/java/org/jsoup/nodes/Attribute.java b/src/main/java/org/jsoup/nodes/Attribute.java
index ac8bdd4fbe..3833ffb6fc 100644
--- a/src/main/java/org/jsoup/nodes/Attribute.java
+++ b/src/main/java/org/jsoup/nodes/Attribute.java
@@ -1,7 +1,6 @@
package org.jsoup.nodes;
-import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import java.util.Map;
diff --git a/src/main/java/org/jsoup/nodes/Attributes.java b/src/main/java/org/jsoup/nodes/Attributes.java
index b616e371f4..19aa8ad9ca 100644
--- a/src/main/java/org/jsoup/nodes/Attributes.java
+++ b/src/main/java/org/jsoup/nodes/Attributes.java
@@ -1,6 +1,6 @@
package org.jsoup.nodes;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import java.util.*;
diff --git a/src/main/java/org/jsoup/nodes/DataNode.java b/src/main/java/org/jsoup/nodes/DataNode.java
index b45eb2b165..0d79d935e6 100644
--- a/src/main/java/org/jsoup/nodes/DataNode.java
+++ b/src/main/java/org/jsoup/nodes/DataNode.java
@@ -1,7 +1,5 @@
package org.jsoup.nodes;
-import org.apache.commons.lang.StringEscapeUtils;
-
/**
A data node, for contents of style, script tags etc, where contents should not show in text().
@@ -48,7 +46,7 @@ public String toString() {
@return new DataNode
*/
public static DataNode createFromEncoded(String encodedData, String baseUri) {
- String data = StringEscapeUtils.unescapeHtml(encodedData);
+ String data = Entities.unescape(encodedData);
return new DataNode(data, baseUri);
}
}
diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java
index 7d9b0203fe..afe111e7a1 100644
--- a/src/main/java/org/jsoup/nodes/Document.java
+++ b/src/main/java/org/jsoup/nodes/Document.java
@@ -1,12 +1,12 @@
package org.jsoup.nodes;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import org.jsoup.parser.Tag;
-import java.util.List;
-import java.util.ArrayList;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
+import java.util.ArrayList;
+import java.util.List;
/**
A HTML Document.
diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java
index 9a069babd6..f63dbcf72f 100644
--- a/src/main/java/org/jsoup/nodes/Element.java
+++ b/src/main/java/org/jsoup/nodes/Element.java
@@ -1,7 +1,7 @@
package org.jsoup.nodes;
-import org.apache.commons.lang.Validate;
-import org.apache.commons.lang.StringUtils;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
import org.jsoup.select.Collector;
@@ -810,7 +810,7 @@ public Set classNames() {
*/
public Element classNames(Set classNames) {
Validate.notNull(classNames);
- attributes.put("class", StringUtils.join(classNames, " "));
+ attributes.put("class", StringUtil.join(classNames, " "));
return this;
}
diff --git a/src/main/java/org/jsoup/nodes/Entities.java b/src/main/java/org/jsoup/nodes/Entities.java
index 15e8ef5cea..0231ec685d 100644
--- a/src/main/java/org/jsoup/nodes/Entities.java
+++ b/src/main/java/org/jsoup/nodes/Entities.java
@@ -1,10 +1,10 @@
package org.jsoup.nodes;
+import java.nio.charset.CharsetEncoder;
import java.util.HashMap;
import java.util.Map;
-import java.util.regex.Pattern;
import java.util.regex.Matcher;
-import java.nio.charset.CharsetEncoder;
+import java.util.regex.Pattern;
/**
* HMTL entities, and escape routines.
diff --git a/src/main/java/org/jsoup/nodes/Evaluator.java b/src/main/java/org/jsoup/nodes/Evaluator.java
index d32f076682..e52c97c402 100644
--- a/src/main/java/org/jsoup/nodes/Evaluator.java
+++ b/src/main/java/org/jsoup/nodes/Evaluator.java
@@ -1,10 +1,10 @@
package org.jsoup.nodes;
+import org.jsoup.helper.Validate;
+
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.List;
-
-import org.apache.commons.lang.Validate;
/**
diff --git a/src/main/java/org/jsoup/nodes/Node.java b/src/main/java/org/jsoup/nodes/Node.java
index cc545cdf69..f32d829bd4 100644
--- a/src/main/java/org/jsoup/nodes/Node.java
+++ b/src/main/java/org/jsoup/nodes/Node.java
@@ -1,9 +1,9 @@
package org.jsoup.nodes;
-import org.apache.commons.lang.Validate;
-import org.apache.commons.lang.StringUtils;
-import org.jsoup.select.NodeVisitor;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
import java.net.MalformedURLException;
import java.net.URL;
@@ -365,7 +365,7 @@ public String toString() {
}
protected void indent(StringBuilder accum, int depth) {
- accum.append("\n").append(StringUtils.leftPad("", depth));
+ accum.append("\n").append(StringUtil.padding(depth));
}
@Override
diff --git a/src/main/java/org/jsoup/nodes/TextNode.java b/src/main/java/org/jsoup/nodes/TextNode.java
index 57619afba9..c51fd2df09 100644
--- a/src/main/java/org/jsoup/nodes/TextNode.java
+++ b/src/main/java/org/jsoup/nodes/TextNode.java
@@ -1,10 +1,9 @@
package org.jsoup.nodes;
-import java.util.regex.Pattern;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
-import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.commons.lang.Validate;
-import org.apache.commons.lang.StringUtils;
+import java.util.regex.Pattern;
/**
A text node.
@@ -62,7 +61,7 @@ public String getWholeText() {
@return true if this document is empty or only whitespace, false if it contains any text content.
*/
public boolean isBlank() {
- return StringUtils.isBlank(normaliseWhitespace(getWholeText()));
+ return StringUtil.isBlank(getWholeText());
}
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
@@ -102,10 +101,6 @@ static String stripLeadingWhitespace(String text) {
}
static boolean lastCharIsWhitespace(StringBuilder sb) {
- if (sb.length() == 0)
- return false;
- String lastChar = sb.substring(sb.length()-1, sb.length());
- Validate.isTrue(lastChar.length() == 1); // todo: remove check
- return lastChar.equals(" ");
+ return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' ';
}
}
diff --git a/src/main/java/org/jsoup/parser/Parser.java b/src/main/java/org/jsoup/parser/Parser.java
index d9394b8bf2..b1f7c80d3b 100644
--- a/src/main/java/org/jsoup/parser/Parser.java
+++ b/src/main/java/org/jsoup/parser/Parser.java
@@ -1,9 +1,10 @@
package org.jsoup.parser;
-import org.apache.commons.lang.Validate;
+
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;
-import java.util.*;
+import java.util.LinkedList;
/**
Parses HTML into a {@link Document}. Generally best to use one of the more convenient parse methods in {@link org.jsoup.Jsoup}.
diff --git a/src/main/java/org/jsoup/parser/Tag.java b/src/main/java/org/jsoup/parser/Tag.java
index fe1b368e2b..9a91e965a0 100644
--- a/src/main/java/org/jsoup/parser/Tag.java
+++ b/src/main/java/org/jsoup/parser/Tag.java
@@ -1,6 +1,6 @@
package org.jsoup.parser;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import java.util.*;
diff --git a/src/main/java/org/jsoup/parser/TokenQueue.java b/src/main/java/org/jsoup/parser/TokenQueue.java
index a72e73c004..f70886169d 100644
--- a/src/main/java/org/jsoup/parser/TokenQueue.java
+++ b/src/main/java/org/jsoup/parser/TokenQueue.java
@@ -1,6 +1,6 @@
package org.jsoup.parser;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
/**
* A character queue with parsing helpers.
diff --git a/src/main/java/org/jsoup/safety/Cleaner.java b/src/main/java/org/jsoup/safety/Cleaner.java
index 6e342ac76c..68c0ab2144 100644
--- a/src/main/java/org/jsoup/safety/Cleaner.java
+++ b/src/main/java/org/jsoup/safety/Cleaner.java
@@ -1,6 +1,6 @@
package org.jsoup.safety;
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;
import org.jsoup.parser.Tag;
diff --git a/src/main/java/org/jsoup/safety/Whitelist.java b/src/main/java/org/jsoup/safety/Whitelist.java
index 77f2953777..6fd66771f5 100644
--- a/src/main/java/org/jsoup/safety/Whitelist.java
+++ b/src/main/java/org/jsoup/safety/Whitelist.java
@@ -5,7 +5,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/
this whitelist configuration, and the initial defaults.
*/
-import org.apache.commons.lang.Validate;
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Element;
diff --git a/src/main/java/org/jsoup/select/Elements.java b/src/main/java/org/jsoup/select/Elements.java
index 08540c010d..603f5a3829 100644
--- a/src/main/java/org/jsoup/select/Elements.java
+++ b/src/main/java/org/jsoup/select/Elements.java
@@ -1,7 +1,7 @@
package org.jsoup.select;
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.Element;
-import org.apache.commons.lang.Validate;
import java.util.*;
diff --git a/src/main/java/org/jsoup/select/Selector.java b/src/main/java/org/jsoup/select/Selector.java
index 440870ac4c..7996e6871b 100644
--- a/src/main/java/org/jsoup/select/Selector.java
+++ b/src/main/java/org/jsoup/select/Selector.java
@@ -1,7 +1,8 @@
package org.jsoup.select;
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.Validate;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
import org.jsoup.nodes.Element;
import org.jsoup.parser.TokenQueue;
@@ -261,7 +262,7 @@ private Elements indexEquals() {
private int consumeIndex() {
String indexS = tq.chompTo(")").trim();
- Validate.isTrue(StringUtils.isNumeric(indexS), "Index must be numeric");
+ Validate.isTrue(StringUtil.isNumeric(indexS), "Index must be numeric");
return Integer.parseInt(indexS);
}
diff --git a/src/test/java/org/jsoup/helper/StringUtilTest.java b/src/test/java/org/jsoup/helper/StringUtilTest.java
new file mode 100644
index 0000000000..e70d5ad1e6
--- /dev/null
+++ b/src/test/java/org/jsoup/helper/StringUtilTest.java
@@ -0,0 +1,44 @@
+package org.jsoup.helper;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+
+public class StringUtilTest {
+
+    @Test public void join() {
+        assertEquals("", StringUtil.join(Arrays.asList(""), " "));
+        assertEquals("one", StringUtil.join(Arrays.asList("one"), " "));
+        assertEquals("one two three", StringUtil.join(Arrays.asList("one", "two", "three"), " "));
+    }
+
+    @Test public void padding() {
+        assertEquals("", StringUtil.padding(0));
+        assertEquals(" ", StringUtil.padding(1));
+        assertEquals("  ", StringUtil.padding(2)); // from the memoised table
+        assertEquals("               ", StringUtil.padding(15)); // beyond the memoised table
+    }
+
+    @Test public void isBlank() {
+        assertTrue(StringUtil.isBlank(null));
+        assertTrue(StringUtil.isBlank(""));
+        assertTrue(StringUtil.isBlank(" "));
+        assertTrue(StringUtil.isBlank(" \r\n "));
+
+        assertFalse(StringUtil.isBlank("hello"));
+        assertFalse(StringUtil.isBlank(" hello "));
+    }
+
+    @Test public void isNumeric() {
+        assertFalse(StringUtil.isNumeric(null));
+        assertFalse(StringUtil.isNumeric(" "));
+        assertFalse(StringUtil.isNumeric("123 546"));
+        assertFalse(StringUtil.isNumeric("hello"));
+        assertFalse(StringUtil.isNumeric("123.334"));
+
+        assertTrue(StringUtil.isNumeric("1"));
+        assertTrue(StringUtil.isNumeric("1234"));
+    }
+}