diff --git a/CHANGES b/CHANGES index 72bed4a8a5..d46cd834e4 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,10 @@ jsoup changelog +*** Release 1.2.4 [PENDING] + * Removed dependency on Apache Commons-lang. Jsoup now has no external dependencies. + + * Further speed optimisations for parsing and output generation. + *** Release 1.2.3 [2010-Aug-04] * Added support for automatic input character set detection and decoding. Jsoup now automatically detects the encoding character set when parsing HTML from a File or URL. The parser checks the content-type header, then the diff --git a/pom.xml b/pom.xml index ad2921eff9..74deb18009 100644 --- a/pom.xml +++ b/pom.xml @@ -128,14 +128,9 @@ 4.5 test - - - - commons-lang - commons-lang - 2.4 - + + diff --git a/src/main/java/org/jsoup/DataUtil.java b/src/main/java/org/jsoup/DataUtil.java index 2d2e469893..21a93d5197 100644 --- a/src/main/java/org/jsoup/DataUtil.java +++ b/src/main/java/org/jsoup/DataUtil.java @@ -1,12 +1,12 @@ package org.jsoup; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import java.io.*; -import java.net.URL; import java.net.HttpURLConnection; +import java.net.URL; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.regex.Matcher; diff --git a/src/main/java/org/jsoup/examples/ListLinks.java b/src/main/java/org/jsoup/examples/ListLinks.java index ae44bbc2d5..43e8aa6c46 100644 --- a/src/main/java/org/jsoup/examples/ListLinks.java +++ b/src/main/java/org/jsoup/examples/ListLinks.java @@ -1,13 +1,13 @@ package org.jsoup.examples; -import org.apache.commons.lang.Validate; +import org.jsoup.Jsoup; +import org.jsoup.helper.Validate; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.Jsoup; import org.jsoup.select.Elements; -import java.net.URL; import java.io.IOException; +import java.net.URL; /** * Example program to list links from a URL. 
diff --git a/src/main/java/org/jsoup/helper/StringUtil.java b/src/main/java/org/jsoup/helper/StringUtil.java new file mode 100644 index 0000000000..b49a1053fd --- /dev/null +++ b/src/main/java/org/jsoup/helper/StringUtil.java @@ -0,0 +1,96 @@ +package org.jsoup.helper; + +import java.util.Collection; +import java.util.Iterator; + +/** + * A minimal String utility class. Designed for internal jsoup use only. + */ +public final class StringUtil { + // memoised padding up to 10 + private static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "}; + + /** + * Join a collection of strings by a separator; delegates to the iterator overload. + * @param strings collection of string objects + * @param sep string to place between strings + * @return joined string + */ + public static String join(Collection strings, String sep) { + return join(strings.iterator(), sep); + } + + /** + * Join the strings supplied by an iterator by a separator. NOTE(review): the type arguments look stripped in this view (String start = strings.next() needs an Iterator of String) - confirm against the real file. + * @param strings iterator of string objects + * @param sep string to place between strings + * @return joined string; the empty string when the iterator has no elements, and the sole element itself when there is exactly one + */ + public static String join(Iterator strings, String sep) { + if (!strings.hasNext()) + return ""; + + String start = strings.next(); + if (!strings.hasNext()) // only one, avoid builder + return start; + + StringBuilder sb = new StringBuilder(64).append(start); + while (strings.hasNext()) { + sb.append(sep); + sb.append(strings.next()); + } + return sb.toString(); + } + + /** + * Returns space padding. Widths below the memoised table length are read from the table, larger ones are built on demand. NOTE(review): every non-empty table entry shows as a single space in this view, which looks whitespace-collapsed - confirm that entry i holds i spaces. + * @param width amount of padding desired; a negative width raises IllegalArgumentException (zero is accepted, despite the wording of the exception message) + * @return string of spaces * width + */ + public static String padding(int width) { + if (width < 0) + throw new IllegalArgumentException("width must be > 0"); + + if (width < padding.length) + return padding[width]; + + char[] out = new char[width]; + for (int i = 0; i < width; i++) + out[i] = ' '; + return String.valueOf(out); + } + + /** + * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc) + * @param string string to test + * @return true if string is blank, false otherwise + */ + 
public static boolean isBlank(String string) { + if (string == null || string.length() == 0) + return true; + + int l = string.length(); + for (int i = 0; i < l; i++) { + if (!Character.isWhitespace(string.codePointAt(i))) + return false; + } + return true; + } + + /** + * Tests if a string is numeric, i.e. contains only digit characters (as judged by Character.isDigit). NOTE(review): the index advances one char at a time while codePointAt is called, so a digit stored as a surrogate pair would presumably be rejected at its low surrogate - confirm whether that matters. + * @param string string to test + * @return true if only digit chars, false if empty or null or contains non-digit chars + */ + public static boolean isNumeric(String string) { + if (string == null || string.length() == 0) + return false; + + int l = string.length(); + for (int i = 0; i < l; i++) { + if (!Character.isDigit(string.codePointAt(i))) + return false; + } + return true; + } +} diff --git a/src/main/java/org/jsoup/helper/Validate.java b/src/main/java/org/jsoup/helper/Validate.java new file mode 100644 index 0000000000..6d91ffc234 --- /dev/null +++ b/src/main/java/org/jsoup/helper/Validate.java @@ -0,0 +1,85 @@ +package org.jsoup.helper; + +import java.util.Collection; + +/** + * Simple validation methods. 
Designed for jsoup internal use + */ +public final class Validate { + + /** + * Validates that the object is not null; throws IllegalArgumentException with a fixed default message otherwise + * @param obj object to test + */ + public static void notNull(Object obj) { + if (obj == null) + throw new IllegalArgumentException("Object must not be null"); + } + + /** + * Validates that the object is not null; throws IllegalArgumentException carrying msg otherwise + * @param obj object to test + * @param msg message to output if validation fails + */ + public static void notNull(Object obj, String msg) { + if (obj == null) + throw new IllegalArgumentException(msg); + } + + /** + * Validates that the value is true; throws IllegalArgumentException with a fixed default message otherwise + * @param val boolean value to test + */ + public static void isTrue(boolean val) { + if (!val) + throw new IllegalArgumentException("Must be true"); + } + + /** + * Validates that the value is true; throws IllegalArgumentException carrying msg otherwise + * @param val boolean value to test + * @param msg message to output if validation fails + */ + public static void isTrue(boolean val, String msg) { + if (!val) + throw new IllegalArgumentException(msg); + } + + /** + * Validates that the array contains no null elements, using a fixed default message; delegates to the two-argument overload + * @param objects the array to test + */ + public static void noNullElements(Object[] objects) { + noNullElements(objects, "Array must not contain any null objects"); + } + + /** + * Validates that the array contains no null elements; throws IllegalArgumentException carrying msg at the first null element found + * @param objects the array to test (the array reference itself is not null-checked here) + * @param msg message to output if validation fails + */ + public static void noNullElements(Object[] objects, String msg) { + for (Object obj : objects) + if (obj == null) + throw new IllegalArgumentException(msg); + } + + /** + * Validates that the string is neither null nor zero-length, using a fixed default message (a whitespace-only string passes) + * @param string the string to test + */ + public static void notEmpty(String string) { + if (string == null || string.length() == 0) + throw new IllegalArgumentException("String must not be empty"); + } + + /** + * Validates that the string is neither null nor zero-length; throws IllegalArgumentException carrying msg otherwise (a whitespace-only string passes) + * @param string the string to test + * @param msg message to output if validation fails + */ + public static void notEmpty(String string, String msg) { 
+ if (string == null || string.length() == 0) + throw new IllegalArgumentException(msg); + } +} diff --git a/src/main/java/org/jsoup/nodes/Attribute.java b/src/main/java/org/jsoup/nodes/Attribute.java index ac8bdd4fbe..3833ffb6fc 100644 --- a/src/main/java/org/jsoup/nodes/Attribute.java +++ b/src/main/java/org/jsoup/nodes/Attribute.java @@ -1,7 +1,6 @@ package org.jsoup.nodes; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import java.util.Map; diff --git a/src/main/java/org/jsoup/nodes/Attributes.java b/src/main/java/org/jsoup/nodes/Attributes.java index b616e371f4..19aa8ad9ca 100644 --- a/src/main/java/org/jsoup/nodes/Attributes.java +++ b/src/main/java/org/jsoup/nodes/Attributes.java @@ -1,6 +1,6 @@ package org.jsoup.nodes; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import java.util.*; diff --git a/src/main/java/org/jsoup/nodes/DataNode.java b/src/main/java/org/jsoup/nodes/DataNode.java index b45eb2b165..0d79d935e6 100644 --- a/src/main/java/org/jsoup/nodes/DataNode.java +++ b/src/main/java/org/jsoup/nodes/DataNode.java @@ -1,7 +1,5 @@ package org.jsoup.nodes; -import org.apache.commons.lang.StringEscapeUtils; - /** A data node, for contents of style, script tags etc, where contents should not show in text(). 
@@ -48,7 +46,7 @@ public String toString() { @return new DataNode */ public static DataNode createFromEncoded(String encodedData, String baseUri) { - String data = StringEscapeUtils.unescapeHtml(encodedData); + String data = Entities.unescape(encodedData); return new DataNode(data, baseUri); } } diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 7d9b0203fe..afe111e7a1 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -1,12 +1,12 @@ package org.jsoup.nodes; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import org.jsoup.parser.Tag; -import java.util.List; -import java.util.ArrayList; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; +import java.util.ArrayList; +import java.util.List; /** A HTML Document. diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java index 9a069babd6..f63dbcf72f 100644 --- a/src/main/java/org/jsoup/nodes/Element.java +++ b/src/main/java/org/jsoup/nodes/Element.java @@ -1,7 +1,7 @@ package org.jsoup.nodes; -import org.apache.commons.lang.Validate; -import org.apache.commons.lang.StringUtils; +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; import org.jsoup.parser.Parser; import org.jsoup.parser.Tag; import org.jsoup.select.Collector; @@ -810,7 +810,7 @@ public Set classNames() { */ public Element classNames(Set classNames) { Validate.notNull(classNames); - attributes.put("class", StringUtils.join(classNames, " ")); + attributes.put("class", StringUtil.join(classNames, " ")); return this; } diff --git a/src/main/java/org/jsoup/nodes/Entities.java b/src/main/java/org/jsoup/nodes/Entities.java index 15e8ef5cea..0231ec685d 100644 --- a/src/main/java/org/jsoup/nodes/Entities.java +++ b/src/main/java/org/jsoup/nodes/Entities.java @@ -1,10 +1,10 @@ package org.jsoup.nodes; +import java.nio.charset.CharsetEncoder; 
import java.util.HashMap; import java.util.Map; -import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.nio.charset.CharsetEncoder; +import java.util.regex.Pattern; /** * HMTL entities, and escape routines. diff --git a/src/main/java/org/jsoup/nodes/Evaluator.java b/src/main/java/org/jsoup/nodes/Evaluator.java index d32f076682..e52c97c402 100644 --- a/src/main/java/org/jsoup/nodes/Evaluator.java +++ b/src/main/java/org/jsoup/nodes/Evaluator.java @@ -1,10 +1,10 @@ package org.jsoup.nodes; +import org.jsoup.helper.Validate; + +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.List; - -import org.apache.commons.lang.Validate; /** diff --git a/src/main/java/org/jsoup/nodes/Node.java b/src/main/java/org/jsoup/nodes/Node.java index cc545cdf69..f32d829bd4 100644 --- a/src/main/java/org/jsoup/nodes/Node.java +++ b/src/main/java/org/jsoup/nodes/Node.java @@ -1,9 +1,9 @@ package org.jsoup.nodes; -import org.apache.commons.lang.Validate; -import org.apache.commons.lang.StringUtils; -import org.jsoup.select.NodeVisitor; +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; import java.net.MalformedURLException; import java.net.URL; @@ -365,7 +365,7 @@ public String toString() { } protected void indent(StringBuilder accum, int depth) { - accum.append("\n").append(StringUtils.leftPad("", depth)); + accum.append("\n").append(StringUtil.padding(depth)); } @Override diff --git a/src/main/java/org/jsoup/nodes/TextNode.java b/src/main/java/org/jsoup/nodes/TextNode.java index 57619afba9..c51fd2df09 100644 --- a/src/main/java/org/jsoup/nodes/TextNode.java +++ b/src/main/java/org/jsoup/nodes/TextNode.java @@ -1,10 +1,9 @@ package org.jsoup.nodes; -import java.util.regex.Pattern; +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; -import org.apache.commons.lang.StringEscapeUtils; -import 
org.apache.commons.lang.Validate; -import org.apache.commons.lang.StringUtils; +import java.util.regex.Pattern; /** A text node. @@ -62,7 +61,7 @@ public String getWholeText() { @return true if this document is empty or only whitespace, false if it contains any text content. */ public boolean isBlank() { - return StringUtils.isBlank(normaliseWhitespace(getWholeText())); + return StringUtil.isBlank(getWholeText()); } void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { @@ -102,10 +101,6 @@ static String stripLeadingWhitespace(String text) { } static boolean lastCharIsWhitespace(StringBuilder sb) { - if (sb.length() == 0) - return false; - String lastChar = sb.substring(sb.length()-1, sb.length()); - Validate.isTrue(lastChar.length() == 1); // todo: remove check - return lastChar.equals(" "); + return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' '; } } diff --git a/src/main/java/org/jsoup/parser/Parser.java b/src/main/java/org/jsoup/parser/Parser.java index d9394b8bf2..b1f7c80d3b 100644 --- a/src/main/java/org/jsoup/parser/Parser.java +++ b/src/main/java/org/jsoup/parser/Parser.java @@ -1,9 +1,10 @@ package org.jsoup.parser; -import org.apache.commons.lang.Validate; + +import org.jsoup.helper.Validate; import org.jsoup.nodes.*; -import java.util.*; +import java.util.LinkedList; /** Parses HTML into a {@link Document}. Generally best to use one of the more convenient parse methods in {@link org.jsoup.Jsoup}. 
diff --git a/src/main/java/org/jsoup/parser/Tag.java b/src/main/java/org/jsoup/parser/Tag.java index fe1b368e2b..9a91e965a0 100644 --- a/src/main/java/org/jsoup/parser/Tag.java +++ b/src/main/java/org/jsoup/parser/Tag.java @@ -1,6 +1,6 @@ package org.jsoup.parser; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import java.util.*; diff --git a/src/main/java/org/jsoup/parser/TokenQueue.java b/src/main/java/org/jsoup/parser/TokenQueue.java index a72e73c004..f70886169d 100644 --- a/src/main/java/org/jsoup/parser/TokenQueue.java +++ b/src/main/java/org/jsoup/parser/TokenQueue.java @@ -1,6 +1,6 @@ package org.jsoup.parser; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; /** * A character queue with parsing helpers. diff --git a/src/main/java/org/jsoup/safety/Cleaner.java b/src/main/java/org/jsoup/safety/Cleaner.java index 6e342ac76c..68c0ab2144 100644 --- a/src/main/java/org/jsoup/safety/Cleaner.java +++ b/src/main/java/org/jsoup/safety/Cleaner.java @@ -1,6 +1,6 @@ package org.jsoup.safety; -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import org.jsoup.nodes.*; import org.jsoup.parser.Tag; diff --git a/src/main/java/org/jsoup/safety/Whitelist.java b/src/main/java/org/jsoup/safety/Whitelist.java index 77f2953777..6fd66771f5 100644 --- a/src/main/java/org/jsoup/safety/Whitelist.java +++ b/src/main/java/org/jsoup/safety/Whitelist.java @@ -5,7 +5,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/ this whitelist configuration, and the initial defaults. 
*/ -import org.apache.commons.lang.Validate; +import org.jsoup.helper.Validate; import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Attributes; import org.jsoup.nodes.Element; diff --git a/src/main/java/org/jsoup/select/Elements.java b/src/main/java/org/jsoup/select/Elements.java index 08540c010d..603f5a3829 100644 --- a/src/main/java/org/jsoup/select/Elements.java +++ b/src/main/java/org/jsoup/select/Elements.java @@ -1,7 +1,7 @@ package org.jsoup.select; +import org.jsoup.helper.Validate; import org.jsoup.nodes.Element; -import org.apache.commons.lang.Validate; import java.util.*; diff --git a/src/main/java/org/jsoup/select/Selector.java b/src/main/java/org/jsoup/select/Selector.java index 440870ac4c..7996e6871b 100644 --- a/src/main/java/org/jsoup/select/Selector.java +++ b/src/main/java/org/jsoup/select/Selector.java @@ -1,7 +1,8 @@ package org.jsoup.select; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.Validate; + +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; import org.jsoup.nodes.Element; import org.jsoup.parser.TokenQueue; @@ -261,7 +262,7 @@ private Elements indexEquals() { private int consumeIndex() { String indexS = tq.chompTo(")").trim(); - Validate.isTrue(StringUtils.isNumeric(indexS), "Index must be numeric"); + Validate.isTrue(StringUtil.isNumeric(indexS), "Index must be numeric"); return Integer.parseInt(indexS); } diff --git a/src/test/java/org/jsoup/helper/StringUtilTest.java b/src/test/java/org/jsoup/helper/StringUtilTest.java new file mode 100644 index 0000000000..e70d5ad1e6 --- /dev/null +++ b/src/test/java/org/jsoup/helper/StringUtilTest.java @@ -0,0 +1,44 @@ +package org.jsoup.helper; + +import org.junit.Test; + +import static org.junit.Assert.*; + +import java.util.Arrays; + +/** Unit tests for the StringUtil helpers (join, padding, isBlank, isNumeric). NOTE(review): several expected literals in the padding test show as a single space in this view, which looks whitespace-collapsed - confirm against the real file. */ public class StringUtilTest { + + @Test public void join() { + assertEquals("", StringUtil.join(Arrays.asList(""), " ")); + assertEquals("one", StringUtil.join(Arrays.asList("one"), " ")); + assertEquals("one 
two three", StringUtil.join(Arrays.asList("one", "two", "three"), " ")); + } + + @Test public void padding() { + assertEquals("", StringUtil.padding(0)); + assertEquals(" ", StringUtil.padding(1)); + assertEquals(" ", StringUtil.padding(2)); + assertEquals(" ", StringUtil.padding(15)); + } + + @Test public void isBlank() { + assertTrue(StringUtil.isBlank(null)); + assertTrue(StringUtil.isBlank("")); + assertTrue(StringUtil.isBlank(" ")); + assertTrue(StringUtil.isBlank(" \r\n ")); + + assertFalse(StringUtil.isBlank("hello")); + assertFalse(StringUtil.isBlank(" hello ")); + } + + @Test public void isNumeric() { + assertFalse(StringUtil.isNumeric(null)); + assertFalse(StringUtil.isNumeric(" ")); + assertFalse(StringUtil.isNumeric("123 546")); + assertFalse(StringUtil.isNumeric("hello")); + assertFalse(StringUtil.isNumeric("123.334")); + + assertTrue(StringUtil.isNumeric("1")); + assertTrue(StringUtil.isNumeric("1234")); + } +}