Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New CodeQL codemod for mitigating Regex DoS #484

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public static List<Class<? extends CodeChanger>> asList() {
CodeQLOutputResourceLeakCodemod.class,
CodeQLPotentiallyUnsafeCryptoAlgorithmCodemod.class,
CodeQLPredictableSeedCodemod.class,
CodeQLRegexDoSCodemod.class,
CodeQLRegexInjectionCodemod.class,
CodeQLSQLInjectionCodemod.class,
CodeQLSSRFCodemod.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package io.codemodder.codemods.codeql;

import com.contrastsecurity.sarif.Result;
import com.github.javaparser.ast.CompilationUnit;
import io.codemodder.*;
import io.codemodder.codetf.DetectorRule;
import io.codemodder.providers.sarif.codeql.ProvidedCodeQLScan;
import io.codemodder.remediation.GenericRemediationMetadata;
import io.codemodder.remediation.Remediator;
import io.codemodder.remediation.regexdos.RegexDoSRemediator;
import java.util.Optional;
import javax.inject.Inject;

/** Codemod that remediates regular-expression denial-of-service findings reported by CodeQL. */
@Codemod(
    id = "codeql:java/regex-dos",
    reviewGuidance = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
    importance = Importance.MEDIUM,
    executionPriority = CodemodExecutionPriority.HIGH)
public final class CodeQLRegexDoSCodemod extends CodeQLRemediationCodemod {

  /** Remediation strategy that every SARIF result for a visited file is handed to. */
  private final Remediator<Result> remediator;

  /**
   * Creates the codemod from the injected scan results.
   *
   * @param sarif the CodeQL results for the {@code java/polynomial-redos} rule
   */
  @Inject
  public CodeQLRegexDoSCodemod(
      @ProvidedCodeQLScan(ruleId = "java/polynomial-redos") final RuleSarif sarif) {
    super(GenericRemediationMetadata.REGEX_DOS.reporter(), sarif);
    remediator = new RegexDoSRemediator<>();
  }

  /** Describes the CodeQL rule (id, display name, help URL) whose findings this codemod fixes. */
  @Override
  public DetectorRule detectorRule() {
    final String ruleId = "polynomial-redos";
    final String ruleName = "Polynomial regular expression used on uncontrolled data";
    final String helpUrl =
        "https://codeql.github.com/codeql-query-help/java/java-polynomial-redos/";
    return new DetectorRule(ruleId, ruleName, helpUrl);
  }

  /**
   * Hands every SARIF result recorded for the visited file to the remediator.
   *
   * @param context the invocation context; supplies the path of the file being visited
   * @param cu the parsed compilation unit of that file
   * @return whatever the remediator reports for this file
   */
  @Override
  public CodemodFileScanningResult visit(
      final CodemodInvocationContext context, final CompilationUnit cu) {
    final String filePath = context.path().toString();
    final DetectorRule rule = detectorRule();
    // Position data is always read from the first location of each result.
    return remediator.remediateAll(
        cu,
        filePath,
        rule,
        ruleSarif.getResultsByLocationPath(context.path()),
        SarifFindingKeyUtil::buildFindingId,
        finding -> finding.getLocations().get(0).getPhysicalLocation().getRegion().getStartLine(),
        finding ->
            Optional.ofNullable(
                finding.getLocations().get(0).getPhysicalLocation().getRegion().getEndLine()),
        finding ->
            Optional.ofNullable(
                finding
                    .getLocations()
                    .get(0)
                    .getPhysicalLocation()
                    .getRegion()
                    .getStartColumn()));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package io.codemodder.codemods.codeql;

import io.codemodder.testutils.CodemodTestMixin;
import io.codemodder.testutils.Metadata;

/**
 * Fixture-driven test for {@link CodeQLRegexDoSCodemod}. It has no body of its own: everything is
 * configured through {@link Metadata}, which points at the {@code codeql-regexdos} resource
 * directory and expects a fix at line 62.
 *
 * <p>NOTE(review): {@code renameTestFile} presumably makes the fixture appear at the path the
 * SARIF results refer to; confirm against {@link CodemodTestMixin}.
 */
@Metadata(
codemodType = CodeQLRegexDoSCodemod.class,
testResourceDir = "codeql-regexdos",
renameTestFile = "app/src/main/java/org/apache/roller/util/RegexUtil.java",
expectingFixesAtLines = {62},
dependencies = {})
final class CodeQLRegexDoSCodemodTest implements CodemodTestMixin {}
117 changes: 117 additions & 0 deletions core-codemods/src/test/resources/codeql-regexdos/RegexUtil.java.after
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/

package org.apache.roller.util;

import io.github.pixee.security.ExecuteWithTimeout;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Hex;


/**
* Regular-expression utility class: email obfuscation and match-extraction helpers.
*/
public final class RegexUtil {
/** Matches a "mailto:" link; group 1 captures the address that follows the prefix. */
public static final Pattern MAILTO_PATTERN =
Pattern.compile("mailto:([a-zA-Z0-9\\.\\-]+@[a-zA-Z0-9\\.\\-]+\\.[a-zA-Z0-9]+)");
/** Matches a bare address; groups: 1 = "@", 2 = domain text before group 3, 3 = ".", 4 = final label. */
public static final Pattern EMAIL_PATTERN =
Pattern.compile("\\b[a-zA-Z0-9\\.\\-]+(@)([a-zA-Z0-9\\.\\-]+)(\\.)([a-zA-Z0-9]+)\\b");

/** Hex-encodes the address of every "mailto:" link in str, then passes the text to obfuscateEmail. */
public static String encodeEmail(String str) {
// obfuscate mailto links: each address is replaced by its hex-encoded form,
// so that browsers can still understand the mailto link
Matcher mailtoMatch = MAILTO_PATTERN.matcher(str);
while (mailtoMatch.find()) {
String email = mailtoMatch.group(1);
// NOTE(review): replaceFirst below treats "mailto:"+email as a regex, so '.' matches any character
String hexed = encode(email);
str = str.replaceFirst("mailto:"+email, "mailto:"+hexed);
}

return obfuscateEmail(str);
}


/**
* Obfuscates plaintext emails so they remain human-readable:
* "@" becomes "-AT-" and the "." captured by group 3 becomes "-DOT-".
* The result is still too easy for machines to parse, however.
*/
public static String obfuscateEmail(String str) {
Matcher emailMatch = EMAIL_PATTERN.matcher(str);
while (ExecuteWithTimeout.executeWithTimeout(() -> emailMatch.find(), 5000)) {
String at = emailMatch.group(1);
// NOTE(review): replaceFirst hits the first "@" still present in str, which may precede this match
str = str.replaceFirst(at, "-AT-");

String dot = emailMatch.group(2) + emailMatch.group(3) + emailMatch.group(4);
String newDot = emailMatch.group(2) + "-DOT-" + emailMatch.group(4);
// NOTE(review): dot is used as a regex by replaceFirst, so each '.' in it matches any character
str = str.replaceFirst(dot, newDot);
}
return str;
}


/**
* Collects the requested capture group from every match of the pattern.
* One entry is added to the returned list per match found, in match order.
*
* @param pattern The Pattern to use.
* @param match The String to match against.
* @param group The group number to return in case of a match.
* @return List of matched groups from the pattern.
*/
public static List<String> getMatches(Pattern pattern, String match, int group) {
List<String> matches = new ArrayList<>();
Matcher matcher = pattern.matcher(match);
while (matcher.find()) {
matches.add( matcher.group(group) );
}
return matches;
}


/**
* Hex-encodes the UTF-8 bytes of the given text, prefixing each pair of hex characters with '%'.
* Thanks to the folks at Blojsom (http://sf.net/projects/blojsom)
* for showing me what I was doing wrong with the Hex class.
* @param email the text to encode
* @return the encoded text, e.g. "%61%40%62" for "a@b"
*/
public static String encode(String email) {
StringBuilder result = new StringBuilder(16);
char[] hexString = Hex.encodeHex(email.getBytes(StandardCharsets.UTF_8));
for (int i = 0; i < hexString.length; i++) {
if (i % 2 == 0) {
result.append('%');
}
result.append(hexString[i]);
}

return result.toString();
}

}
116 changes: 116 additions & 0 deletions core-codemods/src/test/resources/codeql-regexdos/RegexUtil.java.before
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/

package org.apache.roller.util;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Hex;


/**
* Regular-expression utility class: email obfuscation and match-extraction helpers.
*/
public final class RegexUtil {
/** Matches a "mailto:" link; group 1 captures the address that follows the prefix. */
public static final Pattern MAILTO_PATTERN =
Pattern.compile("mailto:([a-zA-Z0-9\\.\\-]+@[a-zA-Z0-9\\.\\-]+\\.[a-zA-Z0-9]+)");
/** Matches a bare address; groups: 1 = "@", 2 = domain text before group 3, 3 = ".", 4 = final label. */
public static final Pattern EMAIL_PATTERN =
Pattern.compile("\\b[a-zA-Z0-9\\.\\-]+(@)([a-zA-Z0-9\\.\\-]+)(\\.)([a-zA-Z0-9]+)\\b");

/** Hex-encodes the address of every "mailto:" link in str, then passes the text to obfuscateEmail. */
public static String encodeEmail(String str) {
// obfuscate mailto links: each address is replaced by its hex-encoded form,
// so that browsers can still understand the mailto link
Matcher mailtoMatch = MAILTO_PATTERN.matcher(str);
while (mailtoMatch.find()) {
String email = mailtoMatch.group(1);
// NOTE(review): replaceFirst below treats "mailto:"+email as a regex, so '.' matches any character
String hexed = encode(email);
str = str.replaceFirst("mailto:"+email, "mailto:"+hexed);
}

return obfuscateEmail(str);
}


/**
* Obfuscates plaintext emails so they remain human-readable:
* "@" becomes "-AT-" and the "." captured by group 3 becomes "-DOT-".
* The result is still too easy for machines to parse, however.
*/
public static String obfuscateEmail(String str) {
Matcher emailMatch = EMAIL_PATTERN.matcher(str);
while (emailMatch.find()) {
String at = emailMatch.group(1);
// NOTE(review): replaceFirst hits the first "@" still present in str, which may precede this match
str = str.replaceFirst(at, "-AT-");

String dot = emailMatch.group(2) + emailMatch.group(3) + emailMatch.group(4);
String newDot = emailMatch.group(2) + "-DOT-" + emailMatch.group(4);
// NOTE(review): dot is used as a regex by replaceFirst, so each '.' in it matches any character
str = str.replaceFirst(dot, newDot);
}
return str;
}


/**
* Collects the requested capture group from every match of the pattern.
* One entry is added to the returned list per match found, in match order.
*
* @param pattern The Pattern to use.
* @param match The String to match against.
* @param group The group number to return in case of a match.
* @return List of matched groups from the pattern.
*/
public static List<String> getMatches(Pattern pattern, String match, int group) {
List<String> matches = new ArrayList<>();
Matcher matcher = pattern.matcher(match);
while (matcher.find()) {
matches.add( matcher.group(group) );
}
return matches;
}


/**
* Hex-encodes the UTF-8 bytes of the given text, prefixing each pair of hex characters with '%'.
* Thanks to the folks at Blojsom (http://sf.net/projects/blojsom)
* for showing me what I was doing wrong with the Hex class.
* @param email the text to encode
* @return the encoded text, e.g. "%61%40%62" for "a@b"
*/
public static String encode(String email) {
StringBuilder result = new StringBuilder(16);
char[] hexString = Hex.encodeHex(email.getBytes(StandardCharsets.UTF_8));
for (int i = 0; i < hexString.length; i++) {
if (i % 2 == 0) {
result.append('%');
}
result.append(hexString[i]);
}

return result.toString();
}

}
Loading
Loading