languages) {
- String[] extensions = languages.stream()
- .map(Language::getExtensions)
- .flatMap(Collection::stream)
- .toArray(String[]::new);
- ExtensionBasedFileVisitor visitor = ExtensionBasedFileVisitor.forExtensions(extensions);
- Files.walkFileTree(dirPath, visitor);
- List paths = visitor.getVisited();
- for (Path path: paths) {
- File file = parseFile(path);
- if (file != null) {
- file.setPath(dirPath.relativize(path).toString());
- file.setRepo(repo);
- file.getFunctions().forEach(function -> function.setRepo(repo));
- HibernateUtils.save(file);
- }
- }
- }
-
- private static File parseFile(Path filePath) {
- String extension = PathUtils.getExtension(filePath);
- Language language = extensionToLanguage.get(extension);
-
- Parser parser = Parser.getParser(language);
- File file;
- try {
- file = parser.parse(filePath);
- } catch (ParsingException ignored) {
- parser = new FallbackParser(language);
- file = parser.parse(filePath);
- }
-
- return file;
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/CrawlerProperties.java b/dl4se-crawler/src/main/java/usi/si/seart/CrawlerProperties.java
deleted file mode 100644
index 60ebac3e..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/CrawlerProperties.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package usi.si.seart;
-
-import lombok.experimental.UtilityClass;
-import usi.si.seart.io.PropertiesReader;
-
-import java.time.LocalDate;
-
-@UtilityClass
-public class CrawlerProperties {
-
- public static final String tmpDirPrefix;
- public static final LocalDate startDate;
- public static final String ghsSearchUrl;
-
- static {
- PropertiesReader propertiesReader = new PropertiesReader("application.properties");
- tmpDirPrefix = propertiesReader.getProperty("app.general.tmpDirPrefix");
- String dateString = propertiesReader.getProperty("app.crawl.startDate");
- startDate = LocalDate.parse(dateString);
- ghsSearchUrl = propertiesReader.getProperty("app.crawl.ghs.searchUrl");
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/collection/Tuple.java b/dl4se-crawler/src/main/java/usi/si/seart/collection/Tuple.java
deleted file mode 100644
index 7524affe..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/collection/Tuple.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package usi.si.seart.collection;
-
-import lombok.AccessLevel;
-import lombok.AllArgsConstructor;
-import lombok.Getter;
-import lombok.experimental.FieldDefaults;
-
-import java.util.Map;
-
-/**
- * Simple implementation of a tuple class, used as a container for two values of arbitrary types.
- * Tuples are immutable, and can only be created through a static factory:
- * {@code
- * Tuple<Integer, String> t = Tuple.of(1, "Hello!");
- * }
- * Values are subsequently accessed through getters:
- * {@code
- * int i = t.getLeft();
- * String msg = t.getRight();
- * }
- * This class can also be used in place of {@link Map.Entry Entry}:
- * {@code
- * Map = Map.ofEntries(
- * Tuple.of(1, 1L),
- * Tuple.of(2, ""),
- * Tuple.of(3, new Object())
- * );
- * }
- * In terms of behaviour, it is similar to {@code UnmodifiableEntry}.
- *
- * @author dabico
- * @param <L> The type of the first value.
- * @param <R> The type of the second value.
- */
-@Getter
-@AllArgsConstructor(access = AccessLevel.PRIVATE)
-@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
-public class Tuple implements Map.Entry {
-
- L left;
- R right;
-
- @Override
- public L getKey() {
- return this.left;
- }
-
- @Override
- public R getValue() {
- return this.right;
- }
-
- @Override
- public R setValue(R value) {
- throw new UnsupportedOperationException("Tuple values are not modifiable!");
- }
-
- @Override
- public String toString() {
- return String.format("(%s, %s)", left.toString(), right.toString());
- }
-
- public static Tuple of(L left, R right) {
- return new Tuple<>(left, right);
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/collection/utils/CollectionUtils.java b/dl4se-crawler/src/main/java/usi/si/seart/collection/utils/CollectionUtils.java
deleted file mode 100644
index 533444f7..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/collection/utils/CollectionUtils.java
+++ /dev/null
@@ -1,93 +0,0 @@
-package usi.si.seart.collection.utils;
-
-import lombok.experimental.UtilityClass;
-
-import java.lang.reflect.Array;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-
-@UtilityClass
-public class CollectionUtils {
-
- /**
- * Helper method used to merge two arrays into one. The resulting array contains elements from the first array,
- * followed by the elements from the second array, preserving their original order of appearance. Does not modify
- * the contents of the arrays passed as arguments.
- *
- * @param array1 The first {@link Array}.
- * @param array2 The second {@link Array}.
- * @param <T> The type of elements in both arrays.
- * @return The merged array.
- */
- @SuppressWarnings("unchecked")
- public T[] merge(T[] array1, T[] array2) {
- Objects.requireNonNull(array1);
- Objects.requireNonNull(array2);
- Class> type = array1.getClass().getComponentType();
- T[] merged = (T[]) Array.newInstance(type, array1.length + array2.length);
- System.arraycopy(array1, 0, merged, 0, array1.length);
- System.arraycopy(array2, 0, merged, array1.length, array2.length);
- return merged;
- }
-
- /**
- * Helper method used for calculating the intersection of two sets: a set of shared items between both sets.
- * Although it employs mutable operations, it ensures that the original sets are not modified by working on their
- * copies.
- *
- * @param set1 The first {@link Set}.
- * @param set2 The second {@link Set}.
- * @param The type of elements in both sets.
- * @return The intersection of the two sets.
- */
- public Set intersection(Set set1, Set set2) {
- Objects.requireNonNull(set1);
- Objects.requireNonNull(set2);
- Set intersection = new HashSet<>(set1);
- intersection.retainAll(set2);
- return intersection;
- }
-
- /**
- * Helper method used for calculating the difference of two sets: a set of items contained in the first set that are
- * not part of the second set. Although it employs mutable operations, it ensures that the original sets are not
- * modified by working on their copies.
- *
- * @param set1 The first {@link Set}.
- * @param set2 The second {@link Set}.
- * @param The type of elements in both sets.
- * @return The difference of the first and second set.
- */
- public Set difference(Set set1, Set set2) {
- Objects.requireNonNull(set1);
- Objects.requireNonNull(set2);
- Set difference = new HashSet<>(set1);
- difference.removeAll(set2);
- return difference;
- }
-
- /**
- * Helper method used for retrieving all values contained in a {@code Map}. If the key does not map to any value,
- * then it is simply ignored. The order of value retrieval depends on the access order of the {@code Collection}
- * containing the keys.
- *
- * @param map A {@code Map}.
- * @param keys A {@code Collection} of keys.
- * @param <K> The key type.
- * @param <V> The value type.
- * @return The {@code Set} of values that are mapped my the keys.
- */
- public Set getAllValuesFrom(Map map, Collection keys) {
- Objects.requireNonNull(map);
- Objects.requireNonNull(keys);
- Set values = new HashSet<>(keys.size());
- for (K key: keys) {
- V value = map.get(key);
- if (value != null) values.add(value);
- }
- return values;
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/converter/Converter.java b/dl4se-crawler/src/main/java/usi/si/seart/converter/Converter.java
deleted file mode 100644
index ea64e2b5..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/converter/Converter.java
+++ /dev/null
@@ -1,123 +0,0 @@
-package usi.si.seart.converter;
-
-import lombok.AccessLevel;
-import lombok.NoArgsConstructor;
-
-import java.util.Iterator;
-import java.util.Objects;
-
-/**
- * Based on Google's {@link com.google.common.base.Converter Converter}, albeit with some minor differences.
- * A {@code Converter} maps an instance of type {@code A} to an instance of type {@code B}.
- * {@code null} handling is done automatically. Each implementing class should follow a singleton pattern,
- * reusing a single eagerly initialized instance throughout the application code.
- *
- * @param <A> The type to convert from
- * @param <B> The type to convert to
- */
-@NoArgsConstructor(access = AccessLevel.PROTECTED)
-public abstract class Converter {
-
- private Converter reverse;
-
- protected abstract B forward(A a);
- protected abstract A backward(B b);
-
- B doForward(A a) {
- if (a != null) return forward(a);
- else return null;
- }
-
- A doBackward(B b) {
- if (b != null) return backward(b);
- else return null;
- }
-
- /**
- * Method used for performing conversions.
- *
- * @return The input value, converted from type {@code A} to {@code B}, or {@code null} if and only if the input is also {@code null}.
- */
- public final B convert(A a) {
- return doForward(a);
- }
-
- /**
- * Returns an {@code Iterable} that applies {@code convert} to each element of {@code fromIterable}. The
- * conversion is done lazily.
- *
- * The returned iterable's iterator supports {@code remove()} if the input iterator does. After
- * a successful {@code remove()} call, {@code fromIterable} no longer contains the corresponding
- * element.
- *
- * @param aIterable The {@code Iterable} whose instances we wish to convert.
- */
- public Iterable convertAll(Iterable aIterable) {
- Objects.requireNonNull(aIterable);
- return () -> new Iterator<>()
- {
- private final Iterator extends A> aIterator = aIterable.iterator();
-
- @Override
- public boolean hasNext() {
- return aIterator.hasNext();
- }
-
- @Override
- public B next() {
- return convert(aIterator.next());
- }
-
- @Override
- public void remove() {
- aIterator.remove();
- }
- };
- }
-
- /**
- * Method used for obtaining the reversed view of this converter.
- *
- * @return A {@code Converter} from {@code B} to {@code A}.
- */
- public Converter reverse() {
- if (this.reverse == null) {
- reverse = new ReverseConverter<>(this);
- }
-
- return reverse;
- }
-
- private static final class ReverseConverter extends Converter {
- final Converter original;
-
- ReverseConverter(Converter original) {
- this.original = original;
- }
-
- @Override
- protected A forward(B b) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- protected B backward(A a) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- A doForward(B b) {
- return original.doBackward(b);
- }
-
- @Override
- B doBackward(A a) {
- return original.doForward(a);
- }
-
- @Override
- public Converter reverse() {
- return original;
- }
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/converter/DateToLDTConverter.java b/dl4se-crawler/src/main/java/usi/si/seart/converter/DateToLDTConverter.java
deleted file mode 100644
index ed891f56..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/converter/DateToLDTConverter.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package usi.si.seart.converter;
-
-import lombok.AccessLevel;
-import lombok.Getter;
-import lombok.NoArgsConstructor;
-
-import java.time.LocalDateTime;
-import java.time.ZoneOffset;
-import java.util.Date;
-
-@NoArgsConstructor(access = AccessLevel.PRIVATE)
-public class DateToLDTConverter extends Converter {
-
- @Getter
- public static final Converter instance = new DateToLDTConverter();
-
- @Override
- protected LocalDateTime forward(Date date) {
- return LocalDateTime.ofInstant(date.toInstant(), ZoneOffset.UTC);
- }
-
- @Override
- protected Date backward(LocalDateTime ldt) {
- return new Date(ldt.toInstant(ZoneOffset.UTC).toEpochMilli());
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/converter/GhsToGitRepoConverter.java b/dl4se-crawler/src/main/java/usi/si/seart/converter/GhsToGitRepoConverter.java
deleted file mode 100644
index 7f84fb5d..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/converter/GhsToGitRepoConverter.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package usi.si.seart.converter;
-
-import lombok.AccessLevel;
-import lombok.Getter;
-import lombok.NoArgsConstructor;
-import usi.si.seart.http.payload.GhsGitRepo;
-import usi.si.seart.model.GitRepo;
-
-import java.time.LocalDateTime;
-
-@NoArgsConstructor(access = AccessLevel.PRIVATE)
-public class GhsToGitRepoConverter extends Converter {
-
- @Getter
- private static final Converter instance = new GhsToGitRepoConverter();
-
- @Override
- protected GitRepo forward(GhsGitRepo ghsGitRepo) {
- GitRepo.GitRepoBuilder builder = GitRepo.builder();
-
- builder.name(ghsGitRepo.getName());
- builder.license(ghsGitRepo.getLicense());
- builder.isFork(ghsGitRepo.getIsFork());
-
- Long commits = ghsGitRepo.getCommits();
- if (commits != null) builder.commits(commits);
- Long contributors = ghsGitRepo.getContributors();
- if (contributors != null) builder.contributors(contributors);
- Long issues = ghsGitRepo.getTotalIssues();
- if (issues != null) builder.issues(issues);
- Long stars = ghsGitRepo.getStargazers();
- if (stars != null) builder.stars(stars);
- LocalDateTime lastCommit = DateToLDTConverter.getInstance().convert(ghsGitRepo.getLastCommit());
- if (lastCommit != null) builder.lastCommit(lastCommit);
- String lastCommitSHA = ghsGitRepo.getLastCommitSHA();
- if (lastCommitSHA != null) builder.lastCommitSHA(lastCommitSHA);
-
- return builder.build();
- }
-
- @Override
- protected GhsGitRepo backward(GitRepo repo) {
- throw new UnsupportedOperationException("Backwards conversion is not supported!");
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/git/GitException.java b/dl4se-crawler/src/main/java/usi/si/seart/git/GitException.java
deleted file mode 100644
index 74a537c6..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/git/GitException.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package usi.si.seart.git;
-
-public class GitException extends Exception {
-
- public GitException(String message) {
- super(message);
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/io/PropertiesReader.java b/dl4se-crawler/src/main/java/usi/si/seart/io/PropertiesReader.java
deleted file mode 100644
index 68d70412..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/io/PropertiesReader.java
+++ /dev/null
@@ -1,81 +0,0 @@
-package usi.si.seart.io;
-
-import lombok.AccessLevel;
-import lombok.SneakyThrows;
-import lombok.experimental.FieldDefaults;
-
-import java.io.InputStream;
-import java.util.Properties;
-import java.util.function.Function;
-import java.util.function.UnaryOperator;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Class used for reading {@code Properties} files. It has the added benefit of being able to resolve references to
- * other file properties, system properties and environment variables, with the resolution precedence maintaining the
- * aforementioned order. The property values themselves remain preserved as they were written in the file, with the
- * resolution taking place once {@link #getProperty} is called. Only <code>${...}</code> is supported for property
- * referencing. Nested resolution (i.e. <code>${...${...}...}</code>) is not supported.
- *
- * @author dabico
- * @see System#getProperty(String) System.getProperty
- * @see System#getenv(String) System.getenv
- * @see Properties
- * @see Matcher
- * @see Pattern Definition
- */
-@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
-public class PropertiesReader {
-
- static Pattern referencePattern = Pattern.compile("\\$\\{([^}]+)\\}");
-
- Properties properties;
- Function resolver;
-
- @SneakyThrows
- public PropertiesReader(String fileName) {
- this.properties = new Properties();
- InputStream is = getClass().getClassLoader().getResourceAsStream(fileName);
- this.properties.load(is);
-
- UnaryOperator resolver1 = resolver(this.properties::getProperty);
- UnaryOperator resolver2 = resolver(System::getProperty);
- UnaryOperator resolver3 = resolver(System::getenv);
- resolver = resolver1.andThen(resolver2).andThen(resolver3);
- }
-
- /**
- * Searches for the property with the specified key in this property list, performing resolutions if the property
- * value contains references to other properties or variables. The method returns null if the property is not found.
- *
- * @param key The property key.
- * @return The resolved property value.
- */
- public String getProperty(String key) {
- String value = this.properties.getProperty(key);
- if (value != null) return resolver.apply(value);
- else return null;
- }
-
- private UnaryOperator resolver(UnaryOperator mapper) {
- return str -> {
- Matcher matcher = referencePattern.matcher(str);
-
- if (str.matches(".*"+referencePattern.pattern()+".*")) {
- StringBuilder builder = new StringBuilder();
-
- while (matcher.find()) {
- String name = matcher.group(1);
- String value = mapper.apply(name);
- if (value != null) matcher.appendReplacement(builder, Matcher.quoteReplacement(value));
- }
-
- matcher.appendTail(builder);
- return builder.toString();
- }
-
- return str;
- };
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/parser/AbstractParser.java b/dl4se-crawler/src/main/java/usi/si/seart/parser/AbstractParser.java
deleted file mode 100644
index 993cea2f..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/parser/AbstractParser.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package usi.si.seart.parser;
-
-import lombok.AccessLevel;
-import lombok.experimental.FieldDefaults;
-import usi.si.seart.model.Language;
-import usi.si.seart.model.code.File;
-import usi.si.seart.model.code.Function;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
-
-@FieldDefaults(level = AccessLevel.PROTECTED)
-public abstract class AbstractParser implements Parser {
-
- File.FileBuilder, ?> fileBuilder = File.builder();
- List> functionBuilders = new ArrayList<>();
-
- Language language;
-
- protected AbstractParser(Language language) {
- this.language = language;
- }
-
- protected File buildFileAndFunctions() {
- File file = fileBuilder.build();
- List functions = functionBuilders.stream()
- .map(builder -> builder.file(file).isTest(file.getIsTest()).build())
- .collect(Collectors.toList());
- file.setFunctions(functions);
- return file;
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/parser/FallbackParser.java b/dl4se-crawler/src/main/java/usi/si/seart/parser/FallbackParser.java
deleted file mode 100644
index 39bed750..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/parser/FallbackParser.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package usi.si.seart.parser;
-
-import lombok.extern.slf4j.Slf4j;
-import usi.si.seart.model.Language;
-import usi.si.seart.model.code.File;
-import usi.si.seart.utils.PathUtils;
-import usi.si.seart.utils.StringUtils;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-@Slf4j
-public class FallbackParser extends AbstractParser {
-
- public FallbackParser(Language language) {
- super(language);
- }
-
- @Override
- public File parse(Path path) {
- fileBuilder.isTest(PathUtils.isTestFile(path));
- fileBuilder.language(language);
-
- try {
- String fileContents = Files.readString(path);
- String normalized = StringUtils.normalizeSpace(fileContents);
-
- fileBuilder.content(fileContents);
- fileBuilder.contentHash(StringUtils.sha256(normalized));
- fileBuilder.lines(fileContents.lines().count());
- fileBuilder.characters(fileContents.chars().count());
- fileBuilder.containsNonAscii(StringUtils.containsNonAscii(fileContents));
-
- return buildFileAndFunctions();
- } catch (IOException ex) {
- log.error("Could not read file: " + path, ex);
- return null;
- }
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/parser/JavaParser.java b/dl4se-crawler/src/main/java/usi/si/seart/parser/JavaParser.java
deleted file mode 100644
index ba38e506..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/parser/JavaParser.java
+++ /dev/null
@@ -1,212 +0,0 @@
-package usi.si.seart.parser;
-
-import com.github.javaparser.JavaToken;
-import com.github.javaparser.ParseProblemException;
-import com.github.javaparser.StaticJavaParser;
-import com.github.javaparser.ast.CompilationUnit;
-import com.github.javaparser.ast.Node;
-import com.github.javaparser.ast.body.CallableDeclaration;
-import com.github.javaparser.ast.body.ConstructorDeclaration;
-import com.github.javaparser.ast.body.MethodDeclaration;
-import com.github.javaparser.ast.comments.Comment;
-import com.github.javaparser.ast.visitor.VoidVisitorAdapter;
-import com.github.javaparser.printer.XmlPrinter;
-import lombok.extern.slf4j.Slf4j;
-import usi.si.seart.collection.Tuple;
-import usi.si.seart.model.Language;
-import usi.si.seart.model.code.Boilerplate;
-import usi.si.seart.model.code.Code;
-import usi.si.seart.model.code.File;
-import usi.si.seart.model.code.Function;
-import usi.si.seart.utils.PathUtils;
-import usi.si.seart.utils.StringUtils;
-
-import java.io.FileNotFoundException;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Spliterator;
-import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;
-
-@Slf4j
-public class JavaParser extends AbstractParser {
-
- private static final XmlPrinter astPrinter = new XmlPrinter(true);
-
- public JavaParser(Language language) {
- super(language);
- }
-
- @Override
- public File parse(Path path) throws ParsingException {
- fileBuilder.isTest(PathUtils.isTestFile(path));
-
- try {
- CompilationUnit compilationUnit = StaticJavaParser.parse(path.toFile());
- fileBuilder.isParsed(true);
- new VoidVisitor().visit(compilationUnit, null);
- } catch (ParseProblemException | FileNotFoundException | StackOverflowError thr) {
- log.error("Parsing failed for: " + path, thr);
- throw new ParsingException(thr.getMessage(), thr.getCause());
- }
-
- return buildFileAndFunctions();
- }
-
- private class VoidVisitor extends VoidVisitorAdapter {
-
- @Override
- public void visit(CompilationUnit declaration, Object arg) {
- copyToBuilder(declaration, fileBuilder);
-
- super.visit(declaration, arg);
- }
-
- @Override
- public void visit(MethodDeclaration declaration, Object arg) {
- visit(declaration);
- super.visit(declaration, arg);
- }
-
- @Override
- public void visit(ConstructorDeclaration declaration, Object arg) {
- visit(declaration);
- super.visit(declaration, arg);
- }
-
- private void visit(CallableDeclaration> declaration) {
- Function.FunctionBuilder, ?> functionBuilder = Function.builder();
-
- copyToBuilder(declaration, functionBuilder);
-
- functionBuilder.boilerplateType(getBoilerplateType(declaration));
-
- functionBuilders.add(functionBuilder);
- }
-
- private void copyToBuilder(Node node, Code.CodeBuilder, ?> builder) {
- builder.language(language);
-
- String contents = node.toString();
- builder.content(contents);
-
- builder.ast(astPrinter.output(node));
-
- builder.characters(contents.chars().count());
-
- Tuple tokensCount = countTokens(node);
- builder.totalTokens(tokensCount.getLeft());
- builder.codeTokens(tokensCount.getRight());
-
- builder.lines(countLines(node));
-
- builder.containsNonAscii(StringUtils.containsNonAscii(contents));
-
- removeComments(node);
- String normalized = StringUtils.normalizeSpace(node.toString());
- builder.contentHash(StringUtils.sha256(normalized));
- builder.astHash(getAstHash(node));
- }
- }
-
- static String getAstHash(Node node) {
- StringBuilder builder = new StringBuilder();
- getAstTypeNames(node, builder);
- return StringUtils.sha256(builder.toString());
- }
-
- private static void getAstTypeNames(Node node, StringBuilder builder) {
- builder.append(node.getMetaModel().getTypeName());
- List children = node.getChildNodes();
- for (Node child : children) {
- getAstTypeNames(child, builder);
- }
- }
-
- static Tuple countTokens(Node node) {
- if (node instanceof CompilationUnit) {
- return rangeLength(node);
- } else if (node instanceof CallableDeclaration) {
- return countTokens((CallableDeclaration>) node);
- } else {
- throw new UnsupportedOperationException("Token counting is not supported at this granularity level!");
- }
- }
-
- private static Tuple countTokens(CallableDeclaration> cd) {
- Tuple count = rangeLength(cd);
- Optional comment = cd.getComment();
-
- if (comment.isPresent()) {
- String jdoc = comment.get().toString();
- Long jdocLen = countWordsAndSpaces(jdoc);
- count = Tuple.of(count.getLeft() + jdocLen, count.getRight());
- }
-
- return count;
- }
-
- private static Tuple rangeLength(Node node) {
- List tokens = getNodeTokens(node);
- Map> partition = tokens.stream()
- .collect(Collectors.partitioningBy(token -> token.getCategory().isWhitespaceOrComment()));
-
- long codeTokens = partition.get(false).size();
- long nonCodeTokens = partition.get(true).stream().mapToLong(token -> {
- JavaToken.Category category = token.getCategory();
- if (category.isWhitespace()) {
- return 1L;
- } else {
- return countWordsAndSpaces(token.getText());
- }
- }).sum();
-
- return Tuple.of(codeTokens + nonCodeTokens, codeTokens);
- }
-
- private static List getNodeTokens(Node node) {
- return node.getTokenRange()
- .map(range -> {
- Spliterator spliterator = range.spliterator();
- return StreamSupport.stream(spliterator, false).collect(Collectors.toList());
- })
- .orElse(new ArrayList<>());
- }
-
- private static long countWordsAndSpaces(String input) {
- if (input.isBlank()) return 0L;
- String normalized = StringUtils.normalizeSpace(input);
- String[] words = normalized.split("\\s");
- long spaces = words.length - 1L;
- return words.length + spaces;
- }
-
- static Long countLines(Node node) {
- return node.getRange()
- .map(range -> (long)(range.end.line + 1 - range.begin.line))
- .orElse(0L);
- }
-
- static Boilerplate getBoilerplateType(CallableDeclaration> node) {
- if (node instanceof ConstructorDeclaration) return Boilerplate.CONSTRUCTOR;
- String name = node.getNameAsString();
- if (name.startsWith("set")) return Boilerplate.SETTER;
- if (name.startsWith("get")) return Boilerplate.GETTER;
- switch (name) {
- case "builder": return Boilerplate.BUILDER;
- case "equals": return Boilerplate.EQUALS;
- case "hashCode": return Boilerplate.HASH_CODE;
- case "toString": return Boilerplate.TO_STRING;
- default: return null;
- }
- }
-
- static void removeComments(Node node) {
- List comments = node.getAllContainedComments();
- node.getComment().ifPresent(comments::add);
- comments.forEach(Comment::remove);
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/parser/Parser.java b/dl4se-crawler/src/main/java/usi/si/seart/parser/Parser.java
deleted file mode 100644
index 6b34226b..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/parser/Parser.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package usi.si.seart.parser;
-
-import usi.si.seart.model.Language;
-import usi.si.seart.model.code.File;
-
-import java.nio.file.Path;
-
-public interface Parser {
- File parse(Path path);
-
- static Parser getParser(Language language) {
- switch (language.getName()) {
- case "Java": return new JavaParser(language);
- default: return new FallbackParser(language);
- }
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/parser/ParsingException.java b/dl4se-crawler/src/main/java/usi/si/seart/parser/ParsingException.java
deleted file mode 100644
index f255aed4..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/parser/ParsingException.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package usi.si.seart.parser;
-
-public class ParsingException extends RuntimeException {
-
- public ParsingException(String message, Throwable cause) {
- super(message, cause);
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/utils/HibernateUtils.java b/dl4se-crawler/src/main/java/usi/si/seart/utils/HibernateUtils.java
deleted file mode 100644
index 5a410903..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/utils/HibernateUtils.java
+++ /dev/null
@@ -1,157 +0,0 @@
-package usi.si.seart.utils;
-
-import lombok.experimental.UtilityClass;
-import lombok.extern.slf4j.Slf4j;
-import org.hibernate.Hibernate;
-import org.hibernate.Session;
-import org.hibernate.SessionFactory;
-import org.hibernate.Transaction;
-import org.hibernate.cfg.Configuration;
-import usi.si.seart.CrawlerProperties;
-import usi.si.seart.model.GitRepo;
-import usi.si.seart.model.Language;
-import usi.si.seart.model.code.File;
-import usi.si.seart.model.job.CrawlJob;
-import usi.si.seart.model.job.Job;
-
-import javax.persistence.PersistenceException;
-import java.nio.file.Path;
-import java.time.LocalDateTime;
-import java.util.Optional;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-@Slf4j
-@UtilityClass
-@SuppressWarnings("TryFinallyCanBeTryWithResources")
-public class HibernateUtils {
-
- private static final SessionFactory factory = new Configuration().configure().buildSessionFactory();
-
- public CrawlJob getLastJob() {
- try (Session session = factory.openSession()) {
- Optional lastJobOptional = session.createQuery(
- "SELECT c FROM CrawlJob c WHERE c.jobType = usi.si.seart.model.job.Job.CODE", CrawlJob.class
- ).uniqueResultOptional();
-
- if (lastJobOptional.isPresent()) {
- CrawlJob lastJob = lastJobOptional.get();
- session.evict(lastJob);
- return lastJob;
- } else {
- CrawlJob startJob = CrawlJob.builder()
- .checkpoint(CrawlerProperties.startDate.atStartOfDay())
- .jobType(Job.CODE)
- .build();
- save(startJob);
- return getLastJob();
- }
- }
- }
-
- public Set getLanguages() {
- try (Session session = factory.openSession()) {
- return session.createQuery("SELECT l FROM Language l", Language.class)
- .stream()
- .collect(Collectors.toSet());
- }
- }
-
- public Optional getRepoByName(String name) {
- try (Session session = factory.openSession()) {
- Optional result = session.createQuery("SELECT r FROM GitRepo r WHERE r.name = :name", GitRepo.class)
- .setParameter("name", name)
- .uniqueResultOptional();
- return result.map(repo -> {
- Hibernate.initialize(repo.getLanguages());
- return repo;
- });
- }
- }
-
- public void save(CrawlJob crawlJob) {
- saveOrUpdate(crawlJob);
- }
-
- public void save(GitRepo repo) {
- saveOrUpdate(repo);
- }
-
- public void save(File file) {
- saveOrUpdate(file);
- }
-
- private void saveOrUpdate(Object obj) {
- Session session = factory.openSession();
- Transaction transaction = null;
- try {
- transaction = session.beginTransaction();
- session.saveOrUpdate(obj);
- session.flush();
- transaction.commit();
- } catch (PersistenceException ex) {
- log.error("Error while persisting: " + obj.getClass().getName(), ex);
- if (transaction != null) transaction.rollback();
- } finally {
- session.close();
- }
- }
-
- public void deleteFileByRepoIdAndPath(Long id, Path path) {
- Session session = factory.openSession();
- Transaction transaction = null;
- try {
- transaction = session.beginTransaction();
- session.createQuery("DELETE FROM File WHERE repo.id = :id AND path = :path")
- .setParameter("id", id)
- .setParameter("path", path.toString())
- .executeUpdate();
- session.flush();
- transaction.commit();
- } catch (PersistenceException ex) {
- log.error("Exception occurred while deleting File[repo.id="+id+", path="+path+"]!", ex);
- if (transaction != null) transaction.rollback();
- } finally {
- session.close();
- }
- }
-
- public void updateFilePathByRepoId(Long id, Path before, Path after) {
- Session session = factory.openSession();
- Transaction transaction = null;
- try {
- transaction = session.beginTransaction();
- session.createQuery("UPDATE File SET path = :after WHERE repo.id = :id AND path = :before")
- .setParameter("id", id)
- .setParameter("before", before.toString())
- .setParameter("after", after.toString())
- .executeUpdate();
- session.flush();
- transaction.commit();
- } catch (PersistenceException ex) {
- log.error("Exception occurred while updating File[repo.id="+id+", path="+before+"]!", ex);
- if (transaction != null) transaction.rollback();
- } finally {
- session.close();
- }
- }
-
- public void updateCrawlJobById(Long id, LocalDateTime checkpoint) {
- Session session = factory.openSession();
- Transaction transaction = null;
- try {
- transaction = session.beginTransaction();
- session.createQuery("UPDATE CrawlJob SET checkpoint = :checkpoint WHERE id = :id")
- .setParameter("id", id)
- .setParameter("checkpoint", checkpoint)
- .executeUpdate();
- session.flush();
- transaction.commit();
- } catch (PersistenceException ex) {
- log.error("Exception occurred while updating CrawlJob[id="+id+"]!", ex);
- if (transaction != null) transaction.rollback();
- } finally {
- session.close();
- }
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/utils/PathUtils.java b/dl4se-crawler/src/main/java/usi/si/seart/utils/PathUtils.java
deleted file mode 100644
index ed730fb9..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/utils/PathUtils.java
+++ /dev/null
@@ -1,105 +0,0 @@
-package usi.si.seart.utils;
-
-import lombok.SneakyThrows;
-import lombok.experimental.UtilityClass;
-
-import java.nio.file.FileSystems;
-import java.nio.file.Path;
-import java.nio.file.PathMatcher;
-import java.util.Objects;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-@UtilityClass
-public class PathUtils {
-
- private final PathMatcher testPathMatcher;
- static {
- String globPattern = Stream.of(
- "**/test/**",
- "**/tests/**",
- "**/Test*.java",
- "**/*Test.java",
- "**/*Tests.java",
- "**/*TestCase.java",
- "**/IT*.java",
- "**/*IT.java",
- "**/*ITCase.java"
- ).collect(Collectors.joining(",", "glob:{", "}"));
- testPathMatcher = FileSystems.getDefault().getPathMatcher(globPattern);
- }
-
-
- /**
- * Determine if a file is indeed a test file. We define test files as files whose:
- *
- *
- *
- * Path contains the following directories:
- *
- * {@code test}
- * {@code tests}
- *
- *
- *
- * Name matches the following patterns:
- *
- * {@code Test*.java}
- * {@code *Test.java}
- * {@code *Tests.java}
- * {@code *TestCase.java}
- * {@code IT*.java}
- * {@code *IT.java}
- * {@code *ITCase.java}
- *
- *
- *
- *
- * @implNote We keep the matching restrictive to minimise the amount of false positives.
- * For instance, using a more general pattern like {@code **test**} would match non-conforming cases such as
- * {@code /src/latest/App.java}.
- * @param path {@code Path} that we are testing against.
- * @return Whether the path in question matches the definition of a test file.
- */
- //TODO 08.03.22: Switch pattern matching based on extension
- public boolean isTestFile(Path path) {
- return testPathMatcher.matches(path);
- }
-
- /**
- * Extract the file extension for a given file path.
- *
- * @param path {@code Path} that we are testing against.
- * @return The file extension {@code String}. If the file has no extension or is a directory, then an empty
- * string is returned.
- */
- public String getExtension(Path path) {
- Objects.requireNonNull(path);
- String fileName = path.getFileName().toString();
- if (fileName.contains(".")) {
- int extStart = fileName.lastIndexOf(".") + 1;
- if (extStart < fileName.length() && extStart > 0) {
- return fileName.substring(extStart);
- }
- }
- return "";
- }
-
- /**
- * Forcefully delete a file, or a directory and all its contents.
- *
- * @implNote We suppress throws of {@link java.io.IOException IOException} and {@link InterruptedException}.
- * @param path {@code Path} to file or directory that we wish to delete.
- * @see ProcessBuilder
- * @see Run Shell Commands in Java
- */
- @SneakyThrows
- public void forceDelete(Path path) {
- Objects.requireNonNull(path);
- ProcessBuilder builder = new ProcessBuilder();
- builder.command("rm", "-rf", path.getFileName().toString());
- builder.directory(path.getParent().toFile());
- Process process = builder.start();
- process.waitFor();
- }
-}
diff --git a/dl4se-crawler/src/main/java/usi/si/seart/utils/StringUtils.java b/dl4se-crawler/src/main/java/usi/si/seart/utils/StringUtils.java
deleted file mode 100644
index e91b52b3..00000000
--- a/dl4se-crawler/src/main/java/usi/si/seart/utils/StringUtils.java
+++ /dev/null
@@ -1,98 +0,0 @@
-package usi.si.seart.utils;
-
-import lombok.SneakyThrows;
-import lombok.experimental.UtilityClass;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.Objects;
-
-@UtilityClass
-public class StringUtils {
-
- /**
- * Simple implementation of the SHA-256 algorithm.
- *
- * @param input An input {@code String} of arbitrary length.
- * @return A 64-character {@code String} representing the hashing algorithm result.
- * @implNote We suppress throws of {@link NoSuchAlgorithmException}.
- * @see Baeldung Guide
- * @author dabico
- */
- @SneakyThrows({NoSuchAlgorithmException.class})
- public String sha256(String input) {
- Objects.requireNonNull(input);
- MessageDigest md = MessageDigest.getInstance("SHA-256");
- byte[] hash = md.digest(input.getBytes(StandardCharsets.UTF_8));
- StringBuilder hexString = new StringBuilder(2 * hash.length);
- for (byte b : hash) {
- String hex = Integer.toHexString(0xff & b);
- if (hex.length() == 1) hexString.append('0');
- hexString.append(hex);
- }
- return hexString.toString();
- }
-
- /**
- * Used to check if an input {@code String} contains any non-ASCII characters.
- *
- * @param input The input {@code String} to check against.
- * @return {@code true} if it contains any non-ASCII characters, {@code false} otherwise.
- * @author dabico
- */
- public boolean containsNonAscii(String input) {
- Objects.requireNonNull(input);
- return input.chars().anyMatch(ch -> ch > 127);
- }
-
- /**
- * Used to normalize white spaces in a {@code String}. For a passed input we replace all consecutive occurrences of
- * whitespace characters as defined by {@link Character#isWhitespace(char) Character.isWhiteSpace} with a single
- * space character. Before returning, the resulting {@code String} is also stripped of any leading or trailing
- * whitespaces.
- *
- * @param input An input {@code String}.
- * @return The space-normalized input.
- * @author dabico
- */
- public String normalizeSpace(String input) {
- Objects.requireNonNull(input);
- if (input.isBlank()) return "";
-
- StringBuilder builder = new StringBuilder(input.length());
- boolean lastWasWhitespace = false;
- for (int i = 0; i < input.length(); i++) {
- char current = input.charAt(i);
- if (Character.isWhitespace(current)) {
- if (!lastWasWhitespace) {
- lastWasWhitespace = true;
- builder.append(' ');
- }
- } else {
- lastWasWhitespace = false;
- builder.append(current);
- }
- }
-
- return builder.toString().trim();
- }
-
- /**
- * Used to read any arbitrary {@code InputStream} into a {@code String}.
- *
- * @param inputStream An {@code InputStream}.
- * @return The stream contents as a {@code String}.
- * @apiNote Intended to be used for processing the STD/ERR output of a {@link java.lang.Process Process}.
- * @implNote We suppress throws of {@link IOException}.
- * @author dabico
- * @see Baeldung Guide
- */
- @SneakyThrows({IOException.class})
- public String fromInputStream(InputStream inputStream) {
- Objects.requireNonNull(inputStream);
- return new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
- }
-}
diff --git a/dl4se-crawler/src/main/resources/application.properties b/dl4se-crawler/src/main/resources/application.properties
index c732554d..43453103 100644
--- a/dl4se-crawler/src/main/resources/application.properties
+++ b/dl4se-crawler/src/main/resources/application.properties
@@ -1,3 +1,32 @@
-app.general.tmpDirPrefix=dl4se
-app.crawl.startDate=2008-01-01
-app.crawl.ghs.searchUrl=${CODE_SEARCH_URL}
\ No newline at end of file
+# Spring Configuration
+spring.application.name=@project.name@
+
+# Banner Configuration
+spring.banner.location=classpath:banner.txt
+
+# Main Method Configuration
+spring.main.web-application-type=none
+
+# Logging Configuration
+logging.level.root=INFO
+logging.level.ch.usi.si.seart.crawler=INFO
+logging.file.path=logs
+logging.file.name=${logging.file.path}/crawler.log
+logging.logback.rollingpolicy.max-history=180
+logging.logback.rollingpolicy.max-file-size=100MB
+logging.logback.rollingpolicy.total-size-cap=5GB
+logging.logback.rollingpolicy.file-name-pattern=${logging.file.path}/crawler_%d{yyyy-MM-dd}_%i.log.gz
+
+# JPA Configuration
+spring.jpa.database=postgresql
+spring.jpa.open-in-view=false
+spring.jpa.hibernate.ddl-auto=none
+spring.jpa.properties.hibernate.jdbc.time_zone=UTC
+spring.jpa.properties.hibernate.jdbc.fetch_size=500
+spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQL10Dialect
+
+# Spring Datasource Configuration
+spring.datasource.url=jdbc:postgresql://${DATABASE_HOST:localhost}:${DATABASE_PORT:5432}/${DATABASE_NAME:dl4se}
+spring.datasource.username=${DATABASE_USER:dl4se_admin}
+spring.datasource.password=${DATABASE_PASS:Lugano2022}
+spring.datasource.driver-class-name=org.postgresql.Driver
diff --git a/dl4se-crawler/src/main/resources/crawler.properties b/dl4se-crawler/src/main/resources/crawler.properties
new file mode 100644
index 00000000..d552e9ec
--- /dev/null
+++ b/dl4se-crawler/src/main/resources/crawler.properties
@@ -0,0 +1,16 @@
+crawler.base-url=https://seart-ghs.si.usi.ch/api/r/search
+crawler.start-date=2008-01-01
+crawler.tmp-dir-prefix=crawler-
+crawler.scheduling.next-run-delay=PT1H
+crawler.analyzer.core-pool-size=2
+crawler.analyzer.max-pool-size=4
+crawler.analyzer.queue-capacity=64
+crawler.analyzer.max-parse-time=1000ms
+crawler.ignore.repository.names=
+crawler.ignore.repository.files.max-size=5MB
+crawler.ignore.repository.files.max-lines=10000
+crawler.ignore.repository.files.glob-pattern=
+#crawler.languages.Java=java
+#crawler.languages.Python=py
+#crawler.languages.C=c,h
+#crawler.languages.[C++]=cc,cpp
diff --git a/dl4se-crawler/src/main/resources/hibernate.cfg.xml b/dl4se-crawler/src/main/resources/hibernate.cfg.xml
deleted file mode 100644
index b0935a65..00000000
--- a/dl4se-crawler/src/main/resources/hibernate.cfg.xml
+++ /dev/null
@@ -1,40 +0,0 @@
-
-
-
-
-
-
-
- org.postgresql.Driver
- jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME}
- ${DATABASE_USER}
- ${DATABASE_PASS}
-
-
- 1
-
-
- org.hibernate.dialect.PostgreSQL95Dialect
-
-
- org.hibernate.cache.internal.DisabledCaching
-
-
- false
- false
- false
-
-
- none
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dl4se-crawler/src/main/resources/hibernate.properties b/dl4se-crawler/src/main/resources/hibernate.properties
deleted file mode 100644
index a22fe63c..00000000
--- a/dl4se-crawler/src/main/resources/hibernate.properties
+++ /dev/null
@@ -1 +0,0 @@
-hibernate.types.print.banner=false
\ No newline at end of file
diff --git a/dl4se-crawler/src/main/resources/logback.xml b/dl4se-crawler/src/main/resources/logback.xml
deleted file mode 100644
index 99fb72b4..00000000
--- a/dl4se-crawler/src/main/resources/logback.xml
+++ /dev/null
@@ -1,63 +0,0 @@
-
-
-
-
-
- System.out
-
- INFO
- ACCEPT
- DENY
-
-
- %d{yyyy-MM-dd HH:mm:ss.SSS} | %-5level | %logger{35} | %msg%n
-
-
-
-
- System.err
-
- WARN
-
-
-
- logger.startsWith("usi.si.seart")
-
- DENY
- ACCEPT
-
-
- %d{yyyy-MM-dd HH:mm:ss.SSS} | %-5level | %logger{35} | %file:%line | %msg%n
-
-
-
-
- ${LOG_DIR_NAME}/${LOG_FILE_NAME}.log
- true
- true
-
- ${LOG_DIR_NAME}/${LOG_FILE_NAME}_%d{yyyy-MM-dd}_%i.log.gz
- 100MB
- 180
-
-
- DEBUG
-
-
-
- logger.startsWith("usi.si.seart")
-
- ACCEPT
- DENY
-
-
- %d{yyyy-MM-dd HH:mm:ss.SSS} | %-5level | %logger{35} | %file:%line | %msg%n
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/ch/usi/si/seart/analyzer/NullFilteredReaderTest.java b/dl4se-crawler/src/test/java/ch/usi/si/seart/analyzer/NullFilteredReaderTest.java
new file mode 100644
index 00000000..4a0cca25
--- /dev/null
+++ b/dl4se-crawler/src/test/java/ch/usi/si/seart/analyzer/NullFilteredReaderTest.java
@@ -0,0 +1,21 @@
+package ch.usi.si.seart.analyzer;
+
+import java.io.Reader;
+
+class NullFilteredReaderTest extends ReaderTest {
+
+ @Override
+ protected Reader getSubject(Reader initial) {
+ return new NullFilteredReader(initial);
+ }
+
+ @Override
+ protected String getInput() {
+ return "This\0is\0a\0test!";
+ }
+
+ @Override
+ protected String getExpected() {
+ return "Thisisatest!";
+ }
+}
diff --git a/dl4se-crawler/src/test/java/ch/usi/si/seart/analyzer/ReaderTest.java b/dl4se-crawler/src/test/java/ch/usi/si/seart/analyzer/ReaderTest.java
new file mode 100644
index 00000000..a1dc7d8b
--- /dev/null
+++ b/dl4se-crawler/src/test/java/ch/usi/si/seart/analyzer/ReaderTest.java
@@ -0,0 +1,29 @@
+package ch.usi.si.seart.analyzer;
+
+import com.google.common.io.CharStreams;
+import lombok.Cleanup;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+abstract class ReaderTest {
+
+ protected abstract Reader getSubject(Reader initial);
+
+ protected abstract String getInput();
+
+ protected abstract String getExpected();
+
+ @Test
+ void testRead() throws IOException {
+ String input = getInput();
+ String expected = getExpected();
+ @Cleanup Reader stringReader = new StringReader(input);
+ @Cleanup Reader testSubject = getSubject(stringReader);
+ String actual = CharStreams.toString(testSubject);
+ Assertions.assertEquals(expected, actual);
+ }
+}
diff --git a/dl4se-crawler/src/test/java/ch/usi/si/seart/git/GitTest.java b/dl4se-crawler/src/test/java/ch/usi/si/seart/git/GitTest.java
new file mode 100644
index 00000000..7de52259
--- /dev/null
+++ b/dl4se-crawler/src/test/java/ch/usi/si/seart/git/GitTest.java
@@ -0,0 +1,264 @@
+package ch.usi.si.seart.git;
+
+import ch.usi.si.seart.crawler.git.Git;
+import ch.usi.si.seart.crawler.git.GitException;
+import ch.usi.si.seart.model.Language;
+import lombok.AccessLevel;
+import lombok.experimental.FieldDefaults;
+import lombok.experimental.NonFinal;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.time.LocalDateTime;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
+class GitTest {
+
+ @NonFinal
+ @TempDir
+ Path tmp;
+ // https://github.com/dabico/dl4se-test
+ String testRepoName = "dabico/dl4se-test";
+ // https://github.com/dabico/dl4se-empty
+ String emptyRepoName = "dabico/dl4se-empty";
+ // https://github.com/dabico/dl4se-history
+ String historyRepoName = "dabico/dl4se-history";
+
+ Language java = Language.builder().extensions(Collections.singletonList("java")).build();
+ Language python = Language.builder().extensions(Collections.singletonList("py")).build();
+ Language cpp = Language.builder()
+ .extensions(List.of(
+ "c",
+ "cc",
+ "cpp",
+ "cxx"
+ ))
+ .build();
+
+ @Test
+ void regularCloneTest() throws IOException, GitException {
+ try (Git ignored = new Git(testRepoName, tmp)) {
+ checkContents(tmp.toFile());
+ }
+ }
+
+ @Test
+ void shallowCloneTest() throws IOException, GitException {
+ try (Git ignored = new Git(testRepoName, tmp, true)) {
+ checkContents(tmp.toFile());
+ }
+ }
+
+ @Test
+ void shallowCloneSinceTest() throws IOException, GitException {
+ try (Git ignored = new Git(testRepoName, tmp, LocalDateTime.of(2022, 2, 12, 0, 0))) {
+ checkContents(tmp.toFile());
+ }
+ }
+
+ private void checkContents(File dir) {
+ Assertions.assertTrue(dir.exists());
+ Assertions.assertTrue(dir.isDirectory());
+ File[] files = dir.listFiles();
+ Assertions.assertNotNull(files);
+ Assertions.assertEquals(4, files.length);
+ }
+
+ // ref: https://github.com/dabico/dl4se-test/commit/010e305c9818d7d8a985e91cf60739ac3b66d24e
+ @Test
+ void getLastCommitInfoTest() throws IOException, GitException {
+ try (Git git = new Git(testRepoName, tmp, true)) {
+ Git.Commit commit = git.getLastCommitInfo();
+ Assertions.assertEquals("010e305c9818d7d8a985e91cf60739ac3b66d24e", commit.getSha());
+ Assertions.assertEquals(LocalDateTime.of(2022, 2, 12, 20, 19, 51), commit.getTimestamp());
+ }
+ }
+
+ @Test
+ void getLastCommitEmptyRepoTest() throws IOException, GitException {
+ try (Git git = new Git(emptyRepoName, tmp)) {
+ Assertions.assertThrows(GitException.class, git::getLastCommitInfo);
+ }
+ }
+
+ @Test
+ void getDiffLowerBoundTest() throws IOException, GitException {
+ try (Git git = new Git(historyRepoName, tmp)) {
+ Git.Diff diff = git.getDiff("bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7");
+ // Expected diff output:
+ // M .gitignore
+ // D app.cpp
+ // A app.py
+ // R079 app.java dir/app.java
+ // R100 app.scala dir/app.scala
+ // R100 app.c program.c
+ Assertions.assertEquals(1, diff.getAdded().size());
+ Assertions.assertEquals(1, diff.getDeleted().size());
+ Assertions.assertEquals(1, diff.getModified().size());
+ Assertions.assertEquals(2, diff.getRenamed().size());
+ Assertions.assertEquals(1, diff.getEdited().size());
+ }
+ }
+
+ @Test
+ void getDiffTestLowerAndUpperBoundTest() throws IOException, GitException {
+ try (Git git = new Git(historyRepoName, tmp)) {
+ Git.Diff diff = git.getDiff(
+ "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
+ "5dae826d5335bc633ce4bbae74e6b8394e563c13"
+ );
+ // Expected diff output:
+ // D app.cpp
+ // A app.py
+ // R100 app.c program.c
+ Assertions.assertEquals(1, diff.getDeleted().size());
+ Assertions.assertEquals(1, diff.getAdded().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(1, diff.getRenamed().size());
+ Assertions.assertEquals(0, diff.getEdited().size());
+ diff = git.getDiff(
+ "db7dfcf4f141ffbf34c9acd089087e493029a973",
+ "225c820cb9ba921127cfc57ee358e1205efd06c9"
+ );
+ // Expected diff output:
+ // R079 app.java dir/app.java
+ // R100 app.c program.c
+ Assertions.assertEquals(0, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getAdded().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(1, diff.getRenamed().size());
+ Assertions.assertEquals(1, diff.getEdited().size());
+ }
+ }
+
+ @Test
+ void getDiffLowerBoundFilteredTest() throws IOException, GitException {
+ try (Git git = new Git(historyRepoName, tmp)) {
+ Git.Diff diff = git.getDiff("bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7", Set.of(java, python));
+ // Expected diff output:
+ // A app.py
+ // R079 app.java dir/app.java
+ Assertions.assertEquals(1, diff.getAdded().size());
+ Assertions.assertEquals(0, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(0, diff.getRenamed().size());
+ Assertions.assertEquals(1, diff.getEdited().size());
+
+ diff = git.getDiff("bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7", Set.of(cpp, python));
+ // Expected diff output:
+ // D app.cpp
+ // A app.py
+ // R100 app.c program.c
+ Assertions.assertEquals(1, diff.getAdded().size());
+ Assertions.assertEquals(1, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(1, diff.getRenamed().size());
+ Assertions.assertEquals(0, diff.getEdited().size());
+ }
+ }
+
+ @Test
+ void getDiffTestLowerAndUpperBoundFilteredTest() throws IOException, GitException {
+ try (Git git = new Git(historyRepoName, tmp)) {
+ Git.Diff diff = git.getDiff(
+ "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
+ "5dae826d5335bc633ce4bbae74e6b8394e563c13",
+ Set.of(java, python)
+ );
+ // Expected diff output:
+ // A app.py
+ Assertions.assertEquals(1, diff.getAdded().size());
+ Assertions.assertEquals(0, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(0, diff.getRenamed().size());
+ Assertions.assertEquals(0, diff.getEdited().size());
+
+ diff = git.getDiff(
+ "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
+ "5dae826d5335bc633ce4bbae74e6b8394e563c13",
+ Set.of(cpp, python)
+ );
+ // Expected diff output:
+ // D app.cpp
+ // A app.py
+ // R100 app.c program.c
+ Assertions.assertEquals(1, diff.getAdded().size());
+ Assertions.assertEquals(1, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(1, diff.getRenamed().size());
+ Assertions.assertEquals(0, diff.getEdited().size());
+
+ diff = git.getDiff(
+ "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
+ "5dae826d5335bc633ce4bbae74e6b8394e563c13",
+ Set.of(java)
+ );
+ // Expected diff output:
+ //
+ Assertions.assertEquals(0, diff.getAdded().size());
+ Assertions.assertEquals(0, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(0, diff.getRenamed().size());
+ Assertions.assertEquals(0, diff.getEdited().size());
+ }
+ }
+
+ @Test
+ void getDiffSameSHATest() throws IOException, GitException {
+ try (Git git = new Git(historyRepoName, tmp)) {
+ String lastCommitSha = "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7";
+ Git.Diff diff = git.getDiff(lastCommitSha, lastCommitSha);
+ Assertions.assertEquals(0, diff.getAdded().size());
+ Assertions.assertEquals(0, diff.getDeleted().size());
+ Assertions.assertEquals(0, diff.getModified().size());
+ Assertions.assertEquals(0, diff.getRenamed().size());
+ Assertions.assertEquals(0, diff.getEdited().size());
+ }
+ }
+
+ @ParameterizedTest
+ @EmptySource
+ @ValueSource(strings = {
+ "0000000",
+ "0000000000000000000000000000000000000000"
+ })
+ void gitDiffBadSHATest(String sha) throws IOException, GitException {
+ try (Git git = new Git(testRepoName, tmp)) {
+ Assertions.assertThrows(GitException.class, () -> git.getDiff(sha));
+ }
+ }
+
+ @Test
+ @SuppressWarnings("resource")
+ void nonEmptyDirTest() throws IOException {
+ File newFile = new File(tmp.toFile().getAbsolutePath() + File.separator + "empty_file.txt");
+ boolean created = newFile.createNewFile();
+ Assertions.assertTrue(created);
+ Assertions.assertThrows(GitException.class, () -> new Git(testRepoName, tmp, true));
+ }
+
+ @Test
+ @SuppressWarnings("resource")
+ void nonExistingRepoTest() {
+ String fakeRepoName = "dabico/fake-repo";
+ Assertions.assertThrows(GitException.class, () -> new Git(fakeRepoName, tmp, false));
+ }
+
+ @Test
+ @SuppressWarnings("resource")
+ void invalidShallowDateTest() {
+ Assertions.assertThrows(
+ GitException.class, () -> new Git(testRepoName, tmp, LocalDateTime.of(2022, 2, 14, 0, 0))
+ );
+ }
+}
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/io/ExtensionBasedFileVisitorTest.java b/dl4se-crawler/src/test/java/ch/usi/si/seart/io/ExtensionBasedFileVisitorTest.java
similarity index 93%
rename from dl4se-crawler/src/test/java/usi/si/seart/io/ExtensionBasedFileVisitorTest.java
rename to dl4se-crawler/src/test/java/ch/usi/si/seart/io/ExtensionBasedFileVisitorTest.java
index 71ae5302..8b481885 100644
--- a/dl4se-crawler/src/test/java/usi/si/seart/io/ExtensionBasedFileVisitorTest.java
+++ b/dl4se-crawler/src/test/java/ch/usi/si/seart/io/ExtensionBasedFileVisitorTest.java
@@ -1,5 +1,6 @@
-package usi.si.seart.io;
+package ch.usi.si.seart.io;
+import ch.usi.si.seart.crawler.io.ExtensionBasedFileVisitor;
import lombok.SneakyThrows;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -71,6 +72,7 @@ void invalidExtensionsTest(String[] extensions) {
@Test
void nullExtensionsTest() {
- Assertions.assertThrows(NullPointerException.class, () -> ExtensionBasedFileVisitor.forExtensions(null));
+ String[] invalid = null;
+ Assertions.assertThrows(NullPointerException.class, () -> ExtensionBasedFileVisitor.forExtensions(invalid));
}
-}
\ No newline at end of file
+}
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/collection/utils/CollectionUtilsTest.java b/dl4se-crawler/src/test/java/usi/si/seart/collection/utils/CollectionUtilsTest.java
deleted file mode 100644
index e846805a..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/collection/utils/CollectionUtilsTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package usi.si.seart.collection.utils;
-
-import lombok.AccessLevel;
-import lombok.experimental.FieldDefaults;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-import java.util.Map;
-import java.util.Set;
-
-@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
-class CollectionUtilsTest {
-
- Set ints1 = Set.of(1, 2, 3, 4, 5, 6);
- Set ints2 = Set.of(4, 5, 6, 7, 8, 9);
- Set strs1 = Set.of("this", "is", "a", "utils", "test");
- Set strs2 = Set.of("this", "is", "another", "test");
-
- @Test
- void mergeTest() {
- Integer[] empty = new Integer[]{};
- Integer[] arr1 = new Integer[] {1, 2, 3};
- Integer[] arr2 = new Integer[] {4, 5, 6};
- Assertions.assertArrayEquals(empty, CollectionUtils.merge(empty, empty));
- Assertions.assertArrayEquals(arr1, CollectionUtils.merge(empty, arr1));
- Assertions.assertArrayEquals(arr1, CollectionUtils.merge(arr1, empty));
- Assertions.assertArrayEquals(new Integer[] {1, 2, 3, 4, 5, 6}, CollectionUtils.merge(arr1, arr2));
- Assertions.assertArrayEquals(new Integer[] {1, 2, 3, 1, 2, 3}, CollectionUtils.merge(arr1, arr1));
- }
-
- @Test
- void intersectionTest() {
- Assertions.assertEquals(Set.of(4, 5, 6), CollectionUtils.intersection(ints1, ints2));
- Assertions.assertEquals(Set.of("this", "is", "test"), CollectionUtils.intersection(strs1, strs2));
- }
-
- @Test
- void differenceTest() {
- Assertions.assertEquals(Set.of(1, 2, 3), CollectionUtils.difference(ints1, ints2));
- Assertions.assertEquals(Set.of("a", "utils"), CollectionUtils.difference(strs1, strs2));
- }
-
- @Test
- void getAllKeysFromTest() {
- Map map = Map.of(0, 5, 1, 6, 2, 7, 3, 8, 4, 9);
- Set keys = Set.of(0, 1, 2, 3, 4);
-
- Assertions.assertEquals(Set.of(5, 6, 7, 8, 9), CollectionUtils.getAllValuesFrom(map, keys));
- Assertions.assertEquals(Set.of(5, 6, 7), CollectionUtils.getAllValuesFrom(map, Set.of(0, 1, 2)));
- Assertions.assertEquals(Set.of(), CollectionUtils.getAllValuesFrom(map, Set.of(5, 6, 7)));
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/converter/ConverterTest.java b/dl4se-crawler/src/test/java/usi/si/seart/converter/ConverterTest.java
deleted file mode 100644
index 0f47f6ad..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/converter/ConverterTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package usi.si.seart.converter;
-
-import lombok.AccessLevel;
-import lombok.experimental.FieldDefaults;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-
-import java.util.List;
-import java.util.Spliterator;
-import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;
-
-@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
-class ConverterTest {
-
- static Converter intToStringConverter;
- static Converter stringToIntConverter;
-
- private static class IntToStringConverter extends Converter {
- @Override
- protected String forward(Integer i) {
- return i.toString();
- }
-
- @Override
- protected Integer backward(String s) {
- return Integer.parseInt(s);
- }
- }
-
- @BeforeAll
- public static void before() {
- intToStringConverter = new IntToStringConverter();
- stringToIntConverter = intToStringConverter.reverse();
- }
-
- @Test
- void singleConversionTest() {
- Integer i = 5;
- String s = "5";
- Assertions.assertEquals(s, intToStringConverter.convert(i));
- Assertions.assertEquals(i, stringToIntConverter.convert(s));
- }
-
- @Test
- void multipleConversionTest() {
- List iList = List.of(1, 2, 3, 4);
- List sList = List.of("1", "2", "3", "4");
-
- Spliterator spliterator1 = intToStringConverter.convertAll(iList).spliterator();
- Spliterator spliterator2 = stringToIntConverter.convertAll(sList).spliterator();
-
- List iConverted = StreamSupport.stream(spliterator1, false).collect(Collectors.toList());
- List sConverted = StreamSupport.stream(spliterator2, false).collect(Collectors.toList());
-
- Assertions.assertEquals(sList, iConverted);
- Assertions.assertEquals(iList, sConverted);
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/git/GitTest.java b/dl4se-crawler/src/test/java/usi/si/seart/git/GitTest.java
deleted file mode 100644
index d1060706..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/git/GitTest.java
+++ /dev/null
@@ -1,257 +0,0 @@
-package usi.si.seart.git;
-
-import lombok.AccessLevel;
-import lombok.SneakyThrows;
-import lombok.experimental.FieldDefaults;
-import lombok.experimental.NonFinal;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.EmptySource;
-import org.junit.jupiter.params.provider.ValueSource;
-import usi.si.seart.model.Language;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Path;
-import java.time.LocalDateTime;
-import java.util.Set;
-
-@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
-class GitTest {
-
- @NonFinal
- @TempDir
- Path tmp;
- // https://github.com/dabico/dl4se-test
- String testRepoName = "dabico/dl4se-test";
- // https://github.com/dabico/dl4se-empty
- String emptyRepoName = "dabico/dl4se-empty";
- // https://github.com/dabico/dl4se-history
- String historyRepoName = "dabico/dl4se-history";
-
- Language java = Language.builder().extension("java").build();
- Language python = Language.builder().extension("py").build();
- Language cpp = Language.builder()
- .extension("c")
- .extension("cc")
- .extension("cpp")
- .extension("cxx")
- .build();
-
- @Test
- @SneakyThrows({GitException.class})
- void regularCloneTest() {
- new Git(testRepoName, tmp);
- checkContents(tmp.toFile());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void shallowCloneTest() {
- new Git(testRepoName, tmp, true);
- checkContents(tmp.toFile());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void shallowCloneSinceTest() {
- new Git(testRepoName, tmp, LocalDateTime.of(2022, 2, 12, 0, 0));
- checkContents(tmp.toFile());
- }
-
- private void checkContents(File dir) {
- Assertions.assertTrue(dir.exists());
- Assertions.assertTrue(dir.isDirectory());
- File[] files = dir.listFiles();
- Assertions.assertNotNull(files);
- Assertions.assertEquals(4, files.length);
- }
-
- // ref: https://github.com/dabico/dl4se-crawler-test/commit/010e305c9818d7d8a985e91cf60739ac3b66d24e
- @Test
- @SneakyThrows({GitException.class})
- void getLastCommitInfoTest() {
- Git git = new Git(testRepoName, tmp, true);
- Git.Commit commit = git.getLastCommitInfo();
- Assertions.assertEquals("010e305c9818d7d8a985e91cf60739ac3b66d24e", commit.getSha());
- Assertions.assertEquals(LocalDateTime.of(2022, 2, 12, 20, 19, 51), commit.getTimestamp());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void getLastCommitEmptyRepoTest() {
- Git git = new Git(emptyRepoName, tmp);
- Assertions.assertThrows(GitException.class, git::getLastCommitInfo);
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void getDiffLowerBoundTest() {
- Git git = new Git(historyRepoName, tmp);
- Git.Diff diff = git.getDiff("bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7");
- // Expected diff output:
- // M .gitignore
- // D app.cpp
- // A app.py
- // R079 app.java dir/app.java
- // R100 app.scala dir/app.scala
- // R100 app.c program.c
- Assertions.assertEquals(1, diff.getAdded().size());
- Assertions.assertEquals(1, diff.getDeleted().size());
- Assertions.assertEquals(1, diff.getModified().size());
- Assertions.assertEquals(2, diff.getRenamed().size());
- Assertions.assertEquals(1, diff.getEdited().size());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void getDiffTestLowerAndUpperBoundTest() {
- Git git = new Git(historyRepoName, tmp);
- Git.Diff diff = git.getDiff(
- "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
- "5dae826d5335bc633ce4bbae74e6b8394e563c13"
- );
- // Expected diff output:
- // D app.cpp
- // A app.py
- // R100 app.c program.c
- Assertions.assertEquals(1, diff.getDeleted().size());
- Assertions.assertEquals(1, diff.getAdded().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(1, diff.getRenamed().size());
- Assertions.assertEquals(0, diff.getEdited().size());
- diff = git.getDiff(
- "db7dfcf4f141ffbf34c9acd089087e493029a973",
- "225c820cb9ba921127cfc57ee358e1205efd06c9"
- );
- // Expected diff output:
- // R079 app.java dir/app.java
- // R100 app.c program.c
- Assertions.assertEquals(0, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getAdded().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(1, diff.getRenamed().size());
- Assertions.assertEquals(1, diff.getEdited().size());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void getDiffLowerBoundFilteredTest() {
- Git git = new Git(historyRepoName, tmp);
- Git.Diff diff = git.getDiff("bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7", Set.of(java, python));
- // Expected diff output:
- // A app.py
- // R079 app.java dir/app.java
- Assertions.assertEquals(1, diff.getAdded().size());
- Assertions.assertEquals(0, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(0, diff.getRenamed().size());
- Assertions.assertEquals(1, diff.getEdited().size());
-
- diff = git.getDiff("bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7", Set.of(cpp, python));
- // Expected diff output:
- // D app.cpp
- // A app.py
- // R100 app.c program.c
- Assertions.assertEquals(1, diff.getAdded().size());
- Assertions.assertEquals(1, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(1, diff.getRenamed().size());
- Assertions.assertEquals(0, diff.getEdited().size());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void getDiffTestLowerAndUpperBoundFilteredTest() {
- Git git = new Git(historyRepoName, tmp);
- Git.Diff diff = git.getDiff(
- "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
- "5dae826d5335bc633ce4bbae74e6b8394e563c13",
- Set.of(java, python)
- );
- // Expected diff output:
- // A app.py
- Assertions.assertEquals(1, diff.getAdded().size());
- Assertions.assertEquals(0, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(0, diff.getRenamed().size());
- Assertions.assertEquals(0, diff.getEdited().size());
-
- diff = git.getDiff(
- "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
- "5dae826d5335bc633ce4bbae74e6b8394e563c13",
- Set.of(cpp, python)
- );
- // Expected diff output:
- // D app.cpp
- // A app.py
- // R100 app.c program.c
- Assertions.assertEquals(1, diff.getAdded().size());
- Assertions.assertEquals(1, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(1, diff.getRenamed().size());
- Assertions.assertEquals(0, diff.getEdited().size());
-
- diff = git.getDiff(
- "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7",
- "5dae826d5335bc633ce4bbae74e6b8394e563c13",
- Set.of(java)
- );
- // Expected diff output:
- //
- Assertions.assertEquals(0, diff.getAdded().size());
- Assertions.assertEquals(0, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(0, diff.getRenamed().size());
- Assertions.assertEquals(0, diff.getEdited().size());
- }
-
- @Test
- @SneakyThrows({GitException.class})
- void getDiffSameSHATest() {
- Git git = new Git(historyRepoName, tmp);
- String lastCommitSha = "bc74b0bbd2821c4cdb0b1943f6b3afced8d49ca7";
- Git.Diff diff = git.getDiff(lastCommitSha, lastCommitSha);
- Assertions.assertEquals(0, diff.getAdded().size());
- Assertions.assertEquals(0, diff.getDeleted().size());
- Assertions.assertEquals(0, diff.getModified().size());
- Assertions.assertEquals(0, diff.getRenamed().size());
- Assertions.assertEquals(0, diff.getEdited().size());
- }
-
- @ParameterizedTest
- @EmptySource
- @ValueSource(strings = {
- "0000000",
- "0000000000000000000000000000000000000000"
- })
- @SneakyThrows({GitException.class})
- void gitDiffBadSHATest(String sha) {
- Git git = new Git(testRepoName, tmp);
- Assertions.assertThrows(GitException.class, () -> git.getDiff(sha));
- }
-
- @Test
- @SneakyThrows({IOException.class})
- void nonEmptyDirTest() {
- File newFile = new File(tmp.toFile().getAbsolutePath() + File.separator + "empty_file.txt");
- boolean created = newFile.createNewFile();
- Assertions.assertTrue(created);
- Assertions.assertThrows(GitException.class, () -> new Git(testRepoName, tmp, true));
- }
-
- @Test
- void nonExistingRepoTest() {
- String fakeRepoName = "dabico/fake-repo";
- Assertions.assertThrows(GitException.class, () -> new Git(fakeRepoName, tmp, false));
- }
-
- @Test
- void invalidShallowDateTest() {
- Assertions.assertThrows(
- GitException.class, () -> new Git(testRepoName, tmp, LocalDateTime.of(2022, 2, 14, 0, 0))
- );
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/io/PropertiesReaderTest.java b/dl4se-crawler/src/test/java/usi/si/seart/io/PropertiesReaderTest.java
deleted file mode 100644
index 33771139..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/io/PropertiesReaderTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-package usi.si.seart.io;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-import java.util.List;
-import java.util.stream.Collectors;
-
-class PropertiesReaderTest {
-
- PropertiesReader propertiesReader = new PropertiesReader("reader.properties");
-
- static String user = System.getenv("DB_USER");
- static String pass = System.getProperty("db.password");
- static String srvr = System.getenv("DB_SERVER");
- static String port = System.getenv("DB_PORT");
-
- @Test
- void getPropertyTest() {
-
- Assertions.assertNull(propertiesReader.getProperty("app.property.nonexistant"));
-
- List expected = List.of(
- "",
- "dl4se_properties",
- "this is a test for the properties parsing",
- "2022-01-01",
- user,
- pass,
- "jdbc:postgresql://"+srvr+"/"+port,
- "dl4se_properties",
- "The "+user+" is testing dl4se_properties with "+pass
- );
-
- List properties = List.of(
- "app.property.empty",
- "app.property.name",
- "app.property.description",
- "app.property.date",
- "app.property.db.user",
- "app.property.db.password",
- "app.property.db.url",
- "app.property.name.copy",
- "app.property.multiple"
- );
-
- List actual = properties.stream()
- .map(propertiesReader::getProperty)
- .collect(Collectors.toList());
-
- Assertions.assertEquals(expected.size(), actual.size());
- for (int i = 0; i < actual.size(); i++) {
- Assertions.assertEquals(expected.get(i), actual.get(i));
- }
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/parser/JavaParserTest.java b/dl4se-crawler/src/test/java/usi/si/seart/parser/JavaParserTest.java
deleted file mode 100644
index ef3e7f7a..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/parser/JavaParserTest.java
+++ /dev/null
@@ -1,159 +0,0 @@
-package usi.si.seart.parser;
-
-import com.github.javaparser.StaticJavaParser;
-import com.github.javaparser.ast.Node;
-import com.github.javaparser.ast.body.CallableDeclaration;
-import com.github.javaparser.ast.body.ConstructorDeclaration;
-import com.github.javaparser.ast.body.MethodDeclaration;
-import lombok.AccessLevel;
-import lombok.experimental.FieldDefaults;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.extension.ExtensionContext;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.ArgumentsProvider;
-import org.junit.jupiter.params.provider.ArgumentsSource;
-import usi.si.seart.collection.Tuple;
-import usi.si.seart.model.code.Boilerplate;
-
-import java.util.stream.Stream;
-
-class JavaParserTest {
-
- private static final Node noComment = StaticJavaParser.parseMethodDeclaration(
- "public void method(){\nint x = 1;\n}"
- );
- private static final Node lineComment = StaticJavaParser.parseMethodDeclaration(
- "public void method(){\n// This is a single line comment\n}"
- );
- private static final Node blockComment = StaticJavaParser.parseMethodDeclaration(
- "public void method(){\n/* This is a\n* multi line comment\n*/\n}"
- );
- private static final Node jdocComment = StaticJavaParser.parseMethodDeclaration(
- "/** * This is a java documentation comment */\npublic void method(){}"
- );
-
- private static final class CountTokensArgumentProvider implements ArgumentsProvider {
-
- @Override
- public Stream extends Arguments> provideArguments(ExtensionContext context) {
- return Stream.of(
- Arguments.of(noComment, 19, 12),
- Arguments.of(lineComment, 24, 7),
- Arguments.of(blockComment, 28, 7),
- Arguments.of(jdocComment, 26, 7)
- );
- }
- }
-
- @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
- private static final class BoilerplateTypeArgumentProvider implements ArgumentsProvider {
-
- MethodDeclaration md1 = StaticJavaParser.parseMethodDeclaration("public void method(){}");
- MethodDeclaration md2 = StaticJavaParser.parseMethodDeclaration("public void setX(){}");
- MethodDeclaration md3 = StaticJavaParser.parseMethodDeclaration("public void getX(){}");
- MethodDeclaration md4 = StaticJavaParser.parseMethodDeclaration("public void builder(){}");
- MethodDeclaration md5 = StaticJavaParser.parseMethodDeclaration("public void toString(){}");
- MethodDeclaration md6 = StaticJavaParser.parseMethodDeclaration("public void equals(){}");
- MethodDeclaration md7 = StaticJavaParser.parseMethodDeclaration("public void hashCode(){}");
- ConstructorDeclaration cd = new ConstructorDeclaration();
-
- @Override
- public Stream extends Arguments> provideArguments(ExtensionContext context) {
- return Stream.of(
- Arguments.of(md1, null),
- Arguments.of(md2, Boilerplate.SETTER),
- Arguments.of(md3, Boilerplate.GETTER),
- Arguments.of(md4, Boilerplate.BUILDER),
- Arguments.of(md5, Boilerplate.TO_STRING),
- Arguments.of(md6, Boilerplate.EQUALS),
- Arguments.of(md7, Boilerplate.HASH_CODE),
- Arguments.of(cd, Boilerplate.CONSTRUCTOR)
- );
- }
- }
-
- @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
- private static final class GetAstHashArgumentProvider implements ArgumentsProvider {
-
- MethodDeclaration md1 = StaticJavaParser.parseMethodDeclaration("public void method(){ x += 1; }");
- MethodDeclaration md2 = StaticJavaParser.parseMethodDeclaration("public void method(){ a += 5; }");
- MethodDeclaration md3 = StaticJavaParser.parseMethodDeclaration("/** JDoc */ public void method(){ x += 1; }");
- MethodDeclaration md4 = StaticJavaParser.parseMethodDeclaration("public void method(){ a += 5L; }");
-
- @Override
- public Stream extends Arguments> provideArguments(ExtensionContext context) {
- return Stream.of(
- Arguments.of(md1, true),
- Arguments.of(md2, true),
- Arguments.of(md3, true),
- Arguments.of(md4, false)
- );
- }
- }
-
- private static final class CountLinesArgumentProvider implements ArgumentsProvider {
-
- @Override
- public Stream extends Arguments> provideArguments(ExtensionContext context) {
- return Stream.of(
- Arguments.of(noComment, 3),
- Arguments.of(lineComment, 3),
- Arguments.of(blockComment, 5),
- Arguments.of(jdocComment, 1)
- );
- }
- }
-
- private static final class RemoveAllCommentsArgumentProvider implements ArgumentsProvider {
-
- @Override
- public Stream extends Arguments> provideArguments(ExtensionContext context) {
- return Stream.of(
- Arguments.of(noComment.clone(), 0),
- Arguments.of(lineComment.clone(), 1),
- Arguments.of(blockComment.clone(), 1),
- Arguments.of(jdocComment.clone(), 1)
- );
- }
- }
-
- @ParameterizedTest
- @ArgumentsSource(CountTokensArgumentProvider.class)
- void countTokensTest(Node node, long expectedLeft, long expectedRight) {
- Tuple tokens = JavaParser.countTokens(node);
- Assertions.assertEquals(expectedLeft, tokens.getLeft());
- Assertions.assertEquals(expectedRight, tokens.getRight());
- }
-
- @ParameterizedTest
- @ArgumentsSource(BoilerplateTypeArgumentProvider.class)
- void boilerplateTypeTest(CallableDeclaration> declaration, Boilerplate expected) {
- Assertions.assertEquals(expected, JavaParser.getBoilerplateType(declaration));
- }
-
- @ParameterizedTest
- @ArgumentsSource(GetAstHashArgumentProvider.class)
- void getAstHashTest(MethodDeclaration declaration, boolean expected) {
- String baseline = "6abae81a5835bb1bbf4a8b2ce105271327e397ec6d453227cf8fd6043a1f2621"; // manually calculated
- String result = JavaParser.getAstHash(declaration);
- Assertions.assertEquals(expected, baseline.equals(result));
- }
-
- @ParameterizedTest
- @ArgumentsSource(CountLinesArgumentProvider.class)
- void countLinesTest(Node node, long expected) {
- long actual = JavaParser.countLines(node);
- Assertions.assertEquals(expected, actual);
- }
-
- @ParameterizedTest
- @ArgumentsSource(RemoveAllCommentsArgumentProvider.class)
- void removeAllCommentsTest(Node node, int expected) {
- Node original = node.clone();
- JavaParser.removeComments(node);
- int actual = (original.getAllContainedComments().size() + original.getComment().map(comment -> 1).orElse(0))
- - (node.getAllContainedComments().size() + node.getComment().map(comment -> 1).orElse(0));
- Assertions.assertEquals(expected, actual);
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/utils/PathUtilsTest.java b/dl4se-crawler/src/test/java/usi/si/seart/utils/PathUtilsTest.java
deleted file mode 100644
index 268c1f4d..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/utils/PathUtilsTest.java
+++ /dev/null
@@ -1,68 +0,0 @@
-package usi.si.seart.utils;
-
-import lombok.SneakyThrows;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-class PathUtilsTest {
-
- @Test
- @SneakyThrows
- void forceDeleteTest() {
- Path temp = Files.createTempDirectory("test");
- Files.createFile(Path.of(temp.toString(), "file1.txt"));
- Files.createFile(Path.of(temp.toString(), "file2.txt"));
- Files.createFile(Path.of(temp.toString(), "file3.txt"));
- Files.createDirectory(Path.of(temp.toString(), "empty"));
- Files.createFile(
- Path.of(Files.createDirectory(Path.of(temp.toString(), "nonempty")).toString(), "file4.txt")
- );
- PathUtils.forceDelete(temp);
- Assertions.assertFalse(temp.toFile().exists());
- }
-
- @Test
- void isTestFileTest() {
- Path file1 = Path.of("/java/src/org/json/App.java");
- Path file2 = Path.of("/java/src/org/json/TestApp.java");
- Path file3 = Path.of("/java/src/org/json/AppTest.java");
- Path file4 = Path.of("/java/src/org/json/AppTests.java");
- Path file5 = Path.of("/java/src/org/json/AppTestCase.java");
- Path file6 = Path.of("/java/test/org/json/App.java");
- Path file7 = Path.of("/java/tests/org/json/App.java");
- Path file8 = Path.of("/test/org/json/App.java");
- Path file9 = Path.of("/AppTest.java");
- Assertions.assertFalse(PathUtils.isTestFile(file1));
- Assertions.assertTrue(PathUtils.isTestFile(file2));
- Assertions.assertTrue(PathUtils.isTestFile(file3));
- Assertions.assertTrue(PathUtils.isTestFile(file4));
- Assertions.assertTrue(PathUtils.isTestFile(file5));
- Assertions.assertTrue(PathUtils.isTestFile(file6));
- Assertions.assertTrue(PathUtils.isTestFile(file7));
- Assertions.assertTrue(PathUtils.isTestFile(file8));
- Assertions.assertTrue(PathUtils.isTestFile(file9));
- }
-
- @Test
- void getExtensionTest() {
- Path path1 = Path.of("/java/src/org/json/App.java");
- Path path2 = Path.of("/App.java");
- Path path3 = Path.of("App.java");
- Path path4 = Path.of("/java/src/org.json/App.java");
- Path path5 = Path.of("/.gitignore");
- Path path6 = Path.of("/java/src/org/json");
- Path path7 = Path.of("/java/src.org/json");
- Path path8 = Path.of("");
- Assertions.assertEquals("java", PathUtils.getExtension(path1));
- Assertions.assertEquals("java", PathUtils.getExtension(path2));
- Assertions.assertEquals("java", PathUtils.getExtension(path3));
- Assertions.assertEquals("java", PathUtils.getExtension(path4));
- Assertions.assertEquals("gitignore", PathUtils.getExtension(path5));
- Assertions.assertEquals("", PathUtils.getExtension(path6));
- Assertions.assertEquals("", PathUtils.getExtension(path7));
- Assertions.assertEquals("", PathUtils.getExtension(path8));
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/java/usi/si/seart/utils/StringUtilsTest.java b/dl4se-crawler/src/test/java/usi/si/seart/utils/StringUtilsTest.java
deleted file mode 100644
index 6dc153a6..00000000
--- a/dl4se-crawler/src/test/java/usi/si/seart/utils/StringUtilsTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package usi.si.seart.utils;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-
-class StringUtilsTest {
-
- @Test
- void sha256Test() {
- // Expected generated from: https://passwordsgenerator.net/sha256-hash-generator/
- Assertions.assertEquals(
- "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069",
- StringUtils.sha256("Hello World!")
- );
- }
-
- @ParameterizedTest
- @ValueSource(strings = {
- "Ово је проба",
- "Ovo je проба",
- "Ово је proba"
- })
- void containsNonAsciiTest(String input) {
- Assertions.assertTrue(StringUtils.containsNonAscii(input));
- }
-
- @ParameterizedTest
- @ValueSource(strings = {
- "This is a String",
- " This is a String ",
- "This is a String",
- "This\nis\ta\rString",
- "\n\r This\n\n\nis a\r\r\rString \r\n"
- })
- void normalizeSpaceTest(String input) {
- String baseline = "This is a String";
- Assertions.assertEquals(baseline, StringUtils.normalizeSpace(input));
- }
-
- @Test
- void fromInputStreamTest() {
- String expected = "abcd1234";
- InputStream inputStream = new ByteArrayInputStream(expected.getBytes());
- String actual = StringUtils.fromInputStream(inputStream);
- Assertions.assertEquals(expected, actual);
- }
-}
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/resources/logback-test.xml b/dl4se-crawler/src/test/resources/logback-test.xml
deleted file mode 100644
index 0b3c831c..00000000
--- a/dl4se-crawler/src/test/resources/logback-test.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-
-
- System.out
-
- DEBUG
- ACCEPT
- DENY
-
-
- [%d{yyyy-MM-dd HH:mm:ss.SSS}] [%level] [%thread] %logger{10} %msg%n
-
-
-
- System.err
-
- ERROR
- ACCEPT
- DENY
-
-
- [%d{yyyy-MM-dd HH:mm:ss.SSS}] [%level] [%thread] %logger{10} [%file:%line] %msg%n
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dl4se-crawler/src/test/resources/reader.properties b/dl4se-crawler/src/test/resources/reader.properties
deleted file mode 100644
index 107da739..00000000
--- a/dl4se-crawler/src/test/resources/reader.properties
+++ /dev/null
@@ -1,9 +0,0 @@
-app.property.empty=
-app.property.name=dl4se_properties
-app.property.description=this is a test for the properties parsing
-app.property.date=2022-01-01
-app.property.db.user=${DB_USER}
-app.property.db.password=${db.password}
-app.property.db.url=jdbc:postgresql://${DB_SERVER}/${DB_PORT}
-app.property.name.copy=${app.property.name}
-app.property.multiple=The ${DB_USER} is testing ${app.property.name} with ${db.password}
diff --git a/dl4se-model/pom.xml b/dl4se-model/pom.xml
index dfb6be98..970f1a9f 100644
--- a/dl4se-model/pom.xml
+++ b/dl4se-model/pom.xml
@@ -3,33 +3,29 @@
4.0.0
dl4se
- usi.si.seart
+ ch.usi.si.seart
${revision}
../pom.xml
dl4se-model
- dl4se-model
+ ${project.groupId}:${project.artifactId}
UTF-8
- 2.13.4
-
-
-
- com.fasterxml.jackson
- jackson-bom
- ${jackson.version}
- pom
- import
-
-
-
-
+
+ ch.usi.si.seart
+ jakarta-validation-utils-legacy
+ 0.4.0
+
+
+ com.google.guava
+ guava
+
com.fasterxml.jackson.core
jackson-databind
@@ -39,90 +35,46 @@
jackson-annotations
- javax.el
- javax.el-api
- 3.0.0
- test
+ com.fasterxml.jackson.module
+ jackson-module-jaxb-annotations
- org.glassfish
- javax.el
- 3.0.0
- test
+ jakarta.persistence
+ jakarta.persistence-api
- javax.persistence
- javax.persistence-api
- 2.2
+ jakarta.annotation
+ jakarta.annotation-api
- org.hibernate
- hibernate-core
- 5.5.7.Final
-
-
-