diff --git a/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINT2Preprocessor.java b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINT2Preprocessor.java index 5ac86f9c0..04134d291 100644 --- a/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINT2Preprocessor.java +++ b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINT2Preprocessor.java @@ -29,6 +29,25 @@ public class AQUAINT2Preprocessor { /** Directory of the AQUAINT corpus */ private static String dir; + public AQUAINT2Preprocessor() { + handleparagraphs(); + } + + public void handleparagraphs() { + // convert to 'trectext' + MsgPrinter.printStatusMsg("Converting to 'trectext' format:\n"); + if (convertToTrectext()) + MsgPrinter.printStatusMsg("Documents converted successfully."); + else { + MsgPrinter.printErrorMsg("Could not convert documents."); + System.exit(1); + } + } + + + + + /** * Adds paragraph tags to documents of type 'multi', 'advis' and 'other'. * Documents of type 'story' are usually already tagged. @@ -214,42 +233,4 @@ private static boolean convertToTrectext() { return true; } - - /** - *

Entry point of the program.

- * - *

Preprocesses the AQUAINT-2 corpus.

- * - * @param args argument 1: directory of the AQUAINT-2 corpus - */ - public static void main(String[] args) { - if (args.length < 1) { - MsgPrinter.printUsage("java AQUAINT2Preprocessor " + - "AQUAINT2_directory"); - System.exit(1); - } - dir = args[0]; - - // enable output of status and error messages - MsgPrinter.enableStatusMsgs(true); - MsgPrinter.enableErrorMsgs(true); - - // add paragraph tags if missing - MsgPrinter.printStatusMsg("Adding paragraph tags:\n"); - if (addParagraphTags()) - MsgPrinter.printStatusMsg("Paragraph tags added successfully.\n"); - else { - MsgPrinter.printErrorMsg("Could not add paragraph tags."); - System.exit(1); - } - - // convert to 'trectext' - MsgPrinter.printStatusMsg("Converting to 'trectext' format:\n"); - if (convertToTrectext()) - MsgPrinter.printStatusMsg("Documents converted successfully."); - else { - MsgPrinter.printErrorMsg("Could not convert documents."); - System.exit(1); - } - } } diff --git a/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTFactory.java b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTFactory.java new file mode 100644 index 000000000..e7283785c --- /dev/null +++ b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTFactory.java @@ -0,0 +1,31 @@ +public class AQUAINTFactory { + public AQUAINTPreprocessTemplate getAQUAINTType(String type) { + if (args.length < 1) { + MsgPrinter.printUsage("java AQUAINTFactory " + "directory"); + System.exit(1); + } + type = args[0]; + + // enable output of status and error messages + MsgPrinter.enableStatusMsgs(true); + MsgPrinter.enableErrorMsgs(true); + + // add paragraph tags if missing + MsgPrinter.printStatusMsg("Adding paragraph tags..."); + if (addParagraphTags()) + MsgPrinter.printStatusMsg("Paragraph tags added successfully."); + else { + MsgPrinter.printErrorMsg("Could not add paragraph tags."); + System.exit(1); + } + if(type == null) { + return null; + } else if(type.equalsIgnoreCase("AQUAINTPreprocess")) { + return new AQUAINTPreprocess; + } else if(type.equalsIgnoreCase("AQUAINT2Preprocess")) { + return new AQUAINT2Preprocess; + } else { + return null; + } + } +} \ No newline at end of file diff --git a/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessor.java b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessor.java index d59eab012..6a426681e 100644 --- a/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessor.java +++ b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessor.java @@ -29,6 +29,22 @@ public class AQUAINTPreprocessor { * * @return true, iff the preprocessing was successful */ + + public AQUAINTPreprocessor() { + handleparagraphs(); + } + + public void handleparagraphs() { + // split paragraphs + MsgPrinter.printStatusMsg("Splitting paragraphs..."); + if (splitParagraphs()) + MsgPrinter.printStatusMsg("Paragraphs splitted successfully."); + else { + MsgPrinter.printErrorMsg("Could not split paragraphs."); + System.exit(1); + } + } + private static boolean addParagraphTags() { File[] files = FileUtils.getFilesRec(dir); @@ -162,41 +178,4 @@ private static boolean splitParagraphs() { return true; } - - /** - *

Entry point of the program.

- * - *

Preprocesses the AQUAINT corpus.

- * - * @param args argument 1: directory of the AQUAINT corpus - */ - public static void main(String[] args) { - if (args.length < 1) { - MsgPrinter.printUsage("java AQUAINTPreprocessor AQUAINT_directory"); - System.exit(1); - } - dir = args[0]; - - // enable output of status and error messages - MsgPrinter.enableStatusMsgs(true); - MsgPrinter.enableErrorMsgs(true); - - // add paragraph tags if missing - MsgPrinter.printStatusMsg("Adding paragraph tags..."); - if (addParagraphTags()) - MsgPrinter.printStatusMsg("Paragraph tags added successfully."); - else { - MsgPrinter.printErrorMsg("Could not add paragraph tags."); - System.exit(1); - } - - // split paragraphs - MsgPrinter.printStatusMsg("Splitting paragraphs..."); - if (splitParagraphs()) - MsgPrinter.printStatusMsg("Paragraphs splitted successfully."); - else { - MsgPrinter.printErrorMsg("Could not split paragraphs."); - System.exit(1); - } - } } diff --git a/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessorTemplate.JAVA b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessorTemplate.JAVA new file mode 100644 index 000000000..b49eaf865 --- /dev/null +++ b/lucida/questionanswering/OpenEphyra/src/info/ephyra/indexing/AQUAINTPreprocessorTemplate.JAVA @@ -0,0 +1,30 @@ +package info.ephyra.indexing; + +public abstract class AQUAINTPreprocessorTemplate{ + /** Directory of the AQUAINT corpus */ + protected static String dir; + + // Common Methods + public void preProcessing() { + // enable output of status and error messages + MsgPrinter.enableStatusMsgs(true); + MsgPrinter.enableErrorMsgs(true); + + // add paragraph tags if missing + MsgPrinter.printStatusMsg("Adding paragraph tags..."); + if (addParagraphTags()) + MsgPrinter.printStatusMsg("Paragraph tags added successfully."); + else { + MsgPrinter.printErrorMsg("Could not add paragraph tags."); + System.exit(1); + } + } + + + @Override + protected abstract static boolean addParagraphTags(); + + + + +} \ No newline at end of file