Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

correction of configuration logic (add correct feature extractors) #11

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import de.tu.darmstadt.lt.ner.feature.extractor.CamelCaseFeatureExtractor;
import de.tu.darmstadt.lt.ner.feature.extractor.ClarkPosInductionFeatureExtractor;
import de.tu.darmstadt.lt.ner.feature.extractor.DBLocationListFeatureExtractor;
import de.tu.darmstadt.lt.ner.feature.extractor.DBNachnamenListFeatureExtractor;
import de.tu.darmstadt.lt.ner.feature.extractor.DBPersonListFeatureExtractor;
import de.tu.darmstadt.lt.ner.feature.extractor.FreeBaseFeatureExtractor;
Expand Down Expand Up @@ -46,22 +47,22 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
LTCharacterNgramFeatureFunction.Orientation fromRight = Orientation.RIGHT_TO_LEFT;

List<FeatureExtractor1<Token>> germaNERfeatures = new ArrayList<FeatureExtractor1<Token>>();
if (aProp.getProperty("usePosition").equals("1")) {
if (aProp.getProperty("usePosition", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new PositionFeatureExtractor()));
}

if (aProp.getProperty("useFreeBase").equals("1")) {
if (aProp.getProperty("useFreeBase", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new FreeBaseFeatureExtractor()));
}

if (aProp.getProperty("useClarkPosInduction").equals("1")) {
if (aProp.getProperty("useClarkPosInduction", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new ClarkPosInductionFeatureExtractor()));
}

if (aProp.getProperty("useWordFeature").equals("1")) {
if (aProp.getProperty("useWordFeature", "0").equals("1")) {
germaNERfeatures
.add(new FeatureFunctionExtractor<Token>(new CoveredTextExtractor<Token>()));
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
Expand All @@ -70,7 +71,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new CoveredTextExtractor<Token>(), new Following(2)));
}

if (aProp.getProperty("useCapitalFeature").equals("1")) {
if (aProp.getProperty("useCapitalFeature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCapitalTypeFeatureFunction()),
Expand All @@ -83,7 +84,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(2)));
}

if (aProp.getProperty("usePreffix1Feature").equals("1")) {
if (aProp.getProperty("usePreffix1Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromLeft, 0, 1)),
Expand All @@ -96,7 +97,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("usePreffix2Feature").equals("1")) {
if (aProp.getProperty("usePreffix2Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromLeft, 0, 2)),
Expand All @@ -109,7 +110,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("usePreffix3Feature").equals("1")) {
if (aProp.getProperty("usePreffix3Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromLeft, 0, 3)),
Expand All @@ -122,7 +123,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("usePreffix4Feature").equals("1")) {
if (aProp.getProperty("usePreffix4Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromLeft, 0, 4)),
Expand All @@ -135,7 +136,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("useSuffix1Feature").equals("1")) {
if (aProp.getProperty("useSuffix1Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromRight, 0, 1)),
Expand All @@ -148,7 +149,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("useSuffix2Feature").equals("1")) {
if (aProp.getProperty("useSuffix2Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromRight, 0, 2)),
Expand All @@ -161,7 +162,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("useSuffix3Feature").equals("1")) {
if (aProp.getProperty("useSuffix3Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromRight, 0, 3)),
Expand All @@ -174,7 +175,7 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("useSuffix4Feature").equals("1")) {
if (aProp.getProperty("useSuffix4Feature", "0").equals("1")) {
germaNERfeatures.add(new CleartkExtractor<Token, Token>(Token.class,
new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new LTCharacterNgramFeatureFunction(fromRight, 0, 4)),
Expand All @@ -187,90 +188,90 @@ public static List<FeatureExtractor1<Token>> getFeatures(Properties aProp) throw
new Following(1)));
}

if (aProp.getProperty("useFirstNameFeature").equals("1")) {
if (aProp.getProperty("useFirstNameFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new VornameListFeatureExtractor()));
}

if (aProp.getProperty("useSimilarWord1Feature").equals("1")) {
if (aProp.getProperty("useSimilarWord1Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new SimilarWord1Extractor()));
}

if (aProp.getProperty("useSimilarWord2Feature").equals("1")) {
if (aProp.getProperty("useSimilarWord2Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new SimilarWord2Extractor()));
}

if (aProp.getProperty("useSimilarWord3Feature").equals("1")) {
if (aProp.getProperty("useSimilarWord3Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new SimilarWord3Extractor()));
}

if (aProp.getProperty("useSimilarWord4Feature").equals("1")) {
if (aProp.getProperty("useSimilarWord4Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new SimilarWord4Extractor()));
}

if (aProp.getProperty("useCamelCaseFeature").equals("1")) {
if (aProp.getProperty("useCamelCaseFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new CamelCaseFeatureExtractor()));
}

if (aProp.getProperty("useDBPediaPersonListFeature").equals("1")) {
if (aProp.getProperty("useDBPediaPersonListFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new DBNachnamenListFeatureExtractor()));
new DBPersonListFeatureExtractor()));
}

if (aProp.getProperty("useDBPediaLocationListFeature").equals("1")) {
if (aProp.getProperty("useDBPediaLocationListFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new DBPersonListFeatureExtractor()));
new DBLocationListFeatureExtractor()));
}

if (aProp.getProperty("useTopicClass100Feature").equals("1")) {
if (aProp.getProperty("useTopicClass100Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new TopicClass1FeatureExtractor()));
}

if (aProp.getProperty("useTopicClass50Feature").equals("1")) {
if (aProp.getProperty("useTopicClass50Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new TopicClass50Feature1Extractor()));
}

if (aProp.getProperty("useTopicClass200Feature").equals("1")) {
if (aProp.getProperty("useTopicClass200Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new TopicClass200Feature1Extractor()));
}

if (aProp.getProperty("useTopicClass500Feature").equals("1")) {
if (aProp.getProperty("useTopicClass500Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new TopicClass500Feature1Extractor()));
}

if (aProp.getProperty("useTopicClassUpper100Feature").equals("1")) {
if (aProp.getProperty("useTopicClassUpper100Feature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new UperCasedTopicClass1FeatureExtractor()));
}

if (aProp.getProperty("useCharacterCategoryFeature").equals("1")) {
if (aProp.getProperty("useCharacterCategoryFeature", "0").equals("1")) {
germaNERfeatures.add(LTCharacterCategoryPatternFunction
.<Token> createExtractor(PatternType.ONE_PER_CHAR));
germaNERfeatures.add(LTCharacterCategoryPatternFunction
.<Token> createExtractor(PatternType.REPEATS_MERGED));
}

if (aProp.getProperty("useDBPediaPersonLastNameFeature").equals("1")) {
if (aProp.getProperty("useDBPediaPersonLastNameFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new DBNachnamenListFeatureExtractor()));
}

/** Below are template features. Add them as your needs require. */
if (aProp.getProperty("lookUpFeature").equals("1")) {
if (aProp.getProperty("lookUpFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new TemplateLookupFeatureExtractor()));
}

if (aProp.getProperty("listFeature").equals("1")) {
if (aProp.getProperty("listFeature", "0").equals("1")) {
germaNERfeatures.add(new MyFeatureFunctionExtractor(new CoveredTextExtractor<Token>(),
new TemplateBinaryFeatureExtractor()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ public static void main(String[] arg)
}
else {
LOG.error("The directory for this output file does not exist. Output file "
+ "will be found in the current directury under folder \"output\"");
+ "will be found in the current directory under folder \"output\"");
outputFile = new File(modelDirectory, "result.tsv");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public void process(JCas jcas)

if (Configuration.useFreeBase) {
try {
useFreaBase();
useFreeBase();
}
catch (Exception e) {
// TODO
Expand Down Expand Up @@ -239,7 +239,7 @@ private void terminateSentence(Sentence sentence, Token token, StringBuffer docT
+ sentence.getBegin() + "\t" + sentence.getEnd());
}

private void useFreaBase()
private void useFreeBase()
throws Exception
{

Expand Down