From 36890ec7d195c7928f0f4a63a906712ddc61ac2f Mon Sep 17 00:00:00 2001 From: Alanscut Date: Wed, 16 Sep 2020 12:03:40 +0800 Subject: [PATCH] add KeyWordComputerTest --- .../org/ansj/splitWord/impl/GetWordsImpl.java | 222 +++++++++--------- .../ansj/app/keyword/KeyWordComputerTest.java | 15 +- .../org/ansj/app/phrase/OccurrenceTest.java | 56 +++++ 3 files changed, 179 insertions(+), 114 deletions(-) create mode 100644 src/test/java/org/ansj/app/phrase/OccurrenceTest.java diff --git a/src/main/java/org/ansj/splitWord/impl/GetWordsImpl.java b/src/main/java/org/ansj/splitWord/impl/GetWordsImpl.java index efbff829..fb1e7096 100644 --- a/src/main/java/org/ansj/splitWord/impl/GetWordsImpl.java +++ b/src/main/java/org/ansj/splitWord/impl/GetWordsImpl.java @@ -6,127 +6,127 @@ public class GetWordsImpl implements GetWords { - /** - * offe : 当前词的偏移量 - */ - public int offe; + /** + * offe : 当前词的偏移量 + */ + public int offe; + int charsLength = 0; + public char[] chars; + private int charHashCode; + private int start = 0; + public int end = 0; + private int baseValue = 0; + private int checkValue = 0; + private int tempBaseValue = 0; + public int i = 0; + private String str = null; - /** - * 构造方法,同时加载词典,传入词语相当于同时调用了setStr() ; - */ - public GetWordsImpl(String str) { - setStr(str); - } + /** + * 构造方法,同时加载词典,传入词语相当于同时调用了setStr() ; + */ + public GetWordsImpl(String str) { + setStr(str); + } - /** - * 构造方法,同时加载词典 - */ - public GetWordsImpl() { - } + /** + * 构造方法,同时加载词典 + */ + public GetWordsImpl() { + } - int charsLength = 0; - @Override - public void setStr(String str) { - setChars(str.toCharArray(), 0, str.length()); - } + @Override + public void setStr(String str) { + setChars(str.toCharArray(), 0, str.length()); + } - @Override - public void setChars(char[] chars, int start, int end) { - this.chars = chars; - i = start; - this.start = start; - charsLength = end; - checkValue = 0; - } + @Override + public void setChars(char[] chars, int start, int end) { + this.chars = chars; + i = start; + this.start = start; + charsLength = end; + checkValue = 0; + } - public char[] chars; - private int charHashCode; - private int start = 0; - public int end = 0; - private int baseValue = 0; - private int checkValue = 0; - private int tempBaseValue = 0; - public int i = 0; - private String str = null; - @Override - public String allWords() { - for (; i < charsLength; i++) { - charHashCode = chars[i]; - end++; - switch (getStatement()) { - case 0: - if (baseValue == chars[i]) { - str = String.valueOf(chars[i]); - offe = i; - start = ++i; - end = 0; - baseValue = 0; - tempBaseValue = baseValue; - return str; - } else { - int startCharStatus = DATDictionary.getItem(chars[start]).getStatus(); - if (startCharStatus == 1) { //如果start的词的status为1,则将start设为i;否则start加1 - start = i; - i--; - end = 0; - baseValue = 0; - } else { - i = start; - start++; - end = 0; - baseValue = 0; - } - break; - } - case 2: - i++; - offe = start; - tempBaseValue = baseValue; - return DATDictionary.getItem(tempBaseValue).getName(); - case 3: - offe = start; - start++; - i = start; - end = 0; - tempBaseValue = baseValue; - baseValue = 0; - return DATDictionary.getItem(tempBaseValue).getName(); - } + @Override + public String allWords() { + for (; i < charsLength; i++) { + charHashCode = chars[i]; + end++; + switch (getStatement()) { + case 0: + if (baseValue == chars[i]) { + str = String.valueOf(chars[i]); + offe = i; + start = ++i; + end = 0; + baseValue = 0; + tempBaseValue = baseValue; + return str; + } else { + int startCharStatus = DATDictionary.getItem(chars[start]).getStatus(); + if (startCharStatus == 1) { //如果start的词的status为1,则将start设为i;否则start加1 + start = i; + i--; + end = 0; + baseValue = 0; + } else { + i = start; + start++; + end = 0; + baseValue = 0; + } + break; + } + case 2: + i++; + offe = start; + tempBaseValue = baseValue; + return DATDictionary.getItem(tempBaseValue).getName(); + case 3: + offe = start; + start++; + i = start; + end = 0; + tempBaseValue = baseValue; + baseValue = 0; + return DATDictionary.getItem(tempBaseValue).getName(); + } - } - end = 0; - baseValue = 0; - i = 0; - return null; - } + } + end = 0; + baseValue = 0; + i = 0; + return null; + } - /** - * 根据用户传入的c得到单词的状态. 0.代表这个字不在词典中 1.继续 2.是个词但是还可以继续 3.停止已经是个词了 - * - * @param c - * @return - */ - private int getStatement() { - checkValue = baseValue; - baseValue = DATDictionary.getItem(checkValue).getBase() + charHashCode; - if (baseValue < DATDictionary.arrayLength) { - AnsjItem temp = DATDictionary.getItem(baseValue); - if (temp.getCheck() == checkValue || temp.getCheck() == -1) { - return DATDictionary.getItem(baseValue).getStatus(); - } - } - return 0; - } + /** + * 根据用户传入的c得到单词的状态. 0.代表这个字不在词典中 1.继续 2.是个词但是还可以继续 3.停止已经是个词了 + * + * @param c + * @return + */ + private int getStatement() { + checkValue = baseValue; + baseValue = DATDictionary.getItem(checkValue).getBase() + charHashCode; + if (baseValue < DATDictionary.arrayLength) { + AnsjItem temp = DATDictionary.getItem(baseValue); + if (temp.getCheck() == checkValue || temp.getCheck() == -1) { + return DATDictionary.getItem(baseValue).getStatus(); + } + } + return 0; + } - public AnsjItem getItem() { - return DATDictionary.getItem(tempBaseValue); - } + public AnsjItem getItem() { + return DATDictionary.getItem(tempBaseValue); + } - @Override - public int getOffe() { - return offe; - } + @Override + public int getOffe() { + return offe; + } } diff --git a/src/test/java/org/ansj/app/keyword/KeyWordComputerTest.java b/src/test/java/org/ansj/app/keyword/KeyWordComputerTest.java index 7e1c11e3..b10a0deb 100644 --- a/src/test/java/org/ansj/app/keyword/KeyWordComputerTest.java +++ b/src/test/java/org/ansj/app/keyword/KeyWordComputerTest.java @@ -1,8 +1,11 @@ package org.ansj.app.keyword; import org.ansj.splitWord.analysis.NlpAnalysis; +import org.junit.Assert; import org.junit.Test; +import java.util.List; + public class KeyWordComputerTest { @Test @@ -12,7 +15,13 @@ public void test() { System.out.println(kwc.computeArticleTfidf(content)); } - - - + @Test + public void test1() { + KeyWordComputer keyWordComputer = new KeyWordComputer(); + keyWordComputer.setAnalysisType(new NlpAnalysis()); + keyWordComputer = new KeyWordComputer(1); + List list = keyWordComputer.computeArticleTfidf("", ""); + System.out.println(list); + Assert.assertTrue(list.size() == 0); + } } diff --git a/src/test/java/org/ansj/app/phrase/OccurrenceTest.java b/src/test/java/org/ansj/app/phrase/OccurrenceTest.java new file mode 100644 index 00000000..cf19e9b6 --- /dev/null +++ b/src/test/java/org/ansj/app/phrase/OccurrenceTest.java @@ -0,0 +1,56 @@ +package org.ansj.app.phrase; + +import org.ansj.domain.Term; +import org.junit.Assert; +import org.junit.Test; +import org.nlpcn.commons.lang.util.MapCount; + +import java.util.ArrayList; +import java.util.List; + +public class OccurrenceTest { + List terms = new ArrayList<>(); + Occurrence occurrence = new Occurrence(terms); + + @Test + public void test() { + occurrence.addLeftTerm("a"); + MapCount leftTerms = occurrence.getLeftTerms(); + System.out.println("leftTerms.size()=" + leftTerms.size()); + Assert.assertTrue(leftTerms.size() == 1); + occurrence.addRightTerm("b"); + MapCount rightTerms = occurrence.getRightTerms(); + System.out.println("rightTerms.size()=" + rightTerms.size()); + Assert.assertTrue(rightTerms.size() == 1); + } + + @Test + public void test1() { + List terms1 = occurrence.getTerms(); + System.out.println("terms1.size()=" + terms1.size()); + Assert.assertTrue(terms1.size() == 0); + occurrence.increaseFrequency(); + System.out.println("occurrence.getFrequency()=" + occurrence.getFrequency()); + Assert.assertTrue(occurrence.getFrequency() == 1); + } + + @Test + public void test2() { + occurrence.setPmi(1.0); + System.out.println("occurrence.getPmi()=" + occurrence.getPmi()); + Assert.assertTrue(occurrence.getPmi() == 1.0); + occurrence.setLeftEntropy(2.0); + System.out.println("occurrence.getLeftEntropy()=" + occurrence.getLeftEntropy()); + Assert.assertTrue(occurrence.getLeftEntropy() == 2.0); + } + + @Test + public void test3() { + occurrence.setRightEntropy(3.0); + System.out.println("occurrence.getRightEntropy()=" + occurrence.getRightEntropy()); + Assert.assertTrue(occurrence.getRightEntropy() == 3.0); + occurrence.setScore(4.0); + System.out.println("occurrence.getScore()=" + occurrence.getScore()); + Assert.assertTrue(occurrence.getScore() == 4.0); + } +}