-
Notifications
You must be signed in to change notification settings - Fork 0
/
topictiling-experiment.hs
40 lines (35 loc) · 1.26 KB
/
topictiling-experiment.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
{-# LANGUAGE LambdaCase #-}
import Control.Monad
import Text.Printf
import Control.Parallel.Strategies
import System.Environment (getArgs)
import System.Directory (doesFileExist)
import Data.Binary (decodeFile)
import qualified NLP.Segmentation.TopicTiling as TT
import NLP.Data (Annotated(..))
import Datasets (load_ds)
-- | Evaluate TopicTiling on the test set for several window sizes and print
-- the results to stdout as CSV.
--
-- Expects exactly one command-line argument: the path to a serialized LDA
-- model, which is read with 'decodeFile'. The program fails with a usage
-- message when the argument count is wrong, and with an explicit error when
-- the model file does not exist.
--
-- One CSV row is emitted per (document, configuration) pair, with columns
-- @document@, @w@, @sentence gap scores@ and @word topic assignments@.
main :: IO ()
main = do
  -- Match on the shape of the argument list instead of checking its length
  -- and then calling the partial 'head'.
  ldaFile <- getArgs >>= \case
    [path] -> return path
    _      -> fail "Usage: topictiling-experiment <path to LDA model>"
  -- Check for the file up front so a missing model yields a readable error
  -- naming the path.
  lda <- doesFileExist ldaFile >>= \case
    True  -> decodeFile ldaFile
    False -> fail $ "Model file does not exist: " ++ ldaFile
  testSet <- load_ds
  let -- One configuration per window size, all derived from the default
      -- configuration for the loaded model.
      configs =
        [ (TT.defaultConfig lda) { TT.w = w }
        | w <- [3, 4, 6, 8, 10]
        ]
      -- NOTE(review): string fields are wrapped in double quotes, but any
      -- double quote embedded in a name or in the 'show' output is not
      -- escaped -- confirm these values never contain '"'.
      csvLines :: [String]
      csvLines =
        [ printf "\"%s\",%d,\"%s\",\"%s\"\n"
            (name doc)
            (TT.w config)
            (show $ TT.gapScores result)
            (show $ TT.wordTopics result)
        | doc <- testSet
        , config <- configs
        , let result = TT.eval config (document doc)
        ]
        -- Each line is fully evaluated ('rdeepseq') in parallel through a
        -- buffer of 4; forcing a line forces the 'TT.eval' result it shows.
        `using` parBuffer 4 rdeepseq
  putStr "\"document\",\"w\",\"sentence gap scores\",\"word topic assignments\"\n"
  mapM_ putStr csvLines