-
Notifications
You must be signed in to change notification settings - Fork 0
/
docstats.hs
47 lines (39 loc) · 1.65 KB
/
docstats.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
{-# LANGUAGE ViewPatterns #-}
import Control.Monad
import Text.Printf
import Python.Interpreter (py_initialize)
import Data.List
import NLP.SegEval
import NLP.Data (Annotated(..),NamedSegmentation(..))
import Util
import Datasets (load_ds)
-- | Print a CSV table of per-document segmentation statistics to stdout.
--
-- The first row is a header: @"Document"@ followed by the quoted label of
-- every statistic. Every following row describes one document returned by
-- 'load_ds': its quoted name, then each statistic applied to the list of
-- that document's segmentations (the 'segseg' of every entry), rendered
-- with 'show'.
--
-- NOTE(review): @head@, @minimum@ and @maximum@ are partial, so a document
-- with no segmentations (or only empty ones) makes this abort -- confirm
-- the dataset never contains such documents.
ex1 :: IO ()
ex1 = do
  corpus <- load_ds
  let stats =
        [ ("Total mass",     toDouble . sum . head)
        , ("Mean seg count", \ss -> mean (map length ss) :: Double)
        , ("Min seg mass",   toDouble . minimum . concat)
        , ("Mean seg mass",  \ss -> mean (concat ss) :: Double)
        , ("Max seg mass",   toDouble . maximum . concat)
        , ("1 - WD",         \ss ->
              let window = compute_window_size (concat ss)
              in 1 - mean_pairwise_permuted (windowdiff' window) ss)
        , ("1 - Pk",         \ss -> 1 - mean_pairwise_permuted pk ss)
        ]
          -- Similarity columns: every n for weights (1,1), then every n
          -- for weights (1,0).
          ++ [ simStat n weights | weights <- [(1,1), (1,0)], n <- [1, 2, 3, 5] ]
        where
          -- Label/function pair for one parameterisation of similarity'.
          simStat n (ws, wt) =
            ( printf "S(n=%d;Ws=%.1f;Wt=%.1f)" (toInteger n) ws wt
            , mean_pairwise (similarity' n (ws, wt))
            )
          -- Widen an integral measurement to the Double used by every column.
          toDouble x = fromIntegral x :: Double
      descs = map fst stats
      funcs = map snd stats
      quoted field = "\"" ++ field ++ "\""
  -- Header row.
  printf "\"Document\",%s\n" (intercalate "," (map quoted descs))
  -- One data row per document.
  forM_ corpus $ \(Annotated docname _toks segmentations) -> do
    let segLists = [segseg sg | sg <- segmentations]
        cells    = [show (stat segLists) | stat <- funcs]
    -- Disabled alternative that separates the "annotators:docsouth" segmentation:
    --let ([segseg->docsouth], map segseg->others) = partition (("annotators:docsouth"==).segname) segmentations
    printf "\"%s\",%s\n" docname (intercalate "," cells)
-- | Program entry point: runs 'py_initialize' first, then prints the
-- statistics table via 'ex1'.
main :: IO ()
main = py_initialize >> ex1