-
Notifications
You must be signed in to change notification settings - Fork 3
/
install.fish
executable file
·156 lines (141 loc) · 5.08 KB
/
install.fish
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env fish
# -r rebuild spacy from scratch with Serbian data + train the model
# -u update Serbian data (no model training)
# -t train the model
# -2 use spacy 2.3 build process
# -3 use spacy 3.0 build process: doesn't work in this branch. Working on it in a separate branch with the new lemmatizer.
# -d (only together with -r) also download the German model de_core_news_sm
# -e (only together with -r) also download the English model en_core_web_sm
# Because the Serbian pipeline will be used together with the slovo-app,
# we'll also use this script to install the necessary modules for Django etc.
# Parse the command-line flags. -r/-u and -2/-3 are mutually exclusive pairs.
# argparse prints its own diagnostic on invalid input; stop here instead of
# silently continuing the installation with default flags.
argparse -x 'r,u' -x '2,3' 'r' 'u' 'v' 'd' 'e' '2' '3' 't' -- $argv
or exit 1

# Flags are checked with `set -q` (is the variable defined?) rather than
# `test $_flag_x`, which expands to a bare `test -x` / `test` with no
# arguments and only gives the right answer through test's one-argument mode.

# Neither -r nor -u given: default to updating the Serbian data.
if not set -q _flag_u; and not set -q _flag_r
    set _flag_u "-u"
end

# Neither -2 nor -3 given: default to the spacy 2.3 build process.
if not set -q _flag_2; and not set -q _flag_3
    set _flag_2 "-2"
end

if set -q _flag_r
    # rebuild from scratch requires both the data + training the model
    set _flag_t "-t"
    set _flag_u "-u"
end
# check installed version
# Activates the virtualenv in ./env when it exists and stores the first digit
# of the installed spacy version in $installed (empty when pip reports no
# version); without a virtualenv $installed is 0.
if test -d env
    source env/bin/activate.fish
    # Use fish's builtin regex matching instead of `grep -P`: no GNU grep
    # (ggrep on macOS) is required and $installed is set on every platform,
    # not only on Linux and Darwin.
    # `string match -r` prints the whole match followed by the capture group.
    set -l version_match (pip3 show spacy | string match -r '^Version:\s*(\d)')
    set installed
    if set -q version_match[2]
        set installed $version_match[2]
    end
else
    set installed 0
end
# Install spacy from scratch (-r): clean up or create the virtualenv, then
# install the slovo-app requirements, spacy itself and the optional models.
if set -q _flag_r
    if test -d env
        echo "Removing previous installation..."
        #rm -rf env
        # remove spacy and dependencies CHECK LATER IF v3 NEEDS SOMETHING ELSE
        pip-autoremove spacy -y
        pip-autoremove spacy_lookups_data -y
        pip-autoremove sr -y
        # Because we copied Serbian files into the spacy-lookups-data clone,
        # some files may show up as modified (not staged for commit) as far
        # as git is concerned; restore them.
        # The clone is named spacy-lookups-data and the data files live in
        # its spacy_lookups_data/data/ package directory. `git -C` is used so
        # the working directory stays at the project root for the steps below.
        if test -d spacy-lookups-data
            # Capture first: multi-line output must not be passed to `test`
            # as several separate arguments.
            set -l gitoutput (git -C spacy-lookups-data status -s)
            if test -n "$gitoutput"
                git -C spacy-lookups-data checkout -- spacy_lookups_data/data/sr_lexeme_norm.json #v2
                git -C spacy-lookups-data checkout -- spacy_lookups_data/data/sr_lemma_lookup.json #v2, v3 may have additional files here
            end
        end
    else
        python3 -m venv env
    end
    source env/bin/activate.fish
    pip install -U pip setuptools wheel
    pip install pip-autoremove
    ## slovo-app requirements
    pip install -r ../slovo-app/requirements.txt
    # pip install Django==3.1.5
    # pip install requests==2.18.4
    # pip install beautifulsoup4==4.9.3
    # pip install PyYAML==5.4.1
    ## end of slovo-app requirements
    if set -q _flag_2
        pip install -U spacy==2.3.5
    else
        #v3
        # NOTE(review): the message says spacy-nightly but the package
        # installed is plain `spacy` - confirm which one is intended.
        echo "Installing spacy-nightly"
        pip install -U spacy
        #[transformers,lookups]
    end
    # install German and English models if using the -d and -e flags
    if set -q _flag_d
        python3 -m spacy download de_core_news_sm
    end
    if set -q _flag_e
        python3 -m spacy download en_core_web_sm
    end
end
# First site-packages directory of the active Python.
# NOTE(review): $site is not referenced anywhere else in the visible script.
set site (python3 -c "import site; print(site.getsitepackages()[0])")

# Update the Serbian lookup data (-u): refresh the spacy-lookups-data clone,
# copy the local Serbian tables into it and install it in editable mode.
if set -q _flag_u
    echo "Updating Serbian datasets..."
    if set -q _flag_2
        # Clone or pull the spacy lookups data
        if not test -d spacy-lookups-data
            echo "Cloning spacy lookups data..."
            git clone https://github.com/explosion/spacy-lookups-data.git
        else
            echo "Updating spacy lookup data"
            # Because we copied Serbian files into the clone, some files may
            # show up as modified (not staged for commit) as far as git is
            # concerned; restore them before pulling.
            # The paths are relative to the repository root, i.e. they include
            # the spacy_lookups_data/ package directory used by the cp below.
            set -l gitoutput (git -C spacy-lookups-data status -s)
            if test -n "$gitoutput"
                git -C spacy-lookups-data checkout -- spacy_lookups_data/data/sr_lexeme_norm.json #v2
                git -C spacy-lookups-data checkout -- spacy_lookups_data/data/sr_lemma_lookup.json #v2, v3 may have additional files here
            end
            git -C spacy-lookups-data pull
        end
        echo "Copying local sr_lemma_lookup.json to the spacy lookup data..."
        # make sure you generate SLAWS normalization files to
        # sr_lookups_data/sr_lexeme_norm.json once we move to spacy3
        cp sr_lookups_data/sr_lemma_lookup.json spacy-lookups-data/spacy_lookups_data/data/sr_lemma_lookup.json
        cp sr_lookups_data/sr_lexeme_norm.json spacy-lookups-data/spacy_lookups_data/data/sr_lexeme_norm.json
        # Stop if the clone is missing: otherwise `pip install -e .` would run
        # in the project root and the following `cd ..` would leave it.
        cd spacy-lookups-data/
        or exit 1
        echo "Installing lookups data..."
        # this next step may take a bit to complete
        pip install -e .
        cd ..
        # Use Serbian pipeline
        echo "Set up Serbian pipeline..."
        python3 setup.py develop
    end
    # no need to do anything here for spacy3
end
# git clone https://github.com/UniversalDependencies/UD_Serbian-SET.git
# converted UD datasets to Cyrillic; this is just for starters, we plan to do our own training data
# mkdir training_data
# python3 -m spacy convert UD_Serbian_Cyrl-SET/sr_set-ud-train.conllu sr_training_data
#python3 -m spacy convert UD_Serbian_Cyrl-SET/sr_set-ud-dev.conllu sr_training_data
#python3 -m spacy convert UD_Serbian_Cyrl-SET/sr_set-ud-test.conllu sr_training_data
# Train the model (-t) into a freshly created models/ directory.
if set -q _flag_t
    # Always start from an empty models directory; `rm -rf` succeeds
    # silently when there is nothing to remove.
    rm -rf models
    mkdir models
    echo "Train model... "
    if set -q _flag_2
        # spacy 2.x: train on the converted UD training and dev sets
        python3 -m spacy train sr models/sr sr_training_data/sr_set-ud-train.json sr_training_data/sr_set-ud-dev.json -n 1 -V 0.0.1
    else
        # spacy 3.x: complete the base config, then train from it
        python3 -m spacy init fill-config base_config.cfg config.cfg
        python3 -m spacy train config.cfg --output models/srp
        #python3 -m spacy ray train config.cfg --output models/srp --n-workers 2
    end
end
echo "Done"
# # evaluate
# python3 -m spacy evaluate models/sr/model-best sr_training_data/sr_set-ud-test.json