forked from Unstructured-IO/unstructured
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
246 lines (194 loc) · 7.6 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# Name of the Python package; the test directory is referenced as test_<name>.
PACKAGE_NAME := unstructured
# pip release pinned by the install targets and handed to the docker build script.
PIP_VERSION := 23.1.2
# Machine architecture as printed by `uname -m`; forwarded to scripts/install-pandoc.sh.
ARCH := $(shell uname -m)
# Working directory as printed by `pwd` when the Makefile is parsed; used for
# the docker bind mounts.
CURRENT_DIR := $(shell pwd)
# Lists the documented targets: prints every line of the Makefile that starts
# with "## " followed by a letter, with the "## " marker removed.
.PHONY: help
help: Makefile
	@sed -n 's/^\(## \)\([a-zA-Z]\)/\2/p' $<
###########
# Install #
###########
## install-base: installs core requirements needed for text processing bricks
.PHONY: install-base
install-base: \
    install-base-pip-packages \
    install-nltk-models

## install: installs all test, dev, and experimental requirements
.PHONY: install
install: \
    install-base-pip-packages \
    install-dev \
    install-nltk-models \
    install-test \
    install-huggingface \
    install-unstructured-inference

# Same set as `install` minus install-dev.
.PHONY: install-ci
install-ci: \
    install-base-pip-packages \
    install-nltk-models \
    install-huggingface \
    install-unstructured-inference \
    install-test
# Pins pip to PIP_VERSION, then installs the base requirements.
.PHONY: install-base-pip-packages
install-base-pip-packages:
	python3 -m pip install pip==$(PIP_VERSION)
	python3 -m pip install -r requirements/base.txt

# Pins pip to PIP_VERSION, then installs the huggingface requirements.
.PHONY: install-huggingface
install-huggingface:
	python3 -m pip install pip==$(PIP_VERSION)
	python3 -m pip install -r requirements/huggingface.txt
# Downloads the NLTK resources 'punkt' and 'averaged_perceptron_tagger'.
# Declared with .PHONY (the previous ".PHONE" spelling was an ordinary,
# meaningless target, so this rule was never actually marked phony).
# Uses python3, matching the interpreter the pip install targets run under.
.PHONY: install-nltk-models
install-nltk-models:
	python3 -c "import nltk; nltk.download('punkt')"
	python3 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
# Installs the test requirements, then weaviate-client as a separate step.
.PHONY: install-test
install-test:
	python3 -m pip install -r requirements/test.txt
	# NOTE(robinson) - Installing weaviate-client separately here because the requests
	# version conflicts with label_studio_sdk
	python3 -m pip install weaviate-client

# Installs the development requirements.
.PHONY: install-dev
install-dev:
	python3 -m pip install -r requirements/dev.txt

# Installs the build requirements.
.PHONY: install-build
install-build:
	python3 -m pip install -r requirements/build.txt
# Ingest connectors; each one has a requirements/ingest-<name>.txt file.
INGEST_CONNECTORS := google-drive s3 gcs azure discord github gitlab reddit slack wikipedia
# One install-ingest-<name> target per connector.
INSTALL_INGEST_TARGETS := $(addprefix install-ingest-,$(INGEST_CONNECTORS))

## install-ingest-s3: install requirements for the s3 connector
.PHONY: $(INSTALL_INGEST_TARGETS)
# Static pattern rule: the stem ($*) is the connector name. Every connector is
# installed through `python3 -m pip` so that all of them target the same
# interpreter (discord and slack previously called bare `pip`).
$(INSTALL_INGEST_TARGETS): install-ingest-%:
	python3 -m pip install -r requirements/ingest-$*.txt
# Installs the packages listed in requirements/local-inference.txt.
.PHONY: install-unstructured-inference
install-unstructured-inference:
	python3 -m pip install -r requirements/local-inference.txt

## install-local-inference: installs requirements for local inference
.PHONY: install-local-inference
install-local-inference: \
    install \
    install-unstructured-inference

# Runs the pandoc install script with ARCH exported in its environment.
.PHONY: install-pandoc
install-pandoc:
	ARCH=$(ARCH) ./scripts/install-pandoc.sh
## pip-compile: compiles all base/dev/test requirements
# Re-pins every requirements/*.in file with `pip-compile --upgrade` and copies
# the compiled build requirements to docs/requirements.txt. The commands run
# in the order listed; make stops at the first one that fails.
.PHONY: pip-compile
pip-compile:
	pip-compile --upgrade requirements/base.in
	# Extra requirements for huggingface staging functions
	pip-compile --upgrade requirements/huggingface.in
	pip-compile --upgrade requirements/test.in
	pip-compile --upgrade requirements/dev.in
	pip-compile --upgrade requirements/build.in
	pip-compile --upgrade requirements/local-inference.in
	# NOTE(robinson) - docs/requirements.txt is where the GitHub action for building
	# sphinx docs looks for additional requirements
	cp requirements/build.txt docs/requirements.txt
	pip-compile --upgrade requirements/ingest-s3.in
	pip-compile --upgrade requirements/ingest-gcs.in
	pip-compile --upgrade requirements/ingest-azure.in
	pip-compile --upgrade requirements/ingest-discord.in
	pip-compile --upgrade requirements/ingest-reddit.in
	pip-compile --upgrade requirements/ingest-github.in
	pip-compile --upgrade requirements/ingest-gitlab.in
	pip-compile --upgrade requirements/ingest-slack.in
	pip-compile --upgrade requirements/ingest-wikipedia.in
	pip-compile --upgrade requirements/ingest-google-drive.in
## install-project-local: install unstructured into your local python environment
# Runs the full `install` target first, then installs the current directory in
# editable mode. Uses `python3 -m pip` so the package lands in the same
# interpreter that the prerequisite install targets populated.
.PHONY: install-project-local
install-project-local: install
	# MAYBE TODO: fail if already exists?
	python3 -m pip install -e .

## uninstall-project-local: uninstall unstructured from your local python environment
# Removes PACKAGE_NAME via the same interpreter; no -y flag is passed, so pip
# still asks for confirmation.
.PHONY: uninstall-project-local
uninstall-project-local:
	python3 -m pip uninstall $(PACKAGE_NAME)
#################
# Test and Lint #
#################
## test: runs all unittests
# Runs pytest on test_<PACKAGE_NAME> with the repository root on PYTHONPATH and
# a coverage report that lists missing lines.
.PHONY: test
test:
	PYTHONPATH=. pytest test_$(PACKAGE_NAME) \
		--cov=$(PACKAGE_NAME) \
		--cov-report term-missing
## check: runs linters (includes tests)
.PHONY: check
check: \
    check-src \
    check-tests \
    check-version

## check-src: runs linters (source only, no tests)
# ruff is run on the whole tree; black, flake8 and mypy on the package only.
.PHONY: check-src
check-src:
	ruff . \
		--select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 \
		--ignore PT011,PT012,SIM117
	black --line-length 100 $(PACKAGE_NAME) --check
	flake8 $(PACKAGE_NAME)
	mypy $(PACKAGE_NAME) --ignore-missing-imports --check-untyped-defs

# Formatting and flake8 checks for the test directory.
.PHONY: check-tests
check-tests:
	black --line-length 100 test_$(PACKAGE_NAME) --check
	flake8 test_$(PACKAGE_NAME)
## check-scripts: run shellcheck
.PHONY: check-scripts
check-scripts:
	# Fail if any of these files have warnings
	scripts/shellcheck.sh

## check-version: run check to ensure version in CHANGELOG.md matches version in package
.PHONY: check-version
check-version:
	# Fail if syncing version would produce changes
	scripts/version-sync.sh -c -f "unstructured/__version__.py" semver
## tidy: run ruff autofixes and black
# Applies ruff's automatic fixes, then formats the package and test directory
# with black. The ruff call carries the same --ignore list as check-src so
# that tidy never rewrites code for rules the linter deliberately ignores.
.PHONY: tidy
tidy:
	# ruff autofix is best effort: a non-zero exit must not block black below.
	ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --ignore PT011,PT012,SIM117 --fix-only || true
	black --line-length 100 $(PACKAGE_NAME)
	black --line-length 100 test_$(PACKAGE_NAME)
## version-sync: update __version__.py with most recent version from CHANGELOG.md
.PHONY: version-sync
version-sync:
	scripts/version-sync.sh -f "unstructured/__version__.py" semver

# Fails when the recorded coverage is below 95 percent.
.PHONY: check-coverage
check-coverage:
	coverage report --fail-under=95

## check-deps: check consistency of dependencies
.PHONY: check-deps
check-deps:
	scripts/consistent-deps.sh
##########
# Docker #
##########
# Docker targets are provided for convenience only and are not required in a standard development environment
# Image tag used by the docker targets; can be overridden from the environment
# or the command line.
DOCKER_IMAGE ?= unstructured:dev

# Runs the docker build script with the pinned pip version and the image name
# exported in its environment.
.PHONY: docker-build
docker-build:
	PIP_VERSION=$(PIP_VERSION) DOCKER_IMAGE_NAME=$(DOCKER_IMAGE) ./scripts/docker-build.sh

# Starts an interactive, self-removing container from the image.
.PHONY: docker-start-bash
docker-start-bash:
	docker run -ti --rm $(DOCKER_IMAGE)
# Runs pytest on test_unstructured inside the image, with the local test
# directories bind-mounted under /home.
# Optional: TEST_NAME=<expr> is passed to pytest as `-k '<expr>'`. It is
# single-quoted so that expressions containing spaces (e.g. "a or b") reach
# pytest as one argument. The mount paths are quoted so a checkout path that
# contains spaces still works.
.PHONY: docker-test
docker-test:
	docker run --rm \
	-v "$(CURRENT_DIR)/test_unstructured:/home/test_unstructured" \
	-v "$(CURRENT_DIR)/test_unstructured_ingest:/home/test_unstructured_ingest" \
	$(DOCKER_IMAGE) \
	bash -c "pytest $(if $(TEST_NAME),-k '$(TEST_NAME)',) test_unstructured"
# Runs the smoke-test script with the image tag exported as DOCKER_IMAGE.
.PHONY: docker-smoke-test
docker-smoke-test:
	DOCKER_IMAGE=$(DOCKER_IMAGE) ./scripts/docker-smoke-test.sh
###########
# Jupyter #
###########
# Starts jupyter-notebook inside the image on port 8888, with the current
# directory bind-mounted at /home. Token and password are passed as empty strings.
.PHONY: docker-jupyter-notebook
docker-jupyter-notebook:
	docker run -p 8888:8888 \
		--mount type=bind,source=$(realpath .),target=/home \
		--entrypoint jupyter-notebook \
		-t --rm $(DOCKER_IMAGE) \
		--allow-root --port 8888 --ip 0.0.0.0 \
		--NotebookApp.token='' --NotebookApp.password=''

# Starts jupyter-notebook on the host with the current directory on PYTHONPATH
# and JUPYTER_PATH. Token and password are passed as empty strings.
.PHONY: run-jupyter
run-jupyter:
	PYTHONPATH=$(realpath .) JUPYTER_PATH=$(realpath .) \
		jupyter-notebook --NotebookApp.token='' --NotebookApp.password=''