-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrequirements.txt
106 lines (86 loc) · 2.31 KB
/
requirements.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#requirements.txt with the packages you used
pandas==1.5.3
numpy==1.26.4
et-xmlfile==1.1.0
grobid-client-python==0.0.8
lxml==5.2.1
openpyxl==3.1.2
plotly==5.20.0
PyPDF2==3.0.1
PyMuPDF==1.24.2
PyMuPDFb==1.24.1
regex==2023.12.25
seaborn==0.13.2
squarify==0.4.3
zipp==3.17.0
kaleido==0.2.1
'''Additional Setup Instructions'''
# 1. Download GROBID
# wget https://github.com/kermitt2/grobid/archive/0.8.0.zip
# 2. Unzip the downloaded file
# unzip 0.8.0.zip
''' Run GROBID in terminal before running the notebook'''
# 3. Change to the GROBID directory
# cd grobid-0.8.0
# 4. Run GROBID
# ./gradlew run
# Python Import Statements
# Imports for handling PDF files
import fitz # PyMuPDF for PDF rendering and manipulation
import PyPDF2 # PyPDF2 for PDF file reading and manipulation
# Standard library imports
# Import for working with ZIP files
import zipfile
# Import for handling HTTP requests
import requests
from bs4 import BeautifulSoup
# Standard library and third-party imports (note: `import regex as re` below rebinds `re` after `import re`)
import io
from io import BytesIO
import urllib.request
from urllib.request import urlopen
import os
import re
import sys
import textwrap
import subprocess
import regex as re
import numpy as np
import pandas as pd
from xml.etree import ElementTree as et
from lxml import etree
from collections import Counter
from grobid_client.grobid_client import GrobidClient
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colormaps
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.ticker as ticker
import squarify
import seaborn as sns
import plotly.graph_objects as go
import plotly.io as pio
# from numpy import stack
# from sympy import li
############################################################################################################
'''
Optional Installations
Note: Ensure that necessary packages are installed.
Uncomment the following lines if installations are needed.
'''
# !pip install PyPDF2
# !pip install xhtml2pdf requests lxml
# !pip install xhtml2pdf requests
# !pip install lxml
# !pip install beautifulsoup4
# !pip install pandas
# !pip install numpy
# !pip install regex
# !pip install zipfile  (NOTE: zipfile is part of the Python standard library; no installation is needed)
# !pip install lxml
# !pip install pandas openpyxl
# !pip install beautifulsoup4 requests lxml regex
# !pip install pandas openpyxl
# !pip install kaleido
# !pip install -U kaleido