From 05b58465b0b5caec2629885c7041b1fce24e47aa Mon Sep 17 00:00:00 2001 From: janlukasschroeder Date: Mon, 14 Oct 2024 16:13:47 -0400 Subject: [PATCH] added PDF Generator API --- README.md | 23 +++++++++++++++++++++++ examples.py | 19 +++++++++++++++++++ sec_api/__init__.py | 1 + sec_api/index.py | 36 ++++++++++++++++++++++++++++++++++-- setup.py | 2 +- 5 files changed, 78 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3282b2a..70b2ca8 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ It includes: - [SEC Filing Search and Full-Text Search API](#sec-edgar-filings-query-api) - [Real-Time Filing Stream API](#sec-edgar-filings-real-time-stream-api) - [Filing Download & PDF Render API](#filing-render--download-api) +- [PDF Generator API](#pdf-generator-api) **Converter & Extractor APIs:** @@ -175,6 +176,28 @@ with open("filename.pdf", "wb") as f: > See the documentation for more details: https://sec-api.io/docs/sec-filings-render-api +## PDF Generator API + +SEC filings, including Forms 10-K, 10-Q, 8-K, and others, are typically published in HTML, XML, or text formats. The PDF Generator API enables the conversion of any SEC filing or exhibit into a PDF file, preserving all original formatting, tables, images, and other elements from the filing. 
+ +```python +from sec_api import PdfGeneratorApi + +pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY") + +# Form 8-K exhibit URL +edgar_file_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm" +# Form 10-K filing URL +# edgar_file_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm" + +pdf_file = pdfGeneratorApi.get_pdf(edgar_file_url) + +with open("filename.pdf", "wb") as f: + f.write(pdf_file) +``` + +> See the documentation for more details: https://sec-api.io/docs/sec-filings-render-api + ## SEC EDGAR Filings Real-Time Stream API The Stream API provides a live stream (aka feed) of newly published filings on SEC EDGAR via WebSockets. A new filing is sent to your connected client as soon as it is published. diff --git a/examples.py b/examples.py index c3d6162..c3d564f 100644 --- a/examples.py +++ b/examples.py @@ -1,5 +1,6 @@ from sec_api.index import ( RenderApi, + PdfGeneratorApi, XbrlApi, ExtractorApi, MappingApi, @@ -43,6 +44,24 @@ f.write(binary_data) # """ +# +# PDF Generator API +# +""" +pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY") + +# Form 8-K exhibit URL +edgar_file_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm" +# Form 10-K filing URL +# edgar_file_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm" + +pdf_file = pdfGeneratorApi.get_pdf(edgar_file_url) + +with open("filename.pdf", "wb") as f: + f.write(pdf_file) +# """ + + # # XBRL-to-JSON API example # diff --git a/sec_api/__init__.py b/sec_api/__init__.py index 9529306..bb93a91 100644 --- a/sec_api/__init__.py +++ b/sec_api/__init__.py @@ -2,6 +2,7 @@ from sec_api.index import QueryApi from sec_api.index import FullTextSearchApi from sec_api.index import RenderApi +from sec_api.index import PdfGeneratorApi # Extractor & Converter APIs from sec_api.index import XbrlApi diff --git a/sec_api/index.py 
class PdfGeneratorApi:
    """
    Base class for PDF Generator API

    Sends a GET request to ``pdf_generator_api_endpoint`` with ``type=pdf``,
    the filing/exhibit URL and the API token as query parameters, and hands
    back the raw response body on HTTP 200.
    """

    def __init__(self, api_key, proxies=None):
        """
        :param api_key: token sent as the ``token`` query parameter.
        :param proxies: optional value passed to ``requests.get`` as
            ``proxies``; any falsy value is replaced by an empty dict.
        """
        self.api_key = api_key
        self.api_endpoint = pdf_generator_api_endpoint
        self.proxies = proxies if proxies else {}

    def _build_request_url(self, url):
        """
        Return the full request URL for ``url``.

        Every ``ix?doc=/`` occurrence is removed from ``url`` first. The
        result and the API key are then percent-encoded so that characters
        with a meaning inside a query string (``&``, ``?``, ``#``, spaces,
        ...) cannot be mistaken for additional parameters. ``:`` and ``/``
        are left unescaped, so a URL made only of unreserved characters plus
        ``:`` and ``/`` yields exactly the same request string as plain
        concatenation would.
        """
        # Local import: the module-level import block is outside this class.
        from urllib.parse import quote

        file_url = re.sub(r"ix\?doc=/", "", url)
        return (
            self.api_endpoint
            + "?type=pdf&url="
            + quote(file_url, safe=":/")
            + "&token="
            + quote(self.api_key, safe="")
        )

    def get_pdf(self, url):
        """
        Request the PDF rendition of ``url`` and return the response body.

        The request is attempted up to three times. A 429 response triggers
        a wait of 0.5 s, then 1.0 s, before the next attempt; no wait happens
        after the last attempt because nothing would follow it. Any other
        non-200 response is passed to ``handle_api_error`` immediately.

        :param url: filing or exhibit URL; ``ix?doc=/`` is stripped from it.
        :return: ``response.content`` of the first HTTP 200 response.
        """
        request_url = self._build_request_url(url)
        max_attempts = 3

        # use backoff strategy to handle "too many requests" error.
        for attempt in range(max_attempts):
            response = requests.get(request_url, proxies=self.proxies)
            if response.status_code == 200:
                return response.content
            elif response.status_code == 429:
                if attempt < max_attempts - 1:
                    # wait 500 * (attempt + 1) milliseconds and try again
                    time.sleep(0.5 * (attempt + 1))
            else:
                # NOTE(review): handle_api_error is defined elsewhere in the
                # module; presumably it raises. If it returns, the loop
                # simply continues with the next attempt.
                handle_api_error(response)

        # All attempts are used up without a 200: report the last response.
        handle_api_error(response)