diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000000..d540f4c52f --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,22 @@ +changelog: + categories: + - title: New Features + labels: + - connector-update + - new-connector + - parsons-core + - title: Automated Testing + labels: + - testing + - title: Bug Fixes + labels: + - bug-fix + - title: Documentation + labels: + - documentation + # - title: New Contributors + # labels: + # -🎉-first-PR + - title: Other Changes + labels: + - "*" \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 78c471e33b..e34f0872c2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,167 +1,15 @@ We're thrilled that you're thinking about contributing to Parsons! Welcome to our contributor community. -Here are some ways you can contribute: +You can find a detailed version of this guide [on our website](https://www.parsonsproject.org/pub/contributing-guide/). -* [submit an issue](#submitting-issues) -* [contribute code](#contributing-code-to-parsons) -* [contribute documentation](#documentation) -* [add sample code to our library of examples](#contributing-sample-code) +The best way to get involved is by joining our Slack. To join, email engineering@movementcooperative.org. In addition to all the great discussions that happen on our Slack, we also have virtual events including trainings, pairing sessions, social hangouts, discussions, and more. Every other Thursday afternoon we host 🎉 Parsons Parties 🎉 on Zoom where we work on contributions together. -Every other Thursday afternoon we host 🎉 Parsons Parties 🎉 on Zoom where we work on contributions together. Reach out if you'd like to join - it's a great way to get involved. +You can contribute by: -## Submitting Issues +* [submitting issues](https://www.parsonsproject.org/pub/contributing-guide#submitting-issues) +* [contributing code](https://www.parsonsproject.org/pub/contributing-guide/) +* [updating our documentation](https://www.parsonsproject.org/pub/updating-documentation/) +* [teaching and mentoring](https://www.parsonsproject.org/pub/contributing-guide#teaching-and-mentoring) +* [helping "triage" issues and review pull requests](https://www.parsonsproject.org/pub/contributing-guide#maintainer-tasks) -We encourage folks to review existing issues before starting a new issue. - -* If the issue you want already exists, feel free to use the *thumbs up* emoji to upvote the issue. -* If you have additional documentation or context that would be helpful, please add it using comments. -* If you have code snippets, but don’t have time to do the full write-up, please add them to the issue! - -We use labels to help us classify issues. They include: -* **bug** - something in Parsons isn’t working the way it should -* **enhancement** - new feature or request (e.g. a new API connector) -* **good first issue** - an issue that would be good for someone who is new to Parsons - -## Contributing Code to Parsons - -Generally, code contributions to Parsons will be either enhancements or bug fixes (or contributions of [sample code](#sample-code), discussed below). All changes to the repository are made [via pull requests](#submitting-a-pull-request). - -If you would like to contribute code to Parsons, please review the issues in the repository and find one you would like to work on.
If you are new to Parsons or to open source projects, look for issues with the [**good first issue**](https://github.com/move-coop/parsons/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) label. Once you have found your issue, please add a comment to the issue that lets others know that you are interested in working on it. If you're having trouble finding something to work on, please ask us for help on Slack. - -The bulk of Parsons is made up of Connector classes, which are Python classes that help move data in and out of third party services. When you feel ready, you may want to contribute by [adding a new Connector class](https://move-coop.github.io/parsons/html/build_a_connector.html). - -### Making Changes to Parsons - -To make code changes to Parsons, you'll need to set up your development environment, make your changes, and then submit a pull request. - -To set up your development environment: - -* Fork the Parsons project using [the “Fork” button in GitHub](https://guides.github.com/activities/forking/) -* Clone your fork to your local computer -* Set up a [virtual environment](#virtual-environments) -* Install the [dependencies](#installing-dependencies) -* Check that everything's working by [running the unit tests](#unit-tests) and the [linter](#linting) - -Now it's time to make your changes. We suggest taking a quick look at our [coding conventions](#coding-conventions) - it'll make the review process easier down the line. In addition to any code changes, make sure to update the documentation and the unit tests if necessary. Not sure if your changes require test or documentation updates? Just ask in Slack or through a comment on the relevant issue. When you're done, make sure to run the [unit tests](#unit-tests) and the [linter](#linting) again. - -Finally, you'll want to [submit a pull request](#submitting-a-pull-request). And that's it! - -#### Virtual Environments - -If required dependencies conflict with packages or modules you need for other projects, you can create and use a [virtual environment](https://docs.python.org/3/library/venv.html). - -``` -python3 -m venv .venv # Creates a virtual environment in the .venv folder -source .venv/bin/activate # Activate in Unix or MacOS -.venv/Scripts/activate.bat # Activate in Windows -``` - -#### Installing Dependencies - -Before running or testing your code changes, be sure to install all of the required Python libraries that Parsons depends on. - -From the root of the parsons repository, run the following command: - -```bash -> pip install -r requirements.txt -``` - -#### Unit Tests - -When contributing code, we ask you to add tests that can be used to verify that the code is working as expected. All of our unit tests are located in the `test/` folder at the root of the repository. - -We use the pytest tool to run our suite of automated unit tests. The pytest command line tool is installed as part of the Parsons dependencies. - -To run the entire suite of unit tests, execute the following command: - -```bash -> pytest -rf test/ -``` - -Once the pytest tool has finished running all of the tests, it will output details about any errors or test failures it encountered. If no failures are identified, then you are good to go! - -**Note:** Some tests are written to call out to external APIs, and will be skipped as part of standard unit testing. This is expected. - -See the [pytest documentation](https://docs.pytest.org/en/latest/contents.html) for more info and many more options.
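If you're writing tests for the first time, a minimal sketch (the table contents here are hypothetical) can be as small as this - pytest collects any function whose name starts with `test_`:

```python
from parsons import Table


def test_table_num_rows():
    # Build a small Parsons Table in memory and check its row count.
    tbl = Table([{"id": 1, "name": "Ana"}, {"id": 2, "name": "Bo"}])
    assert tbl.num_rows == 2
```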
- -#### Linting - -We use the [black](https://github.com/psf/black) and [flake8](http://flake8.pycqa.org/en/latest/) tools to [lint](https://en.wikipedia.org/wiki/Lint_(software)) the code in the repository to make sure it matches our preferred style. Both tools are installed as part of the Parsons dependencies. - -Run the following commands from the root of the Parsons repository to lint your code changes: - -```bash -> flake8 --max-line-length=100 --extend-ignore=E203,W503 parsons -> black parsons ``` - -Pre-commit hooks are available to enforce black and isort formatting on -commit. You can also set up your IDE to reformat using black and/or isort on -save. - -To set up the pre-commit hooks, install pre-commit with `pip install -pre-commit`, and then run `pre-commit install`. - -#### Coding Conventions - -The following is a list of best practices to consider when writing code for the Parsons project: - -* Each tool connector should be its own unique class (e.g. ActionKit, VAN) in its own Python package. Use existing connectors as examples when deciding how to lay out your code. - -* Methods should be named using a verb_noun structure, such as `get_activist()` or `update_event()`. - -* Methods should reflect the vocabulary utilized by the original tool where possible to maintain transparency. For example, Google Cloud Storage refers to file like objects as blobs. The methods are called `get_blob()` rather than `get_file()`. - -* Methods that can work with arbitrarily large data (e.g. database or API queries) should use Parsons Tables to hold the data instead of standard Python collections (e.g. lists, dicts). - -* You should avoid abbreviations for method names and variable names where possible. - -* Inline comments explaining complex code and methods are appreciated. - -* Capitalize the word Parsons for consistency where possible, especially in documentation. - -If you are building a new connector or extending an existing connector, there are more best practices in the [How to Build a Connector](https://move-coop.github.io/parsons/html/build_a_connector.html) documentation. - -## Documentation - -Parsons documentation is built using the Python Sphinx tool. Sphinx uses the `docs/*.rst` files in the repository to create the documentation. - -We have a [documentation label](https://github.com/move-coop/parsons/issues?q=is%3Aissue+is%3Aopen+label%3Adocumentation) that may help you find good docs issues to work on. If you are adding a new connector, you will need to add a reference to the connector to one of the .rst files. Please use the existing documentation as an example. - -When editing documentation, make sure you are editing the source files (with .md or .rst extension) and not the build files (.html extension). - -The workflow for documentation changes is a bit simpler than for code changes: - -* Fork the Parsons project using [the “Fork” button in GitHub](https://guides.github.com/activities/forking/) -* Clone your fork to your local computer -* Change into the `docs` folder and install the requirements with `pip install -r requirements.txt` (you may want to set up a [virtual environment](#virtual-environments) first) -* Make your changes and re-build the docs by running `make html`. (Note: this builds only a single version of the docs, from the current files. To create docs with multiple versions like our publicly hosted docs, run `make deploy_docs`.) -* Open these files in your web browser to check that they look as you expect.
-* [Submit a pull request](#submitting-a-pull-request) - -When you make documentation changes, you only need to track the source files with git. The built docs in the html folder should not be included. - -You should not need to worry about the unit tests or the linter if you are making documentation changes only. - -## Contributing Sample Code - -One important way to contribute to the Parsons project is to submit sample code that provides recipes and patterns for how to use the Parsons library. - -We have a folder called `useful_resources/` in the root of the repository. If you have scripts that incorporate Parsons, we encourage you to add them there! - -The workflow for adding sample code is: - -* Fork the Parsons project using [the “Fork” button in GitHub](https://guides.github.com/activities/forking/) -* Clone your fork to your local computer -* Add your sample code into the `useful_resources/` folder -* [Submit a pull request](#submitting-a-pull-request) - -You should not need to worry about the unit tests or the linter if you are only adding sample code. - -## Submitting a Pull Request - -To submit a pull request, follow [these instructions to create a Pull Request from your fork](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) back to the original Parsons repository. - -The Parsons team will review your pull request and provide feedback. Please feel free to ping us if no one's responded to your Pull Request after a few days. We may not be able to review it right away, but we should be able to tell you when we'll get to it. - -Once your pull request has been approved, the Parsons team will merge your changes into the Parsons repository. +If you're not sure how to get started, please ask for help! We're happy to chat and help you find the best way to get involved. \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 1f885f4781..7fdd250950 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7 +FROM --platform=linux/amd64 python:3.7 #################### ## Selenium setup ## diff --git a/docs/build_a_connector.rst b/docs/build_a_connector.rst index 9aabc92606..f70436833b 100644 --- a/docs/build_a_connector.rst +++ b/docs/build_a_connector.rst @@ -2,401 +2,4 @@ How to Build a Connector ======================== -Connector classes are at the heart of the Parsons project. When we want to add a new service for users to connect to with Parsons, we build a new Connector class for that service. - -The documentation contains `a complete list `_ of existing connectors. Requests for new connectors are made and discussed in `our issue tracker `_. Before starting to build a new connector, check to see if there’s any discussion about it in the tracker. Ideally, you’ll have a good sense of what you and/or other users want the connector to do before you start trying to build it. Remember, you can always reach out to the community and ask for advice! - -When you’re ready to get started, make sure you have Parsons installed and that the tests run successfully. - ---------------- -Getting Started ---------------- - -The first thing you’ll need to do is create a new folder for your connector. This folder should have the same name as the module (file) within the folder, and the same name as the connector class. For example, the airtable connector is in the “airtable” folder, and the hustle connector is in the “hustle” folder. - -Inside the folder, create two files.
The first should be named __init__.py and should be empty. The second will have the same name as your folder - this is the file which will have your connector’s code. For example, in the airtable folder this file is called airtable.py and in the hustle folder it’s called hustle.py. - -The directory should look like this: - -.. code-block:: python - - yourconnectorname/ - __init__.py - yourconnectorname.py - -Next, add the reference to your connector to `parsons/__init__.py`. Specifically, open `parsons/__init__.py`, scroll to the end of the other imports, and add the following: - -.. code-block:: python - - from parsons.yourconnectorname.yourconnectorname import yourconnectorname - -Also, in `parsons/__init__.py` add 'yourconnectorname' to the end of the list `__all__`. - -Once this is done, open the yourconnectorname.py file. At the top of the file, add the following code to enable logging for our connector: - -.. code-block:: python - - import logging - - - logger = logging.getLogger(__name__) - -You’ll also want to create the Connector class itself: - -.. code-block:: python - - class YourConnectorName(object): - """ - Instantiate class. - - `Args:` - """ - - def __init__(self, api_key=None): - pass - -The text enclosed in triple quotes “”” “”” is called a docstring, and is used to provide information about the class. Typically, it includes the arguments accepted by the __init__ method of the class. - -The __init__ method defines how the class is instantiated. For instance, if you want to get an instance of the Connector class by writing `connector = YourConnectorName(table_name, api_key)` you’d have to add a table_name argument to go with the api_key argument. Your connector’s __init__ method will probably require a different set of arguments than we’ve written here, but this makes for a good start. - -In our Parsons connector classes, the __init__ method should handle authentication. That is, when we initialize our Connector, we should give it credentials so that it can connect to the third-party service. Then we won’t have to worry about authenticating in the other methods. How exactly you authenticate to the service will depend on the service, but it typically involves getting an api_key or access_token, and it almost always involves creating an account on the service. - -(Users of your connector class will need to know how to authenticate too! Take note of where you signed up for an account and how you got the api key, access token, etc. so you can include it in the documentation for your connector.) - -We like to give users two different options for getting api keys and other authentication to the connector - passing them as arguments to the __init__ method, and storing them as environmental variables. Use the Parsons utility check_env to allow for either possibility with code that looks like this: - -.. code-block:: python - - import logging - from parsons.utilities import check_env - - logger = logging.getLogger(__name__) - - - class YourConnectorName(object): - """ - Instantiate class. - - `Args:` - """ - - def __init__(self, api_key=None): - self.api_key = check_env.check('YOURCONNECTORNAME_API_KEY', api_key) - -This code looks in the environmental variables for the api key and, if it doesn’t find it, uses the api_key passed in. - -Most connectors make extensive use of existing clients/providers. Most likely, your next step will be to instantiate one of those existing clients using the authentication data, and add it to the class.
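For example, a connector for a hypothetical REST service might hand the authentication data to the Parsons ``APIConnector`` utility (covered in the Patterns section below); the base URL and basic-auth scheme here are invented for illustration:

.. code-block:: python

    from parsons.utilities import check_env
    from parsons.utilities.api_connector import APIConnector


    class YourConnectorName(object):

        def __init__(self, api_key=None):
            self.api_key = check_env.check('YOURCONNECTORNAME_API_KEY', api_key)
            # Hypothetical API root; use the real base URL for your service.
            self.uri = 'https://api.yourconnectorname.com/v1/'
            self.client = APIConnector(self.uri, auth=('x', self.api_key))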
You can see an example of this in the `Airtable Connector `_. - --------- -Patterns --------- - -Parsons has a number of patterns that should be used when developing a connector to ensure that connectors look alike, which makes them easier to use and modify. Not all patterns apply to all connectors, but when reviewing pull requests, the maintainers will be looking to see if you adhere to the patterns described in this document. - -In the sections below, we will attempt to enumerate the established patterns. We will use the `parsons.mailchimp.mailchimp.Mailchimp` connector as an example of how to implement the patterns. - -^^^^^^^^^^^^^^^^^^^^ -Class initialization -^^^^^^^^^^^^^^^^^^^^ - -**Allow configuration of a connector with environment variables as well as arguments passed to the class initializer.** Make use of `parsons.utilities.check_env.check` function to check that the value was provided either as an argument to the initializer, or in the environment. - -**When calling into a web API, use the `parsons.utilities.APIConnector` class.** The `APIConnector` class has a number of methods for making web requests, and using the `APIConnector` helps enforce consistency across connectors. The `APIConnector` is a wrapper around the Python `requests` library. - - -Mailchimp example: - -.. code-block:: python - - from parsons.utilities import check_env - from parsons.utilities.api_connector import APIConnector - - - class Mailchimp(): - """ - Instantiate Mailchimp Class - - `Args:` - api_key: - The Mailchimp-provided application key. Not required if - ``MAILCHIMP_API_KEY`` env variable set. - `Returns:` - Mailchimp Class - """ - - def __init__(self, api_key=None): - self.api_key = check_env.check('MAILCHIMP_API_KEY', api_key) - self.domain = re.findall("(?<=-).+$", self.api_key)[0] - self.uri = f'https://{self.domain}.api.mailchimp.com/3.0/' - self.client = APIConnector(self.uri, auth=('x', self.api_key)) - -In the `__init__` method above, the Mailchimp class takes one argument: `api_key`. The argument has a default value of `None`, which allows for a user to initialize the connector without any arguments (ie `Mailchimp()`. If no value is passed for `api_key` as an argument to the `__init__` method, then the `check_env.check` function will attempt to retrieve the value from the `MAILCHIMP_API_KEY` environment variable. If the value is neither passed in as argument nor in the environment, the `check_env.check` method will raise a `KeyError` exception. - -In the last line of the code snippet above, the `Mailchimp` class creates an `APIConnector` class, providing the root URL for the API (`self.uri`). The Mailchimp API accepts basic authentication as an authentication mechanism, so the `Mailchimp` connector is able to pass the `api_key` to the `APIConnector` via the `auth` keyword argument. If the API for your connector does not support basic authentication, you may need to implement your own authentication (e.g. via request headers). - -^^^^^^^^^^^^^^^^^^^^^^^^ -Your connector’s methods -^^^^^^^^^^^^^^^^^^^^^^^^ - -**The methods of your connector should generally mirror the endpoints of the API.** Every API is different, but the connector should generally look like the API it is connecting to. Methods of your connector should reference the resources the API is using (e.g. “people”, “members”, “events”). - -The following lists rules for naming common endpoints: - -* GET - single record - *get_* (e.g. get_event, get_person) -* GET - multiple records - *get_s* (e.g. 
get_members, get_people) -* POST - single record - *create_* (e.g. create_person, create_tag) -* PUT - single record - *update_* (e.g. update_person, update_event) -* DELETE - single record - *delete_* (e.g. delete_member) - -**A method’s arguments should mirror the parameters of the API endpoint it is calling.** Optional parameters should be optional in your method signature (i.e. default to `None`). - -**Use Python docstrings to document every public method of your class.** The docstrings for your public methods are used to automatically generate documentation for your connector. Having this documentation for every method makes it easier for users to pick up your connector. - -**Methods returning multiple values should return a Parsons Table.** If the list of results is empty, return an empty Parsons `Table` (not `None`). Methods returning a single value should just return the value. If the API could not find the value (eg, the ID provided for a resource was not found), return a `None` value from the method. - -Mailchimp example: - -.. code-block:: python - - class Mailchimp(): - - def get_lists(self, fields=None, exclude_fields=None, - count=None, offset=None, before_date_created=None, - since_date_created=None, before_campaign_last_sent=None, - since_campaign_last_sent=None, email=None, sort_field=None, - sort_dir=None): - """ - Get a table of lists under the account based on query parameters. Note - that argument descriptions here are sourced from Mailchimp's official - API documentation. - - `Args:` - fields: list of strings - A comma-separated list of fields to return. Reference - parameters of sub-objects with dot notation. - exclude_fields: list of strings - A comma-separated list of fields to exclude. Reference - parameters of sub-objects with dot notation. - count: int - The number of records to return. Default value is 10. Maximum - value is 1000. - offset: int - The number of records from a collection to skip. Iterating over - large collections with this parameter can be slow. Default - value is 0. - before_date_created: string - Restrict response to lists created before the set date. We - recommend ISO 8601 time format: 2015-10-21T15:41:36+00:00. - since_date_created: string - Restrict results to lists created after the set date. We - recommend ISO 8601 time format: 2015-10-21T15:41:36+00:00. - before_campaign_last_sent: string - Restrict results to lists created before the last campaign send - date. We recommend ISO 8601 time format: - 2015-10-21T15:41:36+00:00. - since_campaign_last_sent: string - Restrict results to lists created after the last campaign send - date. We recommend ISO 8601 time format: - 2015-10-21T15:41:36+00:00. - email: string - Restrict results to lists that include a specific subscriber's - email address. - sort_field: string, can only be 'date_created' or None - Returns files sorted by the specified field. - sort_dir: string, can only be 'ASC', 'DESC', or None - Determines the order direction for sorted results. 
- - `Returns:` - Table Class - """ - params = {'fields': fields, - 'exclude_fields': exclude_fields, - 'count': count, - 'offset': offset, - 'before_date_created': before_date_created, - 'since_date_created': since_date_created, - 'before_campaign_last_sent': before_campaign_last_sent, - 'since_campaign_last_sent': since_campaign_last_sent, - 'email': email, - 'sort_field': sort_field, - 'sort_dir': sort_dir} - - response = self.get_request('lists', params=params) - tbl = Table(response['lists']) - logger.info(f'Found {tbl.num_rows} lists.') - if tbl.num_rows > 0: - return tbl - else: - return Table() - - -The `get_lists` method corresponds to the `GET /lists `_ endpoint on the Mailchimp API. The method has a number of arguments (all optional), all of which are described in the docstring. The arguments are then mapped to the name of the endpoints’ parameters, and passed to the `APIConnector`’s `get_request` method. - -The method can return more than one record, so the results of the call to the API are wrapped in a Parsons `Table`. If there are no results from the call, an empty table is returned. - --------------- -Sandbox Access --------------- - -When developing a Parsons connector, it's helpful to be able to test your changes against a non-production account. We have set up test accounts with some vendors which you can use for testing by following the steps below. We also maintain :ref:`a list of vendors with free accounts` that you can use as sandboxes. - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Accessing and Using Credentials -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Step 1: Request Access** - -Request access to the test account (usually in the form of an API key) by emailing engineering@movementcooperative.org. Please provide your GitHub username and some context for your request. Why do you need the account credentials? What are you testing? If a community member recommended you request an account from us, you can include their name here. See :ref:`connector-specific-guidance` for additional information we may need to give you access to a specific sandbox. - -An example request might look something like "Hi, I'm Ana (abc123 on github), I want to work on the ActionNetwork connector but I don't know how to test it. At the contributor meeting Betty linked me here and said I should ask you." - -**Step 2: Save and Use the Credentials** - -When using your credentials, please store them as environmental variables rather than including them directly in your code. If you use them in your code and accidentally include them as part of a pull request, we will need to generate new credentials. Let's try to avoid that hassle! - -You can set environmental variables with the following commands:: - - set VARIABLE_NAME=VARIABLE_VALUE # Windows - export VARIABLE_NAME=VARIABLE_VALUE # Linux/Mac - -Some environmental variables may need to be explicitly loaded into scripts for use, but most will not. This is because each Parsons connector automatically looks in the environment for specific variables and uses them when initializing the connector. For example, the Zoom connector looks for ZOOM_API_KEY and ZOOM_API_SECRET. Check the documentation for the precise names of the environmental variables it looks for. - -In rare cases you may need to load the environmental variables yourself within the script. 
You can do so with the following code:: - - import os - ENV_VARIABLE = os.getenv('ENV_VARIABLE') - -^^^^^^^^^^^^^^^^^^^^^^ -General Best Practices -^^^^^^^^^^^^^^^^^^^^^^ - -Since the sandbox accounts are shared with multiple people, we ask contributors to observe some guidelines: - -* Use clear naming conventions when creating test data. Either prefix or suffix data with your initials or use another identifier. -* Only add mock data to the test account, never real data (especially if there are fields for contact information). -* Try to limit the amount of data you push in/pull out of the account to only the amount that you need. -* Leave test data that looks like it was created by someone else in the same state that you found it. -* Delete test data when you finish testing. -* Be mindful when sending requests to third party platforms. We don’t want to burden them or to have our account suspended and rate-limited. If you accidentally over-requested from the third-party platform and have been suspended or rate-limited in a way that does not expire after a day or less, please reach out to us so we can try to get access again. - -.. _connector-specific-guidance: - -^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Connector-Specific Guidance -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -################## -API Keys Available -################## - -The following connectors have sandbox API keys available. Some connectors have specific best practices or additional information to send along when you request the key from us. - -**ActionKit**: No additional information needed, but please be mindful that this sandbox is shared across many organizations, not just Parsons-affiliated organizations. Be extra careful not to modify existing data. - -**ActionNetwork**: In order to access the ActionNetwork sandbox account, we’ll need the email address associated with your ActionNetwork account. Please make an ActionNetwork account if you don’t have one already, and include the associated email in your access request to us. - -**ControlShift**: Please limit your testing to pushing and pulling data in and out and do not use the account for sending mass texts. (The sandbox account has an associated phone number, but it is unnecessary for Parsons testing.) - -**Hustle**: No connector-specific guidance. - -**Mobilize**: No connector-specific guidance. - -**Strive**: No connector-specific guidance. - -.. _create-sandbox: - -####################### -Create Your Own Sandbox -####################### - -The following connectors are confirmed to have free accounts which can be used to make sandboxes. - -**Airtable**: You can create `free accounts `_ on the Airtable website. - -**Braintree**: You can create `free sandbox accounts `_ on the Braintree website. - -**Github**: You can create `free accounts `_ on the Github website. - -**Salesforce**: You can create `free developer accounts `_ directly on the Salesforce website, which you can use to `create a sandbox `_. - -**Twilio**: You can create a `free account `_ on the Twilio website which gets you access to their `test credentials `_. - ------------- -Finishing up ------------- - -^^^^^^^^^^^^^^^ -Testing locally -^^^^^^^^^^^^^^^ - -In order to test locally, you will need to install the version of Parsons that you have been working on. To do that, you will need to install in "editable" mode, which allows you to import your local Parsons code instead of the released code. 
-To install Parsons in "editable" mode, run the following, where `` is the path to the root of the Parsons repository on your local machine. - -```bash -pip install -e -``` - -^^^^^^^^^^^^^^^^^^^^^^ -Adding automated tests -^^^^^^^^^^^^^^^^^^^^^^ - - * Add a folder *test_yourconnectorname* in parsons/test for your connector - * Add a file *test_yourconnectorname.py* to the *test_yourconnectorname* folder - * Use the code below as a starting point for your tests - * Add one `“Happy Path” `_ test per public method of your connector - * When possible mock out any external integrations, otherwise mark your test using the ``unittest.skipIf`` decorator (for an example, see test/test_s3.py) - - For a more detailed guide on writing unit tests, see :doc:`How to Write Tests for Parsons Connectors ` - -.. code-block:: python - - from parsons.yourconnector.yourconnector import YourConnector - import unittest - import requests_mock - - class TestYourConnector(unittest.TestCase): - - def setUp(self): - - # add any setup code here to run before each test - pass - - def tearDown(self): - - # add any teardown code here to run after each test - pass - - @requests_mock.Mocker() - def test_get_things(self, m): - - # Test that things are returned correctly. - m.get('http://yourconnector.com/v1/things', json=[]) - yc = YourConnector() - tbl = yc.get_things() - - self.assertEqual(tbl.num_rows, 0) - -^^^^^^^^^^^^^^^^^^^^ -Adding documentation -^^^^^^^^^^^^^^^^^^^^ - - * Add *yourconnectorname.rst* to the parsons/docs folder. - * Use the parsons/docs/_template.rst file as a guide for the documentation for your connector. - * Add a reference to your connector’s doc file to the parsons/docs/index.rst - * You just need to add the filename without the .rst extension (i.e. *yourconnector*) - * Be sure to add *yourconnector* in alphabetical order - -^^^^^^^^^^^ -Final steps -^^^^^^^^^^^ - - * Add any new dependencies to the parsons/requirements.txt file - * Run the entire suite of Parsons unit tests using the `pytest -rf test` command - * Run the linter against Parsons using `flake8 --max-line-length=100 parsons` - * Double-check that you have committed all of your code changes to your branch, and that you have pushed your branch to your fork - * Open a pull request against the move-coop/parsons repository +The "building a new connector guide" has been moved to the Parsons website! You can find it `here `_. diff --git a/docs/contributing.rst b/docs/contributing.rst index a43a1f06d3..a299f56f74 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -2,5 +2,5 @@ Contributing to Parsons ======================= -.. include:: ../CONTRIBUTING.md - :parser: myst_parser.sphinx_ \ No newline at end of file + +The contributing guide has been moved to the Parsons website! You can find it `here `_. diff --git a/docs/zoom.rst b/docs/zoom.rst index ea49171b43..6944b2dbfa 100644 --- a/docs/zoom.rst +++ b/docs/zoom.rst @@ -7,21 +7,23 @@ Overview `Zoom `_ is a video conferencing platform. This connector supports fetching users, fetching meetings, fetching metadata for past meetings, and fetching -participants of past meetings via the `Zoom API `_. +participants of past meetings via the `Zoom API `_. .. note:: Authentication - The ``Zoom`` class supports `JSON Web Token Authentication `_. - You must `Create a JWT App `_ to obtain - an API Key and API Secret for authentication.
+ The ``Zoom`` class uses server-to-server `OAuth ` + to authenticate queries to the Zoom API. You must create a server-to-server application in + `Zoom's app marketplace ` to obtain an + ``account_id``, ``client_id``, and ``client_secret`` key. You will use this OAuth application to define your scopes, + which gives your ``Zoom`` connector read permission on endpoints of your choosing (`meetings`, `webinars`, etc.) *********** Quick Start *********** -To instantiate the ``Zoom`` class, you can either store your Zoom API -key and secret as environmental variables (``ZOOM_API_KEY`` and ``ZOOM_API_SECRET``, -respectively) or pass them in as arguments: +To instantiate the ``Zoom`` class, you can either store your Zoom account ID, client ID, and client secret +as environmental variables (``ZOOM_ACCOUNT_ID``, ``ZOOM_CLIENT_ID``, ``ZOOM_CLIENT_SECRET``) +or pass them in as arguments. .. code-block:: python @@ -32,7 +34,11 @@ respectively) or pass them in as arguments: zoom = Zoom() # If providing authentication credentials via arguments - zoom = Zoom(api_key='my_api_key', api_secret='my_api_secret') + zoom = Zoom( + account_id="my_account_id", + client_id="my_client_id", + client_secret="my_client_secret" + ) # Get a table of host's meetings via their email or user id meetings_tbl = zoom.get_meetings('my_name@mail.com') diff --git a/parsons/__init__.py b/parsons/__init__.py index d611ebf63b..132d90db75 100644 --- a/parsons/__init__.py +++ b/parsons/__init__.py @@ -47,6 +47,8 @@ ("parsons.controlshift.controlshift", "Controlshift"), ("parsons.copper.copper", "Copper"), ("parsons.crowdtangle.crowdtangle", "CrowdTangle"), + ("parsons.databases.database_connector", "DatabaseConnector"), + ("parsons.databases.discover_database", "discover_database"), ("parsons.databases.db_sync", "DBSync"), ("parsons.databases.mysql.mysql", "MySQL"), ("parsons.databases.postgres.postgres", "Postgres"), diff --git a/parsons/action_builder/action_builder.py b/parsons/action_builder/action_builder.py index 2af620d77f..43fd70f1a6 100644 --- a/parsons/action_builder/action_builder.py +++ b/parsons/action_builder/action_builder.py @@ -188,12 +188,11 @@ def insert_entity_record(self, entity_type, data=None, campaign=None): Dict containing Action Builder entity data. """ # noqa: E501 + name_keys = ("name", "action_builder:name", "given_name") error = "Must provide data with name or given_name when inserting new record" if not isinstance(data, dict): raise ValueError(error) - name_check = [ - key for key in data.get("person", {}) if key in ("name", "given_name") - ] + name_check = [key for key in data.get("person", {}) if key in name_keys] if not name_check: raise ValueError(error) diff --git a/parsons/action_kit/action_kit.py b/parsons/action_kit/action_kit.py index ce147fee5e..6e346f6674 100644 --- a/parsons/action_kit/action_kit.py +++ b/parsons/action_kit/action_kit.py @@ -233,6 +233,63 @@ def update_event(self, event_id, **kwargs): ) logger.info(f"{resp.status_code}: {event_id}") + def get_blackholed_email(self, email): + """ + Get a blackholed email. A blackholed email is an email that has been prevented from + receiving bulk and transactional emails from ActionKit. `Documentation `_. + + `Args:` + email: str + Blackholed email of the record to get. + `Returns`: + Parsons.Table + The blackholed email data. + """ + + return self.paginated_get("blackholedemail", email=email) + + def blackhole_email(self, email): + """ + Prevent an email from receiving bulk and transactional emails from ActionKit. + `Documentation `_. 
+ + `Args:` + email: str + Email address to blackhole + `Returns:` + API location of new resource + """ + + return self._base_post( + endpoint="blackholedemail", + exception_message="Could not blackhole email", + email=email, + ) + + def delete_user_data(self, email, **kwargs): + """ + Delete user data. + + `Args:` + email: str + Email of the user whose data will be deleted + **kwargs: + Optional arguments and fields to pass to the client. A full list can be found + in the `ActionKit API Documentation `_. + `Returns:` + API location of anonymized user + """ + + return self._base_post( + endpoint="eraser", + exception_message="Could not delete user data", + email=email, + **kwargs, + ) + def delete_user(self, user_id): """ Delete a user. @@ -857,7 +914,7 @@ def get_orders(self, limit=None, **kwargs): ak.get_orders(import_id="my-import-123") `Returns:` Parsons.Table - The events data. + The orders data. """ return self.paginated_get("order", limit=limit, **kwargs) @@ -1034,7 +1091,7 @@ def get_transactions(self, limit=None, **kwargs): ak.get_transactions(order="order-1") `Returns:` Parsons.Table - The events data. + The transactions data. """ return self.paginated_get("transaction", limit=limit, **kwargs)
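A quick usage sketch of the three ActionKit methods added above (the domain, credentials, and addresses are placeholders; this illustration is not part of the diff):

```python
from parsons import ActionKit

# Placeholder credentials; these can also come from the ACTION_KIT_DOMAIN,
# ACTION_KIT_USERNAME, and ACTION_KIT_PASSWORD environment variables.
ak = ActionKit(domain="myorg.actionkit.com", username="me", password="secret")

ak.blackhole_email(email="no-mail@example.org")       # suppress all mail to this address
tbl = ak.get_blackholed_email("no-mail@example.org")  # returns a Parsons Table
ak.delete_user_data(email="erase-me@example.org")     # anonymize this user's data
```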
diff --git a/parsons/databases/database_connector.py b/parsons/databases/database_connector.py new file mode 100644 index 0000000000..e6778846be --- /dev/null +++ b/parsons/databases/database_connector.py @@ -0,0 +1,190 @@ +from abc import ABC, abstractmethod +from typing import Optional +from parsons.etl.table import Table + + +class DatabaseConnector(ABC): + """ + An abstract base class that provides a uniform interface for all Parsons database connectors. + This class should be used in functions instead of the specific database connector classes + when the functions don't rely on database-specific functionality. + + It ensures that any class that inherits from it implements the methods that are uniform + operations when working with databases. + + Should you use `DatabaseConnector` instead of `Redshift`/`BigQuery`/etc? + + Overall this class is mostly useful for code in the Parsons library, not code using it. + There could be some exceptions. In general though, if you are writing a script to do a task + like moving data out of an API service and into a data warehouse, you probably do not need + to use DatabaseConnector. You can probably just use the Parsons class that directly corresponds + with the database that you use. + + Here are more examples of situations where you may or may not need to use DatabaseConnector: + + 1. You do not use type annotations, or you don't know what "type annotations" are - No + + If you do not use type annotations for your code, then you do not need to think about + `DatabaseConnector` when writing your code. This is the most common case. If none + of the cases below apply to you, then you probably don't need it. + + In this simple example, we are not using type annotations in our code. We don't need + to think about exactly what class is being passed in. Python will figure it out. + + ```python + def my_database_function(db): + some_data = get_some_data() + db.copy(some_data, "some_table") + + # These will all just work: + my_database_function(Redshift()) + my_database_function(MySQL()) + my_database_function(BigQuery()) + ``` + + 2. You only use one database in your work - No + + This is where most people will fall. Usually code is not intended to run on + multiple databases without modification. For example, if you are working for + an organization that uses Amazon Redshift as your data warehouse, you do not + need to use `DatabaseConnector` to write ETL scripts to load data into your + Redshift. It is rare that organizations switch databases. In the cases where + that does occur, usually more work is required to migrate your environment and + your vendor-specific SQL than would be saved by using `DatabaseConnector`. + + 3. You are writing a sample script or a tutorial - Yes + + If you are using Parsons to write a sample script or tutorial, you should use + `DatabaseConnector`! If you use `DatabaseConnector` type annotations and the + `discover_database` function, then your sample code will run on any system. + This makes it much easier for new programmers to get your code working on + their system. + + 4. Utility code inside Parsons or other libraries - Yes + + If you are writing a utility script inside Parsons or another library meant + for broad distribution, you should probably use `DatabaseConnector` type + annotations. This will ensure that your library code will be usable by the + widest possible set of users, not just users on one specific database.
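To make cases 3 and 4 concrete, here is a sketch of a portable script (the table name and sample data are hypothetical) that combines a `DatabaseConnector` annotation with the `discover_database` helper added later in this diff:

```python
from parsons import Table
from parsons.databases.database_connector import DatabaseConnector
from parsons.databases.discover_database import discover_database

def load_members(db: DatabaseConnector, members: Table):
    # Runs unchanged on Redshift, MySQL, Postgres, or BigQuery.
    db.copy(members, "staging.members", if_exists="drop")

# Picks whichever connector is configured in the environment.
load_members(discover_database(), Table([{"id": 1, "email": "ana@example.org"}]))
```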
+ Developer Notes: + This class is an Abstract Base Class (ABC). It's designed to ensure that all classes + inheriting from it implement certain methods, enforcing a consistent interface across + database connectors. + + If you need to add a new method to the database connectors, there are three options: + 1. Add the method to this ABC and implement it for all databases. + 2. Add the method to this ABC and implement it for some databases while adding stubs for + others. + 3. Implement the method on a specific database connector without touching the ABC. + + If you go the second route, you can add a stub method like this: + + .. code-block:: python + + def new_method(self, arg1, arg2): + raise NotImplementedError("Method not implemented for this database connector.") + + This communicates clearly to users that the method does not exist for certain connectors. + + If you go the third route, remember that you're responsible for making sure your new + method matches the existing methods in other database connectors. For example, if you're + adding a method that already exists in another connector, like Redshift, you need to ensure + your new method behaves the same way and has the same parameters with the same types in the + same order. See the note below for more detail. + + Note: + The Python type system (as of 3.10.6) will not stop you from breaking the type contract + of method signatures when implementing a subclass. It is up to the author of a database + connector to ensure that it satisfies this interface. Be careful to, for example, not + change the types of the parameters or leave out optional parameters that are specified + in the interface. + + Any such inconsistencies can cause unexpected runtime errors that will not be caught by + the type checker. + + It is safe to add additional features to subclasses, such as new methods. However, adding + extra *optional* parameters to methods specified in the interface should be considered bad + practice, because it could result in unexpected behavior. + + Example usage: + + .. code-block:: python + + def my_function(db: DatabaseConnector, data: Table): + # Your code here, using the db object + + # Pass an instance of a class that inherits from DatabaseConnector, e.g. Redshift + my_function(some_db_instance, some_data) + + """ + + @abstractmethod + def table_exists(self, table_name: str) -> bool: + """Check if a table or view exists in the database. + + `Args:` + table_name: str + The table name and schema (e.g. ``myschema.mytable``). + + `Returns:` + boolean + ``True`` if the table exists and ``False`` if it does not. + """ + pass + + @abstractmethod + def copy(self, tbl: Table, table_name: str, if_exists: str): + """Copy a :ref:`parsons-table` to the database. + + `Args:` + tbl (Table): + Table containing the data to save. + table_name (str): + The destination table name (ex. ``my_schema.my_table``). + if_exists (str): + If the table already exists, either ``fail``, ``append``, ``drop`` + or ``truncate`` the table. + """ + pass + + @abstractmethod + def query(self, sql: str, parameters: Optional[list] = None) -> Optional[Table]: + """Execute a query against the database. Will return ``None`` if the query returns no rows. + + To include python variables in your query, it is recommended to pass them as parameters, + following the `psycopg style + `. + Using the ``parameters`` argument ensures that values are escaped properly, and avoids SQL + injection attacks. + + **Parameter Examples** + + .. code-block:: python + + # Note that the name contains a quote, which could break your query if not escaped + # properly. + name = "Beatrice O'Brady" + sql = "SELECT * FROM my_table WHERE name = %s" + db.query(sql, parameters=[name]) + + .. code-block:: python + + names = ["Allen Smith", "Beatrice O'Brady", "Cathy Thompson"] + placeholders = ', '.join('%s' for item in names) + sql = f"SELECT * FROM my_table WHERE name IN ({placeholders})" + db.query(sql, parameters=names) + + `Args:` + sql: str + A valid SQL statement + parameters: Optional[list] + A list of python variables to be converted into SQL values in your query + + `Returns:` + Parsons Table + See :ref:`parsons-table` for output options. + """ + pass diff --git a/parsons/databases/discover_database.py b/parsons/databases/discover_database.py new file mode 100644 index 0000000000..1d51a37112 --- /dev/null +++ b/parsons/databases/discover_database.py @@ -0,0 +1,79 @@ +import os +from typing import Optional, Union, Type, List + +from parsons.databases.database_connector import DatabaseConnector +from parsons.databases.redshift import Redshift +from parsons.databases.mysql import MySQL +from parsons.databases.postgres import Postgres +from parsons.google.google_bigquery import GoogleBigQuery + + +def discover_database( + default_connector: Optional[ + Union[Type[DatabaseConnector], List[Type[DatabaseConnector]]] + ] = None +) -> DatabaseConnector: + """Create an appropriate ``DatabaseConnector`` based on environmental variables. + + Will search the environmental variables for the proper credentials for the + Redshift, MySQL, Postgres, and BigQuery connectors. See the documentation + for the connectors to see which variables are required to initialize them. + + If no suitable configuration is found, will raise an error. + + If multiple suitable configurations are found, will raise an error unless + a default connector class or list of classes is provided. + + Note that the variables to be searched for are hard-coded in this function, + since they are unlikely to change. If they do change for some reason, or if a + new database connector is added, ``discover_database`` should be updated. + + Args: + default_connector: Optional, single Class or list of Classes inheriting from + DatabaseConnector to be used as default in case multiple database configurations + are detected. + + Returns: + DatabaseConnector: The database connector configured in the environment. + """ + connectors = { + "Redshift": Redshift, + "MySQL": MySQL, + "Postgres": Postgres, + "GoogleBigQuery": GoogleBigQuery, + } + + password_vars = { + "Redshift": "REDSHIFT_PASSWORD", + "MySQL": "MYSQL_PASSWORD", + "Postgres": "PGPASSWORD", + "GoogleBigQuery": "GOOGLE_APPLICATION_CREDENTIALS", + } + + detected = [name for name in connectors.keys() if os.getenv(password_vars[name])] + + if len(detected) > 1: + if default_connector is None: + raise EnvironmentError( + f"Multiple database configurations detected: {detected}." + " Please specify a default connector." + ) + + if isinstance(default_connector, list): + for connector in default_connector: + if connector.__name__ in detected: + return connector() + raise EnvironmentError( + f"None of the default connectors {default_connector} were detected." + ) + elif default_connector.__name__ in detected: + return default_connector() + else: + raise EnvironmentError( + f"Default connector {default_connector} not detected. Detected: {detected}." + ) + + elif detected: + return connectors[detected[0]]() + else: + raise EnvironmentError("Could not find any database configuration.")
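A brief sketch of the disambiguation behavior implemented above (the environment contents are hypothetical): when credentials for more than one database are present, an ordered preference list resolves the conflict:

```python
from parsons.databases.discover_database import discover_database
from parsons.databases.redshift import Redshift
from parsons.databases.postgres import Postgres

# With both REDSHIFT_PASSWORD and PGPASSWORD set, this returns a Redshift
# connector; with only PGPASSWORD set, it returns a Postgres connector.
db = discover_database(default_connector=[Redshift, Postgres])
```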
diff --git a/parsons/databases/mysql/mysql.py b/parsons/databases/mysql/mysql.py index c2d18906ca..3572d82bce 100644 --- a/parsons/databases/mysql/mysql.py +++ b/parsons/databases/mysql/mysql.py @@ -7,6 +7,7 @@ import pickle import logging import os +from parsons.databases.database_connector import DatabaseConnector from parsons.databases.table import BaseTable from parsons.databases.mysql.create_table import MySQLCreateTable from parsons.databases.alchemy import Alchemy @@ -19,7 +20,7 @@ logger = logging.getLogger(__name__) -class MySQL(MySQLCreateTable, Alchemy): +class MySQL(DatabaseConnector, MySQLCreateTable, Alchemy): """ Connect to a MySQL database. @@ -151,7 +152,6 @@ def query_with_connection(self, sql, connection, parameters=None, commit=True): See :ref:`parsons-table` for output options. """ with self.cursor(connection) as cursor: - # The python connector can only execute a single sql statement, so we will # break up each statement and execute them separately. for s in sql.strip().split(";"): @@ -193,7 +193,12 @@ def query_with_connection(self, sql, connection, parameters=None, commit=True): return final_tbl def copy( - self, tbl, table_name, if_exists="fail", chunk_size=1000, strict_length=True + self, + tbl: Table, + table_name: str, + if_exists: str = "fail", + chunk_size: int = 1000, + strict_length: bool = True, ): """ Copy a :ref:`parsons-table` to the database. @@ -225,7 +230,6 @@ def copy( return None with self.connection() as connection: - # Create table if not exists if self._create_table_precheck(connection, table_name, if_exists): sql = self.create_statement( @@ -282,7 +286,6 @@ def _create_table_precheck(self, connection, table_name, if_exists): # If the table exists, evaluate the if_exists argument for next steps.
if self.table_exists(table_name): - if if_exists == "fail": raise ValueError("Table already exists.") @@ -301,7 +304,7 @@ def _create_table_precheck(self, connection, table_name, if_exists): else: return True - def table_exists(self, table_name): + def table_exists(self, table_name: str) -> bool: """ Check if a table or view exists in the database. diff --git a/parsons/databases/postgres/postgres.py b/parsons/databases/postgres/postgres.py index 13d8372bab..1463ec85cd 100644 --- a/parsons/databases/postgres/postgres.py +++ b/parsons/databases/postgres/postgres.py @@ -1,6 +1,8 @@ from parsons.databases.postgres.postgres_core import PostgresCore from parsons.databases.table import BaseTable from parsons.databases.alchemy import Alchemy +from parsons.databases.database_connector import DatabaseConnector +from parsons.etl.table import Table import logging import os @@ -8,7 +10,7 @@ logger = logging.getLogger(__name__) -class Postgres(PostgresCore, Alchemy): +class Postgres(PostgresCore, Alchemy, DatabaseConnector): """ A Postgres class to connect to database. Credentials can be passed from a ``.pgpass`` file stored in your home directory or with environmental variables. @@ -52,7 +54,13 @@ def __init__( self.timeout = timeout self.dialect = "postgres" - def copy(self, tbl, table_name, if_exists="fail", strict_length=False): + def copy( + self, + tbl: Table, + table_name: str, + if_exists: str = "fail", + strict_length: bool = False, + ): """ Copy a :ref:`parsons-table` to Postgres. @@ -68,14 +76,12 @@ def copy(self, tbl, table_name, if_exists="fail", strict_length=False): If the database table needs to be created, strict_length determines whether the created table's column sizes will be sized to exactly fit the current data, or if their size will be rounded up to account for future values being larger - then the current dataset + than the current dataset. Defaults to ``False``. """ with self.connection() as connection: - # Auto-generate table if self._create_table_precheck(connection, table_name, if_exists): - # Create the table # To Do: Pass in the advanced configuration parameters. sql = self.create_statement( diff --git a/parsons/databases/postgres/postgres_core.py b/parsons/databases/postgres/postgres_core.py index be7fcbdd35..8b6557de9c 100644 --- a/parsons/databases/postgres/postgres_core.py +++ b/parsons/databases/postgres/postgres_core.py @@ -1,4 +1,5 @@ from contextlib import contextmanager +from typing import Optional import psycopg2 import psycopg2.extras from parsons.etl.table import Table @@ -61,7 +62,7 @@ def cursor(self, connection): finally: cur.close() - def query(self, sql, parameters=None): + def query(self, sql: str, parameters: Optional[list] = None) -> Optional[Table]: """ Execute a query against the database. Will return ``None`` if the query returns zero rows. @@ -207,7 +208,7 @@ def _create_table_precheck(self, connection, table_name, if_exists): else: return True - def table_exists(self, table_name, view=True): + def table_exists(self, table_name: str, view: bool = True) -> bool: """ Check if a table or view exists in the database. @@ -215,7 +216,7 @@ def table_exists(self, table_name, view=True): table_name: str The table name and schema (e.g. ``myschema.mytable``). view: boolean - Check to see if a view exists by the same name + Check to see if a view exists by the same name. Defaults to ``True``.
`Returns:` boolean diff --git a/parsons/databases/redshift/redshift.py b/parsons/databases/redshift/redshift.py index 0fc35e2c75..a870d90a11 100644 --- a/parsons/databases/redshift/redshift.py +++ b/parsons/databases/redshift/redshift.py @@ -1,3 +1,4 @@ +from typing import List, Optional from parsons.etl.table import Table from parsons.databases.redshift.rs_copy_table import RedshiftCopyTable from parsons.databases.redshift.rs_create_table import RedshiftCreateTable @@ -6,6 +7,7 @@ from parsons.databases.table import BaseTable from parsons.databases.alchemy import Alchemy from parsons.utilities import files, sql_helpers +from parsons.databases.database_connector import DatabaseConnector import psycopg2 import psycopg2.extras import os @@ -31,6 +33,7 @@ class Redshift( RedshiftTableUtilities, RedshiftSchema, Alchemy, + DatabaseConnector, ): """ A Redshift class to connect to database. @@ -152,7 +155,7 @@ def cursor(self, connection): finally: cur.close() - def query(self, sql, parameters=None): + def query(self, sql: str, parameters: Optional[list] = None) -> Optional[Table]: """ Execute a query against the Redshift database. Will return ``None`` if the query returns zero rows. @@ -293,6 +296,7 @@ def copy_s3( bucket_region=None, strict_length=True, template_table=None, + line_delimited=False, ): """ Copy a file from s3 to Redshift. @@ -411,6 +415,8 @@ def copy_s3( local_path = s3.get_file(bucket, key) if data_type == "csv": tbl = Table.from_csv(local_path, delimiter=csv_delimiter) + elif data_type == "json": + tbl = Table.from_json(local_path, line_delimited=line_delimited) else: raise TypeError("Invalid data type provided") @@ -430,6 +436,7 @@ def copy_s3( logger.info(f"{table_name} created.") # Copy the table + logger.info(f"Data type is {data_type}") copy_sql = self.copy_statement( table_name, bucket, @@ -461,36 +468,36 @@ def copy_s3( def copy( self, - tbl, - table_name, - if_exists="fail", - max_errors=0, - distkey=None, - sortkey=None, - padding=None, - statupdate=None, - compupdate=None, - acceptanydate=True, - emptyasnull=True, - blanksasnull=True, - nullas=None, - acceptinvchars=True, - dateformat="auto", - timeformat="auto", - varchar_max=None, - truncatecolumns=False, - columntypes=None, - specifycols=None, - alter_table=False, - alter_table_cascade=False, - aws_access_key_id=None, - aws_secret_access_key=None, - iam_role=None, - cleanup_s3_file=True, - template_table=None, - temp_bucket_region=None, - strict_length=True, - csv_encoding="utf-8", + tbl: Table, + table_name: str, + if_exists: str = "fail", + max_errors: int = 0, + distkey: Optional[str] = None, + sortkey: Optional[str] = None, + padding: Optional[float] = None, + statupdate: Optional[bool] = None, + compupdate: Optional[bool] = None, + acceptanydate: bool = True, + emptyasnull: bool = True, + blanksasnull: bool = True, + nullas: Optional[str] = None, + acceptinvchars: bool = True, + dateformat: str = "auto", + timeformat: str = "auto", + varchar_max: Optional[List[str]] = None, + truncatecolumns: bool = False, + columntypes: Optional[dict] = None, + specifycols: Optional[bool] = None, + alter_table: bool = False, + alter_table_cascade: bool = False, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + iam_role: Optional[str] = None, # Unused - Should we remove? 
+ cleanup_s3_file: bool = True, + template_table: Optional[str] = None, + temp_bucket_region: Optional[str] = None, + strict_length: bool = True, + csv_encoding: str = "utf-8", ): """ Copy a :ref:`parsons-table` to Redshift. @@ -513,9 +520,6 @@ def copy( padding: float A percentage padding to add to varchar columns if creating a new table. This is helpful to add a buffer for future copies in which the data might be wider. - varchar_max: list - A list of columns in which to set the width of the varchar column to 65,535 - characters. statupdate: boolean Governs automatic computation and refresh of optimizer statistics at the end of a successful COPY command. If ``True`` explicitly sets ``statupdate`` to on, if @@ -553,6 +557,9 @@ def copy( Set the date format. Defaults to ``auto``. timeformat: str Set the time format. Defaults to ``auto``. + varchar_max: list + A list of columns in which to set the width of the varchar column to 65,535 + characters. truncatecolumns: boolean If the table already exists, truncates data in columns to the appropriate number of characters so that it fits the column specification. Applies only to columns @@ -600,7 +607,7 @@ def copy( in a different region from the temp bucket. strict_length: bool Whether or not to tightly fit the length of the table columns to the length - of the data in ``tbl``; if ``padding`` is specified, this argument is ignored + of the data in ``tbl``; if ``padding`` is specified, this argument is ignored. csv_encoding: str String encoding to use when writing the temporary CSV file that is uploaded to S3. Defaults to 'utf-8'. @@ -705,6 +712,7 @@ def unload( allow_overwrite=True, parallel=True, max_file_size="6.2 GB", + extension=None, aws_region=None, aws_access_key_id=None, aws_secret_access_key=None, @@ -750,6 +758,8 @@ def unload( max_file_size: str The maximum size of files UNLOAD creates in Amazon S3. Specify a decimal value between 5 MB and 6.2 GB. + extension: str + This extension will be added to the end of file names loaded to S3. region: str The AWS Region where the target Amazon S3 bucket is located.
REGION is required for UNLOAD to an Amazon S3 bucket that is not in the same AWS Region as the Amazon Redshift @@ -789,6 +799,8 @@ def unload( statement += "ESCAPE \n" if allow_overwrite: statement += "ALLOWOVERWRITE \n" + if extension: + statement += f"EXTENSION '{extension}' \n" if aws_region: statement += f"REGION {aws_region} \n" diff --git a/parsons/databases/redshift/rs_copy_table.py b/parsons/databases/redshift/rs_copy_table.py index ea014b09d7..7b4fa578b7 100644 --- a/parsons/databases/redshift/rs_copy_table.py +++ b/parsons/databases/redshift/rs_copy_table.py @@ -9,7 +9,6 @@ class RedshiftCopyTable(object): - aws_access_key_id = None aws_secret_access_key = None iam_role = None @@ -42,8 +41,9 @@ def copy_statement( aws_secret_access_key=None, compression=None, bucket_region=None, + json_option="auto", ): - + logger.info(f"Data type is {data_type}") # Source / Destination source = f"s3://{bucket}/{key}" @@ -101,6 +101,8 @@ def copy_statement( # Data Type if data_type == "csv": sql += f"csv delimiter '{csv_delimiter}' \n" + elif data_type == "json": + sql += f"json '{json_option}' \n" else: raise TypeError("Invalid data type specified.") @@ -112,7 +114,6 @@ def copy_statement( return sql def get_creds(self, aws_access_key_id, aws_secret_access_key): - if aws_access_key_id and aws_secret_access_key: # When we have credentials, then we don't need to set them again pass @@ -122,19 +123,16 @@ def get_creds(self, aws_access_key_id, aws_secret_access_key): return f"credentials 'aws_iam_role={self.iam_role}'\n" elif self.aws_access_key_id and self.aws_secret_access_key: - aws_access_key_id = self.aws_access_key_id aws_secret_access_key = self.aws_secret_access_key elif ( "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ ): - aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] else: - s3 = S3(use_env_token=self.use_env_token) creds = s3.aws.session.get_credentials() aws_access_key_id = creds.access_key @@ -151,7 +149,6 @@ def temp_s3_copy( aws_secret_access_key=None, csv_encoding="utf-8", ): - if not self.s3_temp_bucket: raise KeyError( ( @@ -184,6 +181,5 @@ def temp_s3_copy( return key def temp_s3_delete(self, key): - if key: self.s3.remove_file(self.s3_temp_bucket, key) diff --git a/parsons/databases/redshift/rs_create_table.py b/parsons/databases/redshift/rs_create_table.py index 059e4fa89c..7d37a81240 100644 --- a/parsons/databases/redshift/rs_create_table.py +++ b/parsons/databases/redshift/rs_create_table.py @@ -52,7 +52,6 @@ def create_statement( columntypes=None, strict_length=True, ): - # Warn the user if they don't provide a DIST key or a SORT key self._log_key_warning(distkey=distkey, sortkey=sortkey, method="copy") @@ -144,7 +143,6 @@ def vc_max(self, mapping, columns): # Set the varchar width of a column to the maximum for c in columns: - try: idx = mapping["headers"].index(c) mapping["longest"][idx] = self.VARCHAR_MAX @@ -156,13 +154,11 @@ def vc_max(self, mapping, columns): return mapping["longest"] def vc_trunc(self, mapping): - return [ self.VARCHAR_MAX if c > self.VARCHAR_MAX else c for c in mapping["longest"] ] def vc_validate(self, mapping): - return [1 if c == 0 else c for c in mapping["longest"]] def create_sql(self, table_name, mapping, distkey=None, sortkey=None): diff --git a/parsons/databases/redshift/rs_table_utilities.py b/parsons/databases/redshift/rs_table_utilities.py index 0ead403fcd..ae0c8a5c71 100644 --- a/parsons/databases/redshift/rs_table_utilities.py +++ 
b/parsons/databases/redshift/rs_table_utilities.py @@ -9,7 +9,7 @@ class RedshiftTableUtilities(object): def __init__(self): pass - def table_exists(self, table_name, view=True): + def table_exists(self, table_name: str, view: bool = True) -> bool: """ Check if a table or view exists in the database. diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index b261b0cf03..f10641c466 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -1,12 +1,15 @@ import pickle +from typing import Optional, Union import uuid from google.cloud import bigquery from google.cloud.bigquery import dbapi +from google.cloud.bigquery.job import LoadJobConfig from google.cloud import exceptions import petl from parsons.databases.table import BaseTable +from parsons.databases.database_connector import DatabaseConnector from parsons.etl import Table from parsons.google.utitities import setup_google_application_credentials from parsons.google.google_cloud_storage import GoogleCloudStorage @@ -56,7 +59,7 @@ def parse_table_name(table_name): return parsed -class GoogleBigQuery: +class GoogleBigQuery(DatabaseConnector): """ Class for querying BigQuery table and returning the data as Parsons tables. @@ -102,12 +105,12 @@ def __init__(self, app_creds=None, project=None, location=None): def copy( self, - table_obj, - table_name, - if_exists="fail", - tmp_gcs_bucket=None, - gcs_client=None, - job_config=None, + tbl: Table, + table_name: str, + if_exists: str = "fail", + tmp_gcs_bucket: Optional[str] = None, + gcs_client: Optional[GoogleCloudStorage] = None, + job_config: Optional[LoadJobConfig] = None, **load_kwargs, ): """ @@ -147,7 +150,7 @@ def copy( job_config = bigquery.LoadJobConfig() if not job_config.schema: - job_config.schema = self._generate_schema(table_obj) + job_config.schema = self._generate_schema(tbl) if not job_config.create_disposition: job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED @@ -167,9 +170,7 @@ def copy( gcs_client = gcs_client or GoogleCloudStorage() temp_blob_name = f"{uuid.uuid4()}.csv" - temp_blob_uri = gcs_client.upload_table( - table_obj, tmp_gcs_bucket, temp_blob_name - ) + temp_blob_uri = gcs_client.upload_table(tbl, tmp_gcs_bucket, temp_blob_name) # load CSV from Cloud Storage into BigQuery table_ref = get_table_ref(self.client, table_name) @@ -195,7 +196,9 @@ def delete_table(self, table_name): table_ref = get_table_ref(self.client, table_name) self.client.delete_table(table_ref) - def query(self, sql, parameters=None): + def query( + self, sql: str, parameters: Optional[Union[list, dict]] = None + ) -> Optional[Table]: """ Run a BigQuery query and return the results as a Parsons table. @@ -268,7 +271,7 @@ def query(self, sql, parameters=None): return final_table - def table_exists(self, table_name): + def table_exists(self, table_name: str) -> bool: """ Check whether or not the Google BigQuery table exists in the specified dataset. 
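Taken together, the annotations above and the shared `DatabaseConnector` base class mean that `Redshift`, `Postgres`, and `GoogleBigQuery` now expose a common `copy` / `query` / `table_exists` surface, so ETL code can be written against the interface rather than a specific backend. A minimal sketch of what that enables (the schema and table names are hypothetical, and database credentials are assumed to already be set via environment variables):

```python
from parsons import Table
from parsons.databases.database_connector import DatabaseConnector
from parsons.databases.discover_database import discover_database

def load_contacts(db: DatabaseConnector, tbl: Table) -> None:
    # Any connector implementing DatabaseConnector satisfies this signature.
    if db.table_exists("analytics.contacts"):  # hypothetical table name
        db.copy(tbl, "analytics.contacts", if_exists="append")
    else:
        db.copy(tbl, "analytics.contacts", if_exists="fail")

# discover_database() selects a connector based on which credentials are
# present in the environment (e.g. REDSHIFT_PASSWORD); see the tests below.
db = discover_database()
load_contacts(db, Table([{"id": 1, "email": "person@example.org"}]))
```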
diff --git a/parsons/utilities/api_connector.py b/parsons/utilities/api_connector.py index d57ca216d4..5c54106cbe 100644 --- a/parsons/utilities/api_connector.py +++ b/parsons/utilities/api_connector.py @@ -35,7 +35,6 @@ class APIConnector(object): def __init__( self, uri, headers=None, auth=None, pagination_key=None, data_key=None ): - # Add a trailing slash if it's missing if not uri.endswith("/"): uri = uri + "/" @@ -237,7 +236,6 @@ def validate_response(self, resp): """ if resp.status_code >= 400: - if resp.reason: message = f"HTTP error occurred ({resp.status_code}): {resp.reason}" else: @@ -263,7 +261,7 @@ def data_parse(self, resp): A dictionary of data. """ - # To Do: Some response jsons are enclosed in a list. Need to deal with unpacking and/or + # TODO: Some response jsons are enclosed in a list. Need to deal with unpacking and/or # not assuming that it is going to be a dict. # In some instances responses are just lists. diff --git a/parsons/zoom/zoom.py b/parsons/zoom/zoom.py index 861f709a0e..095e097aaa 100644 --- a/parsons/zoom/zoom.py +++ b/parsons/zoom/zoom.py @@ -8,6 +8,8 @@ logger = logging.getLogger(__name__) ZOOM_URI = "https://api.zoom.us/v2/" +ZOOM_AUTH_CALLBACK = "https://zoom.us/oauth/token" + class Zoom: @@ -23,15 +25,49 @@ class Zoom: variable set. """ - def __init__(self, api_key=None, api_secret=None): + def __init__(self, account_id=None, client_id=None, client_secret=None): + self.account_id = check_env.check("ZOOM_ACCOUNT_ID", account_id) + self.client_id = check_env.check("ZOOM_CLIENT_ID", client_id) + self.__client_secret = check_env.check("ZOOM_CLIENT_SECRET", client_secret) + + self.client = APIConnector(uri=ZOOM_URI) + + access_token = self.__generate_access_token() + + self.client.headers = { + "Authorization": f"Bearer {access_token}", + "Content-type": "application/json", + } + + def __generate_access_token(self) -> str: + """ + Uses Zoom's OAuth callback URL to generate an access token to query the Zoom API. - self.api_key = check_env.check("ZOOM_API_KEY", api_key) - self.api_secret = check_env.check("ZOOM_API_SECRET", api_secret) - self.client = APIConnector(ZOOM_URI) + `Returns`: + String representation of access token + """ - def refresh_header_token(self): - # Generate a token that is valid for 30 seconds and update header. Full documentation - # on JWT generation using Zoom API: https://marketplace.zoom.us/docs/guides/auth/jwt + temp_client = APIConnector( + uri=ZOOM_URI, auth=(self.client_id, self.__client_secret) + ) + + resp = temp_client.post_request( + ZOOM_AUTH_CALLBACK, + data={ + "grant_type": "account_credentials", + "account_id": self.account_id, + }, + ) + + return resp["access_token"] + + def __refresh_header_token(self): + """ + NOTE: This function is deprecated as Zoom's API moves to an OAuth strategy on 9/1. + + Generate a token that is valid for 30 seconds and update header. Full documentation + on JWT generation using Zoom API: https://marketplace.zoom.us/docs/guides/auth/jwt + """ payload = { "iss": self.api_key, @@ -44,9 +81,22 @@ def refresh_header_token(self): } def _get_request(self, endpoint, data_key, params=None, **kwargs): - # To Do: Consider increasing default page size. + """ + TODO: Consider increasing default page size.
+ + `Args`: + endpoint: str + API endpoint to send GET request + data_key: str + Unique value to use to parse through nested data + (akin to a primary key in response JSON) + params: dict + Additional request parameters, defaults to None + + `Returns`: + Parsons Table of API responses + """ - self.refresh_header_token() r = self.client.get_request(endpoint, params=params, **kwargs) self.client.data_key = data_key data = self.client.data_parse(r) diff --git a/setup.py b/setup.py index 7d542cb9a2..b2e64ae26d 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ def main(): setup( name="parsons", - version="1.2.0", + version="2.0.0", author="The Movement Cooperative", author_email="info@movementcooperative.org", url="https://github.com/move-coop/parsons", diff --git a/test/test_action_kit.py b/test/test_action_kit.py index 14f64d8c29..93dd20e96e 100644 --- a/test/test_action_kit.py +++ b/test/test_action_kit.py @@ -108,6 +108,47 @@ def test_update_event(self): data=json.dumps({"is_approved": "test"}), ) + def test_get_blackholed_email(self): + # Test get blackholed email + resp_mock = mock.MagicMock() + type(resp_mock.get()).status_code = mock.PropertyMock(return_value=201) + type(resp_mock.get()).json = lambda x: {"meta": {"next": ""}, "objects": []} + self.actionkit.conn = resp_mock + + self.actionkit.get_blackholed_email("test") + self.actionkit.conn.get.assert_called_with( + "https://domain.actionkit.com/rest/v1/blackholedemail/", + params={"email": "test", "_limit": 100}, + ) + + def test_blackhole_email(self): + # Test blackhole email + + # Mock resp and status code + resp_mock = mock.MagicMock() + type(resp_mock.post()).status_code = mock.PropertyMock(return_value=201) + self.actionkit.conn = resp_mock + + self.actionkit.blackhole_email(email="test") + self.actionkit.conn.post.assert_called_with( + "https://domain.actionkit.com/rest/v1/blackholedemail/", + data=json.dumps({"email": "test"}), + ) + + def test_delete_user_data(self): + # Test delete user data + + # Mock resp and status code + resp_mock = mock.MagicMock() + type(resp_mock.post()).status_code = mock.PropertyMock(return_value=201) + self.actionkit.conn = resp_mock + + self.actionkit.delete_user_data(email="test") + self.actionkit.conn.post.assert_called_with( + "https://domain.actionkit.com/rest/v1/eraser/", + data=json.dumps({"email": "test"}), + ) + def test_delete_user(self): # Test delete user diff --git a/test/test_databases/test_discover_database.py b/test/test_databases/test_discover_database.py new file mode 100644 index 0000000000..b946629e10 --- /dev/null +++ b/test/test_databases/test_discover_database.py @@ -0,0 +1,116 @@ +import unittest +from unittest.mock import patch +from parsons.databases.redshift import Redshift +from parsons.databases.mysql import MySQL +from parsons.databases.postgres import Postgres +from parsons.google.google_bigquery import GoogleBigQuery +from parsons.databases.discover_database import discover_database + + +class TestDiscoverDatabase(unittest.TestCase): + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_no_database_detected(self, mock_getenv, *_): + mock_getenv.return_value = None + with self.assertRaises(EnvironmentError): + discover_database() + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + 
@patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_single_database_detected(self, mock_getenv, *_): + mock_getenv.side_effect = ( + lambda var: "password" if var == "REDSHIFT_PASSWORD" else None + ) + self.assertIsInstance(discover_database(), Redshift) + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_single_database_detected_with_other_default(self, mock_getenv, *_): + mock_getenv.side_effect = ( + lambda var: "password" if var == "REDSHIFT_PASSWORD" else None + ) + self.assertIsInstance(discover_database(default_connector=Postgres), Redshift) + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_single_database_detected_with_other_default_list(self, mock_getenv, *_): + mock_getenv.side_effect = ( + lambda var: "password" if var == "REDSHIFT_PASSWORD" else None + ) + self.assertIsInstance( + discover_database(default_connector=[Postgres, MySQL]), Redshift + ) + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_multiple_databases_no_default(self, mock_getenv, *_): + mock_getenv.return_value = "password" + with self.assertRaises(EnvironmentError): + discover_database() + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_multiple_databases_with_default(self, mock_getenv, *_): + mock_getenv.return_value = "password" + self.assertIsInstance(discover_database(default_connector=Redshift), Redshift) + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_multiple_databases_with_default_list(self, mock_getenv, *_): + mock_getenv.return_value = "password" + self.assertIsInstance( + discover_database(default_connector=[MySQL, Redshift]), MySQL + ) + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def test_multiple_databases_invalid_default(self, mock_getenv, *_): + mock_getenv.side_effect = ( + lambda var: "password" + if var == "REDSHIFT_PASSWORD" or var == "MYSQL_PASSWORD" + else None + ) + with self.assertRaises(EnvironmentError): + discover_database(default_connector=Postgres) + + @patch.object(GoogleBigQuery, "__init__", return_value=None) + @patch.object(Postgres, "__init__", return_value=None) + @patch.object(MySQL, "__init__", return_value=None) + @patch.object(Redshift, "__init__", return_value=None) + @patch("os.getenv") + def 
test_multiple_databases_invalid_default_list(self, mock_getenv, *_): + mock_getenv.side_effect = ( + lambda var: "password" + if var == "REDSHIFT_PASSWORD" or var == "MYSQL_PASSWORD" + else None + ) + with self.assertRaises(EnvironmentError): + discover_database(default_connector=[Postgres, GoogleBigQuery]) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_zoom.py b/test/test_zoom.py index 9d71056484..c53b7696e0 100644 --- a/test/test_zoom.py +++ b/test/test_zoom.py @@ -4,14 +4,19 @@ import requests_mock from parsons import Table, Zoom -API_KEY = "fake_api_key" -API_SECRET = "fake_api_secret" +ACCOUNT_ID = "fakeAccountID" +CLIENT_ID = "fakeClientID" +CLIENT_SECRET = "fakeClientSecret" + ZOOM_URI = "https://api.zoom.us/v2/" +ZOOM_AUTH_CALLBACK = "https://zoom.us/oauth/token" class TestZoom(unittest.TestCase): - def setUp(self): - self.zoom = Zoom(API_KEY, API_SECRET) + @requests_mock.Mocker() + def setUp(self, m): + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) + self.zoom = Zoom(ACCOUNT_ID, CLIENT_ID, CLIENT_SECRET) @requests_mock.Mocker() def test_get_users(self, m): @@ -63,6 +68,7 @@ def test_get_users(self, m): ] ) + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) m.get(ZOOM_URI + "users", json=user_json) assert_matching_tables(self.zoom.get_users(), tbl) @@ -122,6 +128,7 @@ def test_get_meeting_participants(self, m): ] ) + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) m.get(ZOOM_URI + "report/meetings/123/participants", json=participants) assert_matching_tables(self.zoom.get_past_meeting_participants(123), tbl) @@ -173,6 +180,7 @@ def test_get_meeting_registrants(self, m): ] ) + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) m.get(ZOOM_URI + "meetings/123/registrants", json=registrants) assert_matching_tables(self.zoom.get_meeting_registrants(123), tbl) @@ -244,6 +252,7 @@ def test_get_user_webinars(self, m): ] ) + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) m.get(ZOOM_URI + "users/123/webinars", json=webinars) assert_matching_tables(self.zoom.get_user_webinars(123), tbl) @@ -299,6 +308,7 @@ def test_get_past_webinar_participants(self, m): ] ) + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) m.get(ZOOM_URI + "report/webinars/123/participants", json=participants) assert_matching_tables(self.zoom.get_past_webinar_participants(123), tbl) @@ -430,5 +440,6 @@ def test_get_webinar_registrants(self, m): ] ) + m.post(ZOOM_AUTH_CALLBACK, json={"access_token": "fakeAccessToken"}) m.get(ZOOM_URI + "webinars/123/registrants", json=registrants) assert_matching_tables(self.zoom.get_webinar_registrants(123), tbl)
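The Zoom changes replace the deprecated JWT credentials (`api_key` / `api_secret`) with Zoom's Server-to-Server OAuth flow: `__init__` now posts to `https://zoom.us/oauth/token` once and sends the returned bearer token with every request, which is why each test above mocks the auth callback before hitting an endpoint. A minimal usage sketch under those assumptions (the credential values are placeholders, mirroring the test fixtures):

```python
import os
from parsons import Zoom

# Placeholders - supply real Server-to-Server OAuth app credentials, or pass
# them directly: Zoom(account_id=..., client_id=..., client_secret=...).
os.environ["ZOOM_ACCOUNT_ID"] = "fakeAccountID"
os.environ["ZOOM_CLIENT_ID"] = "fakeClientID"
os.environ["ZOOM_CLIENT_SECRET"] = "fakeClientSecret"

zoom = Zoom()  # exchanges the credentials for an access token at init
users = zoom.get_users()  # returns a Parsons Table
```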