diff --git a/duo_client/admin.py b/duo_client/admin.py index b2cffdb..374ff25 100644 --- a/duo_client/admin.py +++ b/duo_client/admin.py @@ -173,17 +173,18 @@ VALID_AUTHLOG_REQUEST_PARAMS = [ 'mintime', 'maxtime', - 'limit', - 'sort', 'next_offset', 'event_types', 'reasons', 'results', 'users', 'applications', + 'phone_numbers', + 'registration_id', + 'token_id', + 'webauthnkey', 'groups', 'factors', - 'api_version' ] @@ -320,41 +321,48 @@ def get_administrator_log(self, row['host'] = self.host return response - def get_authentication_log(self, api_version=1, **kwargs): + def get_authentication_log_iterator(self, params={}): """ - Returns authentication log events. - - api_version - The api version of the handler to use. Currently, the - default api version is v1, but the v1 api will be - deprecated in a future version of the Duo Admin API. - Please migrate to the v2 api at your earliest convenience. - For details on the differences between v1 and v2, - please see Duo's Admin API documentation. (Optional) - - API Version v1: - - mintime - Fetch events only >= mintime (to avoid duplicate - records that have already been fetched) + Provides a generator which produces authentication logs. Under the + hood, this generator uses pagination, so it will only store one page of + administrative_units at a time in memory. + Returns: A generator which produces authentication logs. + Raises RuntimeError on error. + """ + + multi_auth_log_parameters = [ + 'applications', + 'users', + 'event_types', + 'factors', + 'groups', + 'phone_numbers', + 'reasons', + 'results', + 'registration_id', + 'token_id', + 'webauthnkey' + ] + + # This will take a python list for params that support multiple + # parameters and make a comma seperated string + for multi_auth_log_parameter in multi_auth_log_parameters: + if multi_auth_log_parameter in params.keys(): + params[multi_auth_log_parameter] = ",".join( + params[multi_auth_log_parameter] + ) - Returns: - [ - {'timestamp': , - 'eventtype': "authentication", - 'host': , - 'username': , - 'factor': , - 'result': , - 'ip': , - 'new_enrollment': , - 'integration': , - 'location': { - 'state': '', - 'city': '', - 'country': '' - } - }] + return self.json_paging_api_call( + 'GET', + "/admin/v2/logs/authentication", + params + ) - Raises RuntimeError on error. + def get_authentication_log(self, + sort="ts:asc", + **kwargs): + """ + Returns a list of authentication log events. API Version v2: @@ -370,6 +378,12 @@ def get_authentication_log(self, api_version=1, **kwargs): reasons - List of reasons to filter to filter on factors - List of factors to filter on event_types - List of event_types to filter on + phone_numbers - List of phone_numbers to filter on + registration_id - List of registration ids to filter on + token_id - List of token ids to filter on + webauthnkey - List of webauthn keys to filter on + factors - List of factors to filter on + Returns: { @@ -419,51 +433,74 @@ def get_authentication_log(self, api_version=1, **kwargs): Raises RuntimeError on error. """ - if api_version not in [1,2]: - raise ValueError("Invalid API Version") - params = {} + for k in kwargs: + if kwargs[k] is not None and k in VALID_AUTHLOG_REQUEST_PARAMS: + params[k] = kwargs[k] - if api_version == 1: #v1 - params['mintime'] = kwargs['mintime'] if 'mintime' in kwargs else 0; - # Sanity check mintime as unix timestamp, then transform to string - params['mintime'] = '{:d}'.format(int(params['mintime'])) - warnings.warn( - 'The v1 Admin API for retrieving authentication log events ' - 'will be deprecated in a future release of the Duo Admin API. ' - 'Please migrate to the v2 API.', - DeprecationWarning) - else: #v2 - for k in kwargs: - if kwargs[k] is not None and k in VALID_AUTHLOG_REQUEST_PARAMS: - params[k] = kwargs[k] + if 'mintime' not in params: + params['mintime'] = (int(time.time()) - 86400) * 1000 + # Sanity check mintime as unix timestamp, then transform to string + params['mintime'] = '{:d}'.format(int(params['mintime'])) + + # Querying for results more recent than two mins will return as empty. + if 'maxtime' not in params: + params['maxtime'] = int(time.time() - 120) * 1000 + # Sanity check maxtime as unix timestamp, then transform to string + params['maxtime'] = '{:d}'.format(int(params['maxtime'])) - if 'mintime' not in params: - params['mintime'] = (int(time.time()) - 86400) * 1000 - # Sanity check mintime as unix timestamp, then transform to string - params['mintime'] = '{:d}'.format(int(params['mintime'])) + # Set the default limit to 1000, the max + if 'limit' not in params: + params['limit'] = "1000" - if 'maxtime' not in params: - params['maxtime'] = int(time.time()) * 1000 - # Sanity check maxtime as unix timestamp, then transform to string - params['maxtime'] = '{:d}'.format(int(params['maxtime'])) + return list(self.get_authentication_log_iterator(params)) + + def get_authentication_log_v1(self, **kwargs): + """ + Returns a list of authentication log events.. + mintime - Fetch events only >= mintime (to avoid duplicate + records that have already been fetched) + Returns: + [ + {'timestamp': , + 'eventtype': "authentication", + 'host': , + 'username': , + 'factor': , + 'result': , + 'ip': , + 'new_enrollment': , + 'integration': , + 'location': { + 'state': '', + 'city': '', + 'country': '' + } + }] + + Raises RuntimeError on error. + """ + params = {} + params['mintime'] = kwargs['mintime'] if 'mintime' in kwargs else 0 + # Sanity check mintime as unix timestamp, then transform to string + params['mintime'] = '{:d}'.format(int(params['mintime'])) + warnings.warn( + 'The v1 Admin API for retrieving authentication log events ' + 'will be deprecated in a future release of the Duo Admin API. ' + 'Please migrate to the v2 API.', DeprecationWarning) response = self.json_api_call( - 'GET', - '/admin/v{}/logs/authentication'.format(api_version), - params, + 'GET', + '/admin/v1/logs/authentication', + params ) - if api_version == 1: - for row in response: - row['eventtype'] = 'authentication' - row['host'] = self.host - else: - for row in response['authlogs']: - row['eventtype'] = 'authentication' - row['host'] = self.host + for row in response: + row['eventtype'] = 'authentication' + row['host'] = self.host + return response def get_telephony_log(self, diff --git a/duo_client/client.py b/duo_client/client.py index f94fedd..420beeb 100644 --- a/duo_client/client.py +++ b/duo_client/client.py @@ -36,8 +36,10 @@ pytz_error = e from .https_wrapper import CertValidatingHTTPSConnection +from http.client import RemoteDisconnected DEFAULT_CA_CERTS = os.path.join(os.path.dirname(__file__), 'ca_certs.pem') +MAX_GET_URL_LEN = 8192 def canon_params(params): @@ -183,7 +185,7 @@ def __init__(self, ikey, skey, host, self.sig_version = sig_version # Constants for handling rate limit backoff and retries - self._MAX_BACKOFF_WAIT_SECS = 32 + self._MAX_BACKOFF_WAIT_SECS = 64 self._INITIAL_BACKOFF_WAIT_SECS = 1 self._BACKOFF_FACTOR = 2 self._RATE_LIMITED_RESP_CODE = 429 @@ -220,60 +222,76 @@ def api_call(self, method, path, params): * path: Full path of the API endpoint. E.g. "/auth/v2/ping". * params: dict mapping from parameter name to stringified value, or a dict to be converted to json. - """ - if self.sig_version in (1, 2): - params = normalize_params(params) - elif self.sig_version in (3, 4): - # Raises if params are not a dict that can be converted - # to json. - params = self.canon_json(params) - - if self.sig_timezone == 'UTC': - now = email.utils.formatdate() - elif pytz is None: - raise pytz_error - else: - d = datetime.datetime.now(pytz.timezone(self.sig_timezone)) - now = d.strftime("%a, %d %b %Y %H:%M:%S %z") - - auth = sign(self.ikey, - self.skey, - method, - self.host, - path, - now, - self.sig_version, - params, - self.digestmod) - headers = { - 'Authorization': auth, - 'Date': now, - } - - if self.user_agent: - headers['User-Agent'] = self.user_agent - - if method in ['POST', 'PUT']: - if self.sig_version in (3,4): - headers['Content-type'] = 'application/json' - body = params - else: - headers['Content-type'] = 'application/x-www-form-urlencoded' - body = six.moves.urllib.parse.urlencode(params, doseq=True) - uri = path - else: - body = None - uri = path + '?' + six.moves.urllib.parse.urlencode(params, doseq=True) - encoded_headers = {} - for k, v in headers.items(): - if isinstance(k, six.text_type): - k = k.encode('ascii') - if isinstance(v, six.text_type): - v = v.encode('ascii') - encoded_headers[k] = v + Raises ValueError if invalid request. + """ - return self._make_request(method, uri, body, encoded_headers) + # backoff on rate limited requests and retry. if a request is rate + # limited after MAX_BACKOFF_WAIT_SECS, return the rate limited response + # We need to REGENERATE the request because of timestamp issues + # otherwise we'll get Received 401 Bad request timestamp eventually + wait_secs = self._INITIAL_BACKOFF_WAIT_SECS + while True: + if self.sig_version in (1, 2): + params = normalize_params(params) + elif self.sig_version in (3, 4): + # Raises if params are not a dict that can be converted + # to json. + params = self.canon_json(params) + + if self.sig_timezone == 'UTC': + now = email.utils.formatdate() + elif pytz is None: + raise pytz_error + else: + d = datetime.datetime.now(pytz.timezone(self.sig_timezone)) + now = d.strftime("%a, %d %b %Y %H:%M:%S %z") + + auth = sign(self.ikey, + self.skey, + method, + self.host, + path, + now, + self.sig_version, + params, + self.digestmod) + headers = { + 'Authorization': auth, + 'Date': now, + } + + if self.user_agent: + headers['User-Agent'] = self.user_agent + + if method in ['POST', 'PUT']: + if self.sig_version in (3, 4): + headers['Content-type'] = 'application/json' + body = params + else: + headers['Content-type'] = 'application/x-www-form-urlencoded' + body = six.moves.urllib.parse.urlencode(params, doseq=True) + uri = path + else: + body = None + uri = path + '?' + six.moves.urllib.parse.urlencode(params, doseq=True) + if len(uri) >= MAX_GET_URL_LEN: + raise RuntimeError("Invalid Request Length") + + encoded_headers = {} + for k, v in headers.items(): + if isinstance(k, six.text_type): + k = k.encode('ascii') + if isinstance(v, six.text_type): + v = v.encode('ascii') + encoded_headers[k] = v + response, data = self._make_request(method, uri, body, encoded_headers) + if (response.status != self._RATE_LIMITED_RESP_CODE): + break + random_offset = random.uniform(0.0, 1.0) # noqa: DUO102, non-cryptographic random use + sleep(wait_secs + random_offset) + wait_secs = wait_secs * self._BACKOFF_FACTOR + return (response, data) def _connect(self): # Host and port for the HTTP(S) connection to the API server. @@ -336,26 +354,28 @@ def _make_request(self, method, uri, body, headers): api_proto = 'https' uri = ''.join((api_proto, '://', self.host, uri)) conn = self._connect() - - # backoff on rate limited requests and retry. if a request is rate - # limited after MAX_BACKOFF_WAIT_SECS, return the rate limited response - wait_secs = self._INITIAL_BACKOFF_WAIT_SECS - while True: - response, data = self._attempt_single_request( - conn, method, uri, body, headers) - if (response.status != self._RATE_LIMITED_RESP_CODE or - wait_secs > self._MAX_BACKOFF_WAIT_SECS): - break - random_offset = random.uniform(0.0, 1.0) # noqa: DUO102, non-cryptographic random use - sleep(wait_secs + random_offset) - wait_secs = wait_secs * self._BACKOFF_FACTOR - + response, data = self._attempt_single_request( + conn, method, uri, body, headers) self._disconnect(conn) return (response, data) def _attempt_single_request(self, conn, method, uri, body, headers): - conn.request(method, uri, body, headers) - response = conn.getresponse() + response = None + max_attempts = 10 + attempt_counter = 0 + # Even with reliable internet, many subsequent requests will + # occasionally cause the Duo servers to respond with invalid data. + # here we capture these issues and resend after sleeping. + while not response: + try: + conn.request(method, uri, body, headers) + response = conn.getresponse() + except RemoteDisconnected as err: + response = None + attempt_counter += 1 + if attempt_counter >= max_attempts: + raise err + sleep(self._MAX_BACKOFF_WAIT_SECS) data = response.read() return (response, data) @@ -402,7 +422,13 @@ def json_paging_api_call(self, method, path, params): params['limit'] = str(self.paging_limit) while next_offset is not None: - params['offset'] = str(next_offset) + # This is done because auth logs are handled differently + # than other API calls, fortunatly, other API calls don't care + # if 'next_offset' is set. + if isinstance(next_offset, list) and len(next_offset) == 2: + params['next_offset'] = ",".join(next_offset) + else: + params['offset'] = str(next_offset) (response, data) = self.api_call(method, path, params) (objects, metadata) = self.parse_json_response_and_metadata(response, data) next_offset = metadata.get('next_offset', None) @@ -426,7 +452,7 @@ def json_cursor_api_call(self, method, path, params, get_records_func): :param get_records_func: Function that can be called to extract an iterable of records from the parsed response json. - + :returns: Generator which will yield records from the api response(s). """ @@ -494,7 +520,24 @@ def raise_error(msg): data = json.loads(data) if data['stat'] != 'OK': raise_error('Received error response: %s' % data) - return (data['response'], data.get('metadata', {})) + metadata = data.get('metadata', {}) + resp_data = data['response'] + # Auth Logs returns metadata inside response + # The list of data is also in a seperate key + # ... just to be confusing + if not metadata: + metadata = data['response'].get('metadata', {}) + # if we did get metadata in the 'response' + # we need to iterate through the other keys in response + # till we find one that is a list and not metadata + if metadata: + for resp_key in data['response'].keys(): + if resp_key == "metadata": + continue + if not isinstance(data['response'][resp_key], list): + continue + resp_data = data['response'][resp_key] + return (resp_data, metadata) except (ValueError, KeyError, TypeError): raise_error('Received bad response: %s' % data) diff --git a/examples/fetch_auth_logs.py b/examples/fetch_auth_logs.py new file mode 100644 index 0000000..e86600e --- /dev/null +++ b/examples/fetch_auth_logs.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +import sys +import datetime +import duo_client + + +def get_activity_logs(admin_api, min_time, users): + authlogs = [] + # Large amounts of duo users being requests, even if below the max + # URI limit, will just return no results ... 50 at a time seems to work + for i in range(0, len(users), 50): + end = i+50 + if end >= len(users): + end = len(users) + print(f"Requesting Auth Call {i}-{end}/{len(users)}") + # It is reccomended to set this to 30 or 60 if using + # get_authentication_log that will return many pages of results + admin_api._INITIAL_BACKOFF_WAIT_SECS = 32 + res = admin_api.get_authentication_log( + users=users[i:end], + limit="1000", + mintime=datetime.datetime.timestamp(min_time)*1000 + ) + authlogs += list(res) + print(f"Finished Auth Call {i}-{end}/{len(users)} - with {len(authlogs)} records") + return authlogs + + +def get_next_arg(prompt): + try: + return next(argv_iter) + except StopIteration: + return input(prompt) + + +def main(): + admin_api = duo_client.Admin( + ikey=get_next_arg('Admin API integration key ("DI..."): '), + skey=get_next_arg('integration secret key: '), + host=get_next_arg('API hostname ("api-....duosecurity.com"): '), + ) + + users = admin_api.get_users(limit=10) + one_ago = datetime.datetime.now() - datetime.timedelta(days=1) + bad_users = users + + bad_user_ids = [user['user_id'] for user in bad_users] + print(bad_user_ids) + act_logs = get_activity_logs(admin_api, one_ago, bad_user_ids) + for act_log in act_logs: + username = act_log['user']['name'] + security_agent = act_log['access_device']['security_agents'] + print(f"Found log for {username} running {security_agent}") + +if __name__ == "__main__": + argv_iter = iter(sys.argv[1:]) + main()