azure mail reports api type (#8)
* azure mail reports api type

* remove unused import

* added readme, refactor methods for future override

* change docker base image to slim
tamir-michaeli authored Jan 16, 2024
1 parent 5504335 commit c231df6
Showing 9 changed files with 189 additions and 26 deletions.
35 changes: 35 additions & 0 deletions README.md
@@ -74,6 +74,7 @@ The following configuration uses OAuth 2.0 flow.
Supported types:

- azure_graph
- azure_mail_reports
- general

The following parameters are for every type:
@@ -202,8 +203,40 @@ oauth_apis:
settings:
time_interval: 1
start_date_name: activityDateTime
- type: azure_mail_reports
name: mail_reports
credentials:
id: <<AZURE_AD_SECRET_ID>>
key: <<AZURE_AD_SECRET_VALUE>>
token_http_request:
url: https://login.microsoftonline.com/abcd-efgh-abcd-efgh/oauth2/v2.0/token
body: client_id=<<AZURE_AD_CLIENT_ID>>
&scope=https://outlook.office365.com/.default
&client_secret=<<AZURE_AD_SECRET_VALUE>>
&grant_type=client_credentials
headers:
method: POST
data_http_request:
url: https://reports.office365.com/ecp/reportingwebservice/reporting.svc/MessageTrace
method: GET
headers:
json_paths:
data_date: EndDate
next_url:
data:
filters:
format: Json
settings:
time_interval: 60 # for mail reports we suggest no less than 60 minutes
days_back_fetch: 8 # for mail reports we suggest up to 8 days
start_date_name: StartDate
end_date_name: EndDate

```
### Azure mail reports type: important notes and limitations
* We recommend setting the `days_back_fetch` parameter to no more than `8` days (~192 hours); larger values might cause unexpected errors with the API.
* We recommend setting the `time_interval` parameter to no less than `60` minutes, to avoid short time frames in which message trace events could be missed.
* Microsoft may delay trace events for up to 24 hours, and events are not guaranteed to be sequential during this delay. For more information, see the "Data granularity, persistence, and availability" section of the MessageTrace report documentation: [MessageTrace report API](https://learn.microsoft.com/en-us/previous-versions/office/developer/o365-enterprise-developers/jj984335(v=office.15)#data-granularity-persistence-and-availability)
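
For reference, the two HTTP calls this configuration describes can be reproduced manually. The sketch below uses the `requests` library; the tenant id, client credentials, and time window are placeholders, and it illustrates the request shape rather than the connector's implementation. The records come back under `d.results`, which is the JSON path the `azure_mail_reports` type reads.

```python
import requests

TENANT_ID = "<<AZURE_AD_TENANT_ID>>"         # placeholder
CLIENT_ID = "<<AZURE_AD_CLIENT_ID>>"         # placeholder
CLIENT_SECRET = "<<AZURE_AD_SECRET_VALUE>>"  # placeholder

# 1. token_http_request: OAuth 2.0 client-credentials flow.
token_response = requests.post(
    f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token",
    data={
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": "https://outlook.office365.com/.default",
        "grant_type": "client_credentials",
    },
)
access_token = token_response.json()["access_token"]

# 2. data_http_request: MessageTrace query for an example one-hour window,
#    mirroring the $filter the connector builds from start_date_name/end_date_name.
url = (
    "https://reports.office365.com/ecp/reportingwebservice/reporting.svc/MessageTrace"
    "?$filter=StartDate eq datetime'2024-01-15T00:00:00Z' "
    "and EndDate eq datetime'2024-01-15T01:00:00Z'"
    "&$format=Json"
)
report_response = requests.get(url, headers={"Authorization": f"Bearer {access_token}"})
print(report_response.json()["d"]["results"][:1])  # message trace records live under d.results
```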

### Create Last Start Dates Text File

@@ -241,6 +274,8 @@ If you stopped the container, you can continue from the exact place you stopped,

## Changelog:

- **0.1.0**:
- Added `azure_mail_reports` type.
- **0.0.6**:
- Improved documentation.
- Added error log.
2 changes: 1 addition & 1 deletion dockerfile
@@ -1,4 +1,4 @@
FROM python:3.9
FROM python:3.9-slim
WORKDIR /app
COPY /src ./src
COPY requirements.txt requirements.txt
40 changes: 25 additions & 15 deletions src/api.py
@@ -10,7 +10,6 @@
from dateutil import parser
from requests import Response
from .data.base_data.api_base_data import ApiBaseData
from .data.base_data.api_filter import ApiFilter
from .data.base_data.api_custom_field import ApiCustomField
from .data.general_type_data.api_general_type_data import ApiGeneralTypeData

@@ -94,9 +93,7 @@ def _get_last_date(self, first_item: dict) -> str:
def _is_item_in_fetch_frame(self, item: dict, last_datetime_to_fetch: datetime) -> bool:
item_date = self._get_json_path_value_from_data(
self._general_type_data.json_paths.data_date, item)

item_datetime = parser.parse(item_date)

if item_datetime < last_datetime_to_fetch:
return False

@@ -122,31 +119,33 @@ def _get_new_start_date(self) -> str:

def _get_data_from_api(self, url: str) -> tuple[Optional[str], list]:
next_url = None
json_data = self._parse_response_to_json(url)
if self._general_type_data.json_paths.next_url:
next_url = self._get_json_path_value_from_data(
self._general_type_data.json_paths.next_url, json_data)
data = self._parse_and_verify_data_received(json_data)
return next_url, data

def _parse_response_to_json(self, url):
try:
response = self._get_response_from_api(url)
except Exception:
raise

json_data = json.loads(response.content)
return json_data

def _parse_and_verify_data_received(self, json_data):
data = self._get_json_path_value_from_data(
self._general_type_data.json_paths.data, json_data)

if data is None:
logger.error(
"The json path for api {}'s data is wrong. Please change your configuration.".format(
self._base_data.name))
raise Api.ApiError

data_size = len(data)

if data:
logger.info("Successfully got {0} data from api {1}.".format(data_size, self._base_data.name))

return data

def _get_response_from_api(self, url: str) -> Response:
try:
@@ -156,13 +155,24 @@ def _get_response_from_api(self, url: str) -> Response:
logger.error(
"Something went wrong while trying to get the data from api {0}. response: {1}".format(
self._base_data.name, e))

if e.response.status_code == 400 or e.response.status_code == 401:
raise Api.ApiError()

raise
except Exception as e:
logger.error("Something went wrong with api {0}. response: {1}".format(self._base_data.name, e))
raise

return response

def get_current_time_utc_string(self):
time = datetime.utcnow()
time = time.isoformat(' ', 'seconds')
time = time.replace(' ', 'T')
time += 'Z'
return time

def _get_next_page_url(self, json_data: dict):
if self._general_type_data.json_paths.next_url:
next_url = self._get_json_path_value_from_data(
self._general_type_data.json_paths.next_url, json_data)
return next_url
return None
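
Per the commit message, `_parse_response_to_json` and `_parse_and_verify_data_received` were split out of `_get_data_from_api` so that subclasses can override them. A hypothetical sketch of such an override (the subclass name and the `payload` envelope are assumptions, not part of this repository):

```python
from src.api import Api


class ExampleWrappedApi(Api):
    """Hypothetical subclass: unwraps an extra envelope before the
    base-class verification and logging in _parse_and_verify_data_received run."""

    def _parse_and_verify_data_received(self, json_data):
        unwrapped = json_data.get("payload", json_data)
        return super()._parse_and_verify_data_received(unwrapped)
```
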
9 changes: 6 additions & 3 deletions src/apis_manager.py
@@ -7,6 +7,7 @@
from typing import Optional
from requests.sessions import InvalidSchema
from .azure_graph import AzureGraph
from .azure_mail_reports import AzureMailReports
from .config_reader import ConfigReader
from .data.logzio_connection import LogzioConnection
from .data.auth_api_data import AuthApiData
@@ -21,7 +22,7 @@


class ApisManager:

API_AZURE_MAIL_REPORTS_TYPE = "azure_mail_reports"
CONFIG_FILE = 'src/shared/config.yaml'
LAST_START_DATES_FILE = 'src/shared/last_start_dates.txt'

@@ -30,7 +31,7 @@ class ApisManager:
API_AZURE_GRAPH_TYPE = 'azure_graph'

AUTH_API_TYPES = [API_GENERAL_TYPE, API_CISCO_SECURE_X_TYPE]
OAUTH_API_TYPES = [API_GENERAL_TYPE, API_AZURE_GRAPH_TYPE]
OAUTH_API_TYPES = [API_GENERAL_TYPE, API_AZURE_GRAPH_TYPE, API_AZURE_MAIL_REPORTS_TYPE]

def __init__(self) -> None:
self._apis: list[Api] = []
@@ -89,8 +90,10 @@ def _add_auth_api(self, auth_api_data: AuthApiData) -> None:
def _add_oauth_api(self, oauth_api_data: OAuthApiData) -> None:
if oauth_api_data.base_data.base_data.type == ApisManager.API_GENERAL_TYPE:
self._apis.append(OAuthApi(oauth_api_data.base_data, oauth_api_data.general_type_data))
else:
elif oauth_api_data.base_data.base_data.type == ApisManager.API_AZURE_GRAPH_TYPE:
self._apis.append(AzureGraph(oauth_api_data))
elif oauth_api_data.base_data.base_data.type == ApisManager.API_AZURE_MAIL_REPORTS_TYPE:
self._apis.append(AzureMailReports(oauth_api_data))

def _run_api_scheduled_task(self, api: Api) -> None:
logzio_shipper = LogzioShipper(self._logzio_connection.url, self._logzio_connection.token)
79 changes: 79 additions & 0 deletions src/azure_mail_reports.py
@@ -0,0 +1,79 @@
import logging
from dateutil import parser
import re

from datetime import datetime
from src.api import Api
from src.data.oauth_api_data import OAuthApiData
from src.oauth_api import OAuthApi

logger = logging.getLogger(__name__)


class AzureMailReports(OAuthApi):
MAIL_REPORTS_DATA_LINK = 'd.results'
MAIL_REPORTS_FILTER_CONCAT = '&$'
MAIL_REPORTS_MAX_PAGE_SIZE = 1000
DATE_REGEX_FILTER = '\d+'

def __init__(self, oauth_api_data: OAuthApiData) -> None:
oauth_api_data.general_type_data.general_type_data.json_paths.data = self.MAIL_REPORTS_DATA_LINK
self._previous_end_date = None
super().__init__(oauth_api_data.base_data, oauth_api_data.general_type_data)

def get_last_start_date(self) -> str:
return self._current_data_last_date

def _build_api_url(self) -> str:
api_url = self._data_request.url
api_filters_num = self._base_data.get_filters_size()
new_end_date = self.get_new_end_date()
new_start_date = self.get_start_date_filter()
api_url += f"?$filter={self._general_type_data.start_date_name} eq datetime'{new_start_date}' and {self._general_type_data.end_date_name} eq datetime'{new_end_date}'"
self._previous_end_date = new_end_date
if api_filters_num > 0:
api_url += self.MAIL_REPORTS_FILTER_CONCAT
for api_filter in self._base_data.filters:
api_url += api_filter.key + '=' + str(api_filter.value)
api_filters_num -= 1
if api_filters_num > 0:
api_url += self.MAIL_REPORTS_FILTER_CONCAT
return api_url

def _get_last_date(self, first_item: dict) -> str:
first_item_date = self._get_json_path_value_from_data(
self._general_type_data.json_paths.data_date, first_item)

if first_item_date is None:
logger.error(
"The json path for api {}'s data date is wrong. Please change your configuration.".format(
self._base_data.name))
raise Api.ApiError
return self._get_formatted_date_from_date_path_value(first_item_date)

def _is_item_in_fetch_frame(self, item: dict, last_datetime_to_fetch: datetime) -> bool:
item_date = self._get_json_path_value_from_data(
self._general_type_data.json_paths.data_date, item)
item_datetime = parser.parse(self._get_formatted_date_from_date_path_value(item_date))
if item_datetime < last_datetime_to_fetch:
return False

return True

def _get_formatted_date_from_date_path_value(self, date_path_value: str) -> str:
epoch_milisec_date = re.findall(self.DATE_REGEX_FILTER, date_path_value)
date = datetime.fromtimestamp(int(int(epoch_milisec_date[0]) / 1000))
formatted_date = date.isoformat(' ', 'seconds')
formatted_date = formatted_date.replace(' ', 'T')
formatted_date += 'Z'
return formatted_date

def _set_current_data_last_date(self, date):
# This comparison might not work on other date formats
if (self._previous_end_date and date and self._previous_end_date > date) or not date:
self._set_current_data_last_date(self._previous_end_date)
else:
self._current_data_last_date = date

def get_new_end_date(self):
return self.get_current_time_utc_string()
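
For clarity, `_get_formatted_date_from_date_path_value` extracts the epoch-millisecond value embedded in the MessageTrace date fields and re-emits it as an ISO-8601 `Z` timestamp. Below is a standalone sketch of the same logic; the sample `/Date(...)/` input is an assumption about the payload format, and, like the method above, the output depends on the local timezone because of `datetime.fromtimestamp`.

```python
import re
from datetime import datetime

DATE_REGEX_FILTER = r'\d+'


def format_mail_report_date(date_path_value: str) -> str:
    # Pull the first run of digits (epoch milliseconds) out of the raw value.
    epoch_millis = re.findall(DATE_REGEX_FILTER, date_path_value)[0]
    date = datetime.fromtimestamp(int(int(epoch_millis) / 1000))
    return date.isoformat(' ', 'seconds').replace(' ', 'T') + 'Z'


print(format_mail_report_date("/Date(1705312800000)/"))  # 2024-01-15T10:00:00Z on a UTC machine
```
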
19 changes: 17 additions & 2 deletions src/config_reader.py
@@ -42,13 +42,15 @@ class ConfigReader:
API_FILTERS_CONFIG_KEY = 'filters'
API_CUSTOM_FIELDS_CONFIG_KEY = 'custom_fields'
API_START_DATE_NAME_CONFIG_KEY = 'start_date_name'
API_END_DATE_NAME_CONFIG_KEY = "end_date_name"
GENERAL_AUTH_API_HTTP_REQUEST_CONFIG_KEY = 'http_request'
OAUTH_API_TOKEN_HTTP_REQUEST_CONFIG_KEY = 'token_http_request'
OAUTH_API_DATA_HTTP_REQUEST_CONFIG_KEY = 'data_http_request'
API_HTTP_REQUEST_METHOD_CONFIG_KEY = 'method'
API_HTTP_REQUEST_URL_CONFIG_KEY = 'url'
API_HTTP_REQUEST_HEADERS_CONFIG_KEY = 'headers'
API_HTTP_REQUEST_BODY_CONFIG_KEY = 'body'
API_HTTP_REQUEST_PAGE_SIZE = 'page_size'
GENERAL_API_JSON_PATHS_CONFIG_KEY = 'json_paths'
GENERAL_API_JSON_PATHS_NEXT_URL_CONFIG_KEY = 'next_url'
GENERAL_API_JSON_PATHS_DATA_CONFIG_KEY = 'data'
@@ -289,14 +291,15 @@ def _get_oauth_api_general_type_data(self, config_oauth_api_data: dict,
def _get_api_general_type_data(self, config_api_data, api_group_type: str,
api_num: int) -> Optional[ApiGeneralTypeData]:
api_start_date_name = self._get_api_start_date_name(config_api_data, api_group_type, api_num)
api_end_date_name = self._get_api_end_date_name(config_api_data, api_group_type, api_num)
api_json_paths = self._get_api_json_paths(config_api_data, api_group_type, api_num)

if (api_start_date_name is None and api_group_type != self.OAUTH_API) or api_json_paths is None:
logger.error(
"Your configuration is not valid:\"json_paths\" must exist for all api types, \"start_date_name\" must exist for non oauth api types")
return None

return ApiGeneralTypeData(api_start_date_name, api_json_paths)
return ApiGeneralTypeData(api_start_date_name, api_end_date_name, api_json_paths)

def _get_api_start_date_name(self, config_api_data: dict, api_group_type: str, api_num: int) -> Optional[str]:
try:
@@ -309,6 +312,17 @@ def _get_api_start_date_name(self, config_api_data: dict, api_group_type: str, api_num: int) -> Optional[str]:

return api_start_date_name

def _get_api_end_date_name(self, config_api_data: dict, api_group_type: str, api_num: int) -> Optional[str]:
try:
api_end_date_name = config_api_data[ConfigReader.API_END_DATE_NAME_CONFIG_KEY]
except KeyError:
logger.warning(
"Missing field in config: the general type {0} api #{1} must have end_date_name.".format(
api_group_type, api_num))
return None

return api_end_date_name

def _get_api_json_paths(self, config_api_data: dict, api_group_type: str, api_num: int) -> Optional[ApiJsonPaths]:
api_json_paths = config_api_data[ConfigReader.GENERAL_API_JSON_PATHS_CONFIG_KEY]
api_data_date_json_path = api_json_paths.get(ConfigReader.GENERAL_API_JSON_PATHS_DATA_DATE_CONFIG_KEY)
@@ -393,6 +407,7 @@ def _get_oauth_api_http_requests(self, config_oauth_api_data: dict,
data_http_request = ApiHttpRequest(api_data_http_request_method,
api_data_url,
api_data_http_request.get(ConfigReader.API_HTTP_REQUEST_HEADERS_CONFIG_KEY),
api_data_http_request.get(ConfigReader.API_HTTP_REQUEST_BODY_CONFIG_KEY))
api_data_http_request.get(ConfigReader.API_HTTP_REQUEST_BODY_CONFIG_KEY),
api_data_http_request.get(ConfigReader.API_HTTP_REQUEST_PAGE_SIZE))

return token_http_request, data_http_request
15 changes: 12 additions & 3 deletions src/data/api_http_request.py
@@ -1,15 +1,24 @@
class ApiHttpRequest:

GET_METHOD = 'GET'
POST_METHOD = 'POST'
HTTP_METHODS = [GET_METHOD, POST_METHOD]

def __init__(self, api_http_request_method: str, api_url: str, api_http_request_headers: dict = None,
api_http_request_body: str = None) -> None:
def __init__(self, api_http_request_method: str, api_url: str,
api_http_request_headers: dict = None,
api_http_request_body: str = None, page_size: int = None) -> None:
self._method = api_http_request_method
self._url = api_url
self._headers = api_http_request_headers
self._body = api_http_request_body
self._page_size = page_size

@property
def page_size(self) -> int:
return self._page_size

@page_size.setter
def page_size(self, page_size) -> None:
self._page_size = page_size

@property
def method(self) -> str:
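
A minimal usage sketch of the extended `ApiHttpRequest` constructor; the URL is illustrative, and `1000` mirrors the `MAIL_REPORTS_MAX_PAGE_SIZE` constant introduced in `azure_mail_reports.py`.

```python
from src.data.api_http_request import ApiHttpRequest

request = ApiHttpRequest(
    api_http_request_method=ApiHttpRequest.GET_METHOD,
    api_url="https://reports.office365.com/ecp/reportingwebservice/reporting.svc/MessageTrace",
    page_size=1000,
)
print(request.page_size)  # 1000
request.page_size = 500   # setter added in this commit
```
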
8 changes: 7 additions & 1 deletion src/data/general_type_data/api_general_type_data.py
@@ -3,10 +3,16 @@

class ApiGeneralTypeData:

def __init__(self, api_start_date_name: str, api_json_paths: ApiJsonPaths) -> None:
def __init__(self, api_start_date_name: str, api_end_date_name: str,
api_json_paths: ApiJsonPaths) -> None:
self._start_date_name = api_start_date_name
self._end_date_name = api_end_date_name
self._json_paths = api_json_paths

@property
def end_date_name(self) -> str:
return self._end_date_name

@property
def start_date_name(self) -> str:
return self._start_date_name
