Skip to content

Commit

Permalink
1073 Remove credentials from Population Data Download (#1074)
Browse files Browse the repository at this point in the history
  • Loading branch information
patricklnz authored Aug 13, 2024
1 parent 717c026 commit 896a04e
Show file tree
Hide file tree
Showing 6 changed files with 7 additions and 146 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/epidata_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ jobs:
run: |
mkdir -p data_dl
getcasedata -o data_dl --no-progress-indicators
getpopuldata -o data_dl --no-progress-indicators --username=${{ secrets.REGIODBUSER }} --password=${{ secrets.REGIODBPW }}
getpopuldata -o data_dl --no-progress-indicators
getjhdata -o data_dl --no-progress-indicators
getdividata -o data_dl --no-progress-indicators
getcommutermobility -o data_dl --no-progress-indicators
Expand Down
4 changes: 0 additions & 4 deletions pycode/memilio-epidata/memilio/epidata/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,6 @@ optional arguments working for some are:
| --sanitize-data | Different ways to distribute vaccinations to home |
| | locations of vaccinated persons[vaccination] |
+---------------------------------------------+-----------------------------------------------------------+
| --username | Username for regionalstatistik.de [population] |
+---------------------------------------------+-----------------------------------------------------------+
| --password | Password for regionalstatistik.de [population] |
+---------------------------------------------+-----------------------------------------------------------+
| --files | Files to write [case] |
+---------------------------------------------+-----------------------------------------------------------+

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,6 @@ def cli(what):
- verbose
- skip_checks
- no_raw
- username
- password
- to_dataset
@param what Defines which package calls this function and thus which command line arguments should be defined.
Expand All @@ -379,7 +377,7 @@ def cli(what):

cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'],
"population": ['Download population data from official sources', 'username'],
"population": ['Download population data from official sources'],
"commuter_official": ['Download commuter data from official sources'],
"vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'sanitize_data'],
"testing": ['Download testing data', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
Expand Down Expand Up @@ -498,14 +496,6 @@ def cli(what):
'--skip-checks', dest='run_checks', action='store_false',
help='Skips sanity checks etc.')

if 'username' in what_list:
parser.add_argument(
'--username', type=str
)

parser.add_argument(
'--password', type=str
)
if '--to-dataset' in sys.argv:
parser.add_argument(
'--to-dataset', dest='to_dataset',
Expand Down
84 changes: 3 additions & 81 deletions pycode/memilio-epidata/memilio/epidata/getPopulationData.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
@brief Downloads data about population statistic
"""
import configparser
import warnings
import getpass
import requests
import os
import io
Expand All @@ -41,82 +39,19 @@
pd.options.mode.copy_on_write = True


def read_population_data(username, password):
def read_population_data():
"""! Reads Population data from regionalstatistik.de
Username and Password are required to sign in on regionalstatistik.de.
A request is made to regionalstatistik.de and the StringIO is read in as a csv into the dataframe format.
@param username Username to sign in at regionalstatistik.de.
@param password Password to sign in at regionalstatistik.de.
@return DataFrame
"""

download_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv'
req = requests.get(download_url, auth=(username, password))
req = requests.get(download_url)
df_pop_raw = pd.read_csv(io.StringIO(req.text), sep=';', header=6)

return df_pop_raw

# This function is needed for unittests
# Fakefilesystem has problems with os.path


def path_to_credential_file():
"""! Returns path to .ini file where credentials are stored.
The path can be changed if necessary.
"""
return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'CredentialsRegio.ini')


def manage_credentials(interactive):
"""! Manages credentials for regionalstatistik.de (needed for dowload).
A config file inside the epidata folder is either written (if it does not exist yet)
with input from user or read with following format:
[CREDENTIALS]
Username = XXXXX
Password = XXXXX
@return Username and password to sign in at regionalstatistik.de.
"""
# path where ini file is found
path = path_to_credential_file()

gd.default_print(
'Info', 'No passwaord and/or username for regionalstatistik.de provided. Try to read from .ini file.')

# check if .ini file exists
if not os.path.exists(path):
if interactive:
gd.default_print(
'Info', '.ini file not found. Writing CredentialsRegio.ini...')
username = input(
"Please enter username for https://www.regionalstatistik.de/genesis/online\n")
password = getpass.getpass(
"Please enter password for https://www.regionalstatistik.de/genesis/online\n")
# create file
write_ini = gd.user_choice(
message='Do you want the credentials to be stored in an unencrypted .ini file?\n' +
'The next time this function is called, the credentials can be read from that file.')
if write_ini:
string = '[CREDENTIALS]\nUsername = ' + \
username+'\nPassword = '+password
with open(path, 'w+') as file:
file.write(string)
else:
raise gd.DataError(
'No .ini file found. Cannot access regionalstatistik.de for downloading population data.')

else:
parser = configparser.ConfigParser()
parser.read(path)

username = parser['CREDENTIALS']['Username']
password = parser['CREDENTIALS']['Password']

return username, password


def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_format: str, merge_eisenach: bool):
"""! Writes population dataframe into directory with new column names and age groups
Expand Down Expand Up @@ -285,8 +220,6 @@ def test_total_population(df_pop, age_cols):

def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
out_folder: str = dd.defaultDict['out_folder'],
username='',
password='',
**kwargs
) -> pd.DataFrame:
"""! Downloads or reads the population data.
Expand All @@ -299,9 +232,6 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
downloaded. Default defined in defaultDict.
@param out_folder Path to folder where data is written in folder
out_folder/Germany. Default defined in defaultDict.
@param username Username to sign in at regionalstatistik.de.
@param password Password to sign in at regionalstatistik.de.
@return DataFrame with adjusted population data for all ages to current level.
"""
conf = gd.Conf(out_folder, **kwargs)
Expand All @@ -312,14 +242,10 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
'Warning', 'Read_data is not supportet for getPopulationData.py. Setting read_data = False')
read_data = False

# If no username or password is provided, the credentials are either read from an .ini file or,
# if the file does not exist they have to be given as user input.
if (username is None) or (password is None):
username, password = manage_credentials(conf.interactive)
directory = os.path.join(out_folder, 'Germany')
gd.check_dir(directory)

df_pop_raw = read_population_data(username, password)
df_pop_raw = read_population_data()

return df_pop_raw

Expand Down Expand Up @@ -411,8 +337,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
file_format: str = dd.defaultDict['file_format'],
out_folder: str = dd.defaultDict['out_folder'],
merge_eisenach: bool = True,
username='',
password='',
**kwargs
):
"""! Download age-stratified population data for the German counties.
Expand Down Expand Up @@ -453,8 +377,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
read_data=read_data,
out_folder=out_folder,
file_format=file_format,
username=username,
password=password,
**kwargs
)
preprocess_df = preprocess_population_data(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def test_call_functions(
# change start-date of jh to 2020-01-22
arg_dict_jh["start_date"] = date(2020, 1, 22)

arg_dict_popul = {**arg_dict_all, "username": None, "password": None}
arg_dict_popul = {**arg_dict_all}

getVaccinationData.main()
mock_vaccination.assert_called()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ class Test_getPopulationData(fake_filesystem_unittest.TestCase):

path = '/home/Population_Data'

config_file_name = 'CredentialsRegio.ini'
test_username = 'username_test'
test_password = 'password_test'

here = os.path.dirname(os.path.abspath(__file__))
filename = os.path.join(
here, 'test_data', 'TestSetPopulationExport.json')
Expand Down Expand Up @@ -71,53 +67,10 @@ def test_export_population_data(self):
return_value=df_pop_raw)
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
@patch('memilio.epidata.getPopulationData.test_total_population')
def test_get_population_data_full(self, mock_test, mock_export, mock_download):
def test_get_population_data_full(self, mock_test, mock_assign, mock_download):
# should not raise any errors
gpd.get_population_data(out_folder=self.path)

@patch('builtins.input', return_value=test_username)
@patch('getpass.getpass', return_value=test_password)
@patch('memilio.epidata.getDataIntoPandasDataFrame.user_choice', return_value=True)
@patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini')
@patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw)
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
@patch('memilio.epidata.getPopulationData.test_total_population')
def test_config_write(self, mock_test, mock_export, mock_raw, mock_path, mock_choice, mock_pw, mock_un):
# username and password should be written into the config file.
# The download and assigning to counties of the population data is mocked.
gpd.get_population_data(username=None, password=None, interactive=True)
# Check if the file is written.
self.assertTrue(self.config_file_name in os.listdir(os.getcwd()))
# Check content of the file.
# Read file.
parser = configparser.ConfigParser()
parser.read(os.path.join(os.getcwd(), self.config_file_name))
# Test content.
self.assertEqual(parser['CREDENTIALS']['Username'], self.test_username)
self.assertEqual(parser['CREDENTIALS']['Password'], self.test_password)

@patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini')
@patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw)
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
@patch('memilio.epidata.getPopulationData.test_total_population')
def test_config_read(self, mock_test, mock_export, mock_read, mock_path):
# File should not exist yet.
self.assertFalse(self.config_file_name in os.listdir(os.getcwd()))
# Create config file.
string = '[CREDENTIALS]\nUsername = ' + \
self.test_username+'\nPassword = '+self.test_password
path = os.path.join(os.getcwd(), self.config_file_name)
with open(path, 'w+') as file:
file.write(string)
# Check if the file is written.
self.assertTrue(self.config_file_name in os.listdir(os.getcwd()))
# The download and assigning to counties of the population data is mocked.
gpd.get_population_data(
username=None, password=None, read_data=False, out_folder=self.path, interactive=False)
# The file exists in the (mocked) directory and the credentials should be read from it.
mock_read.assert_called_with(
self.test_username, self.test_password)


if __name__ == '__main__':
unittest.main()

0 comments on commit 896a04e

Please sign in to comment.