diff --git a/.github/workflows/epidata_main.yml b/.github/workflows/epidata_main.yml index 543d87ab5e..e3ee6ac2ca 100644 --- a/.github/workflows/epidata_main.yml +++ b/.github/workflows/epidata_main.yml @@ -135,7 +135,7 @@ jobs: run: | mkdir -p data_dl getcasedata -o data_dl --no-progress-indicators - getpopuldata -o data_dl --no-progress-indicators --username=${{ secrets.REGIODBUSER }} --password=${{ secrets.REGIODBPW }} + getpopuldata -o data_dl --no-progress-indicators getjhdata -o data_dl --no-progress-indicators getdividata -o data_dl --no-progress-indicators getcommutermobility -o data_dl --no-progress-indicators diff --git a/pycode/memilio-epidata/memilio/epidata/README.rst b/pycode/memilio-epidata/memilio/epidata/README.rst index 19781425e8..3691d55488 100644 --- a/pycode/memilio-epidata/memilio/epidata/README.rst +++ b/pycode/memilio-epidata/memilio/epidata/README.rst @@ -130,10 +130,6 @@ optional arguments working for some are: | --sanitize-data | Different ways to distribute vaccinations to home | | | locations of vaccinated persons[vaccination] | +---------------------------------------------+-----------------------------------------------------------+ -| --username | Username for regionalstatistik.de [population] | -+---------------------------------------------+-----------------------------------------------------------+ -| --password | Password for regionalstatistik.de [population] | -+---------------------------------------------+-----------------------------------------------------------+ | --files | Files to write [case] | +---------------------------------------------+-----------------------------------------------------------+ diff --git a/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py b/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py index f8cd39f593..a905fcf675 100644 --- a/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py +++ b/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py @@ -364,8 +364,6 @@ def cli(what): - verbose - skip_checks - no_raw - - username - - password - to_dataset @param what Defines what packages calls and thus what kind of command line arguments should be defined. @@ -379,7 +377,7 @@ def cli(what): cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'], "cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'], - "population": ['Download population data from official sources', 'username'], + "population": ['Download population data from official sources'], "commuter_official": ['Download commuter data from official sources'], "vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'sanitize_data'], "testing": ['Download testing data', 'start_date', 'end_date', 'impute_dates', 'moving_average'], @@ -498,14 +496,6 @@ def cli(what): '--skip-checks', dest='run_checks', action='store_false', help='Skips sanity checks etc.') - if 'username' in what_list: - parser.add_argument( - '--username', type=str - ) - - parser.add_argument( - '--password', type=str - ) if '--to-dataset' in sys.argv: parser.add_argument( '--to-dataset', dest='to_dataset', diff --git a/pycode/memilio-epidata/memilio/epidata/getPopulationData.py b/pycode/memilio-epidata/memilio/epidata/getPopulationData.py index fa867918c6..844ba54475 100644 --- a/pycode/memilio-epidata/memilio/epidata/getPopulationData.py +++ b/pycode/memilio-epidata/memilio/epidata/getPopulationData.py @@ -23,9 +23,7 @@ @brief Downloads data about population statistic """ -import configparser import warnings -import getpass import requests import os import io @@ -41,82 +39,19 @@ pd.options.mode.copy_on_write = True -def read_population_data(username, password): +def read_population_data(): """! Reads Population data from regionalstatistik.de - Username and Password are required to sign in on regionalstatistik.de. A request is made to regionalstatistik.de and the StringIO is read in as a csv into the dataframe format. - - @param username Username to sign in at regionalstatistik.de. - @param password Password to sign in at regionalstatistik.de. @return DataFrame """ download_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv' - req = requests.get(download_url, auth=(username, password)) + req = requests.get(download_url) df_pop_raw = pd.read_csv(io.StringIO(req.text), sep=';', header=6) return df_pop_raw -# This function is needed for unittests -# Fakefilesystem has problems with os.path - - -def path_to_credential_file(): - """! Returns path to .ini file where credentials are stored. - The Path can be changed if neccessary. - """ - return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'CredentialsRegio.ini') - - -def manage_credentials(interactive): - """! Manages credentials for regionalstatistik.de (needed for dowload). - - A connfig file inside the epidata folder is either written (if not existent yet) - with input from user or read with following format: - [CREDENTIALS] - Username = XXXXX - Password = XXXXX - - @return Username and password to sign in at regionalstatistik.de. - """ - # path where ini file is found - path = path_to_credential_file() - - gd.default_print( - 'Info', 'No passwaord and/or username for regionalstatistik.de provided. Try to read from .ini file.') - - # check if .ini file exists - if not os.path.exists(path): - if interactive: - gd.default_print( - 'Info', '.ini file not found. Writing CredentialsRegio.ini...') - username = input( - "Please enter username for https://www.regionalstatistik.de/genesis/online\n") - password = getpass.getpass( - "Please enter password for https://www.regionalstatistik.de/genesis/online\n") - # create file - write_ini = gd.user_choice( - message='Do you want the credentials to be stored in an unencrypted .ini file?\n' + - 'The next time this function is called, the credentials can be read from that file.') - if write_ini: - string = '[CREDENTIALS]\nUsername = ' + \ - username+'\nPassword = '+password - with open(path, 'w+') as file: - file.write(string) - else: - raise gd.DataError( - 'No .ini file found. Cannot access regionalstatistik.de for downloading population data.') - - else: - parser = configparser.ConfigParser() - parser.read(path) - - username = parser['CREDENTIALS']['Username'] - password = parser['CREDENTIALS']['Password'] - - return username, password - def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_format: str, merge_eisenach: bool): """! Writes population dataframe into directory with new column names and age groups @@ -285,8 +220,6 @@ def test_total_population(df_pop, age_cols): def fetch_population_data(read_data: bool = dd.defaultDict['read_data'], out_folder: str = dd.defaultDict['out_folder'], - username='', - password='', **kwargs ) -> pd.DataFrame: """! Downloads or reads the population data. @@ -299,9 +232,6 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'], downloaded. Default defined in defaultDict. @param out_folder Path to folder where data is written in folder out_folder/Germany. Default defined in defaultDict. - @param username Username to sign in at regionalstatistik.de. - @param password Password to sign in at regionalstatistik.de. - @return DataFrame with adjusted population data for all ages to current level. """ conf = gd.Conf(out_folder, **kwargs) @@ -312,14 +242,10 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'], 'Warning', 'Read_data is not supportet for getPopulationData.py. Setting read_data = False') read_data = False - # If no username or password is provided, the credentials are either read from an .ini file or, - # if the file does not exist they have to be given as user input. - if (username is None) or (password is None): - username, password = manage_credentials(conf.interactive) directory = os.path.join(out_folder, 'Germany') gd.check_dir(directory) - df_pop_raw = read_population_data(username, password) + df_pop_raw = read_population_data() return df_pop_raw @@ -411,8 +337,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'], file_format: str = dd.defaultDict['file_format'], out_folder: str = dd.defaultDict['out_folder'], merge_eisenach: bool = True, - username='', - password='', **kwargs ): """! Download age-stratified population data for the German counties. @@ -453,8 +377,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'], read_data=read_data, out_folder=out_folder, file_format=file_format, - username=username, - password=password, **kwargs ) preprocess_df = preprocess_population_data( diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py index 65295c1454..03b2868193 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py @@ -454,7 +454,7 @@ def test_call_functions( # change start-date of jh to 2020-01-22 arg_dict_jh["start_date"] = date(2020, 1, 22) - arg_dict_popul = {**arg_dict_all, "username": None, "password": None} + arg_dict_popul = {**arg_dict_all} getVaccinationData.main() mock_vaccination.assert_called() diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py index 6f59864d94..4461154735 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py @@ -33,10 +33,6 @@ class Test_getPopulationData(fake_filesystem_unittest.TestCase): path = '/home/Population_Data' - config_file_name = 'CredentialsRegio.ini' - test_username = 'username_test' - test_password = 'password_test' - here = os.path.dirname(os.path.abspath(__file__)) filename = os.path.join( here, 'test_data', 'TestSetPopulationExport.json') @@ -71,53 +67,10 @@ def test_export_population_data(self): return_value=df_pop_raw) @patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop) @patch('memilio.epidata.getPopulationData.test_total_population') - def test_get_population_data_full(self, mock_test, mock_export, mock_download): + def test_get_population_data_full(self, mock_test, mock_assign, mock_download): # should not raise any errors gpd.get_population_data(out_folder=self.path) - @patch('builtins.input', return_value=test_username) - @patch('getpass.getpass', return_value=test_password) - @patch('memilio.epidata.getDataIntoPandasDataFrame.user_choice', return_value=True) - @patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini') - @patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw) - @patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop) - @patch('memilio.epidata.getPopulationData.test_total_population') - def test_config_write(self, mock_test, mock_export, mock_raw, mock_path, mock_choice, mock_pw, mock_un): - # username and password should be written into the config file. - # The download and assigning to counties of the population data is mocked. - gpd.get_population_data(username=None, password=None, interactive=True) - # Check if the file is written. - self.assertTrue(self.config_file_name in os.listdir(os.getcwd())) - # Check content of the file. - # Read file. - parser = configparser.ConfigParser() - parser.read(os.path.join(os.getcwd(), self.config_file_name)) - # Test content. - self.assertEqual(parser['CREDENTIALS']['Username'], self.test_username) - self.assertEqual(parser['CREDENTIALS']['Password'], self.test_password) - - @patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini') - @patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw) - @patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop) - @patch('memilio.epidata.getPopulationData.test_total_population') - def test_config_read(self, mock_test, mock_export, mock_read, mock_path): - # File should not exist yet. - self.assertFalse(self.config_file_name in os.listdir(os.getcwd())) - # Create config file. - string = '[CREDENTIALS]\nUsername = ' + \ - self.test_username+'\nPassword = '+self.test_password - path = os.path.join(os.getcwd(), self.config_file_name) - with open(path, 'w+') as file: - file.write(string) - # Check if the file is written. - self.assertTrue(self.config_file_name in os.listdir(os.getcwd())) - # The download and assigning to counties of the population data is mocked. - gpd.get_population_data( - username=None, password=None, read_data=False, out_folder=self.path, interactive=False) - # The file exist in the directory (mocked) and the credentials should be read. - mock_read.assert_called_with( - self.test_username, self.test_password) - if __name__ == '__main__': unittest.main()