From 097b67774dc353ba40a343d69563ebd8a1e17ca5 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Thu, 26 Dec 2019 14:22:57 +0100 Subject: [PATCH 01/19] remove from git --- parsesql/config/configuration.json | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 parsesql/config/configuration.json diff --git a/parsesql/config/configuration.json b/parsesql/config/configuration.json deleted file mode 100644 index e6fb311..0000000 --- a/parsesql/config/configuration.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "sqldirectory": "/Users/sebastiandaum/Desktop/views", - "file_extension": "sql", - "strategy": "sqllite", - "Snowflake_Account": { - "user": "user", - "password": "password", - "account": "account", - "database": "database", - "schema": "schema", - "warehouse": "warehouse" - } -} From 616cf7e1df57f91dc0a664a4977dbe549190593e Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Thu, 26 Dec 2019 14:25:07 +0100 Subject: [PATCH 02/19] changed gitignore --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index bc19879..3c4053a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,5 @@ *.pyc #setup UNKNOWN.egg-info -parser/config/configuration.json -parser/config/test.json \ No newline at end of file +parsesql/config/configuration.json +parsesql/config/test.json \ No newline at end of file From 8e762926078979d994fbe8075a0c3f7af68724d0 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Thu, 26 Dec 2019 14:31:24 +0100 Subject: [PATCH 03/19] changed structure to package layout --- parsesql/app.py | 10 +++++----- parsesql/config/config_reader.py | 2 +- parsesql/main/database/db_engine.py | 2 +- parsesql/main/executers.py | 2 +- parsesql/main/sql_parser/file_finder.py | 2 +- parsesql/main/sql_parser/snowsqlparser.py | 2 +- {parsesql/tests => tests}/__init__.py | 0 {parsesql/tests => tests}/run_all.py | 0 {parsesql/tests => tests}/test_configuration.py | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) rename {parsesql/tests => tests}/__init__.py (100%) rename {parsesql/tests => tests}/run_all.py (100%) rename {parsesql/tests => tests}/test_configuration.py (95%) diff --git a/parsesql/app.py b/parsesql/app.py index 1abb012..7057c09 100644 --- a/parsesql/app.py +++ b/parsesql/app.py @@ -20,11 +20,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from main.sql_parser.snowsqlparser import ParseSql -from main.sql_parser.file_finder import FileFinder -from main.database.db_engine import Session -from main.database.models import TableDependency -from main.executers import SequentialExecuter, MultiProcessingExecuter +from parsesql.main.sql_parser.snowsqlparser import ParseSql +from parsesql.main.sql_parser.file_finder import FileFinder +from parsesql.main.database.db_engine import Session +from parsesql.main.database.models import TableDependency +from parsesql.main.executers import SequentialExecuter, MultiProcessingExecuter import uuid import time diff --git a/parsesql/config/config_reader.py b/parsesql/config/config_reader.py index a5d34e2..12650ca 100644 --- a/parsesql/config/config_reader.py +++ b/parsesql/config/config_reader.py @@ -24,7 +24,7 @@ import os import sys from pathlib import Path -from util.logger_service import LoggerMixin +from parsesql.util.logger_service import LoggerMixin class Configuration(LoggerMixin): diff --git a/parsesql/main/database/db_engine.py b/parsesql/main/database/db_engine.py index 957a226..aa19fac 100644 --- a/parsesql/main/database/db_engine.py +++ b/parsesql/main/database/db_engine.py @@ -23,7 +23,7 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker from snowflake.sqlalchemy import URL -from config.config_reader import Config +from parsesql.config.config_reader import Config from sqlalchemy import create_engine class DatabaseEngine(): diff --git a/parsesql/main/executers.py b/parsesql/main/executers.py index 9566c29..4dc1432 100644 --- a/parsesql/main/executers.py +++ b/parsesql/main/executers.py @@ -1,5 +1,5 @@ from multiprocessing import Pool, cpu_count, Queue -from main.sql_parser.snowsqlparser import ParseSql +from parsesql.main.sql_parser.snowsqlparser import ParseSql class BaseExecuter(object): pass diff --git a/parsesql/main/sql_parser/file_finder.py b/parsesql/main/sql_parser/file_finder.py index 14579d6..e4a2345 100644 --- a/parsesql/main/sql_parser/file_finder.py +++ b/parsesql/main/sql_parser/file_finder.py @@ -21,7 +21,7 @@ # SOFTWARE. from config.config_reader import Config -from util.logger_service import LoggerMixin +from parsesql.util.logger_service import LoggerMixin import os class FileFinder(LoggerMixin): diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index abb82ba..9e6aeb4 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -23,7 +23,7 @@ import os import re from .sqlExpressions import reservedSqlExpressions, specialCharacters, endstatement, duallist, technicalParameter -from util.logger_service import LoggerMixin +from parsesql.util.logger_service import LoggerMixin import exampleSql import textwrap diff --git a/parsesql/tests/__init__.py b/tests/__init__.py similarity index 100% rename from parsesql/tests/__init__.py rename to tests/__init__.py diff --git a/parsesql/tests/run_all.py b/tests/run_all.py similarity index 100% rename from parsesql/tests/run_all.py rename to tests/run_all.py diff --git a/parsesql/tests/test_configuration.py b/tests/test_configuration.py similarity index 95% rename from parsesql/tests/test_configuration.py rename to tests/test_configuration.py index 12f589d..57060f4 100644 --- a/parsesql/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -1,5 +1,5 @@ import unittest -from config import config_reader +from parsesql.config import config_reader from pathlib import Path class ConfigurationTest(unittest.TestCase): From 56460b05a2c755c1d50c6a589f7ea963b26b0b4d Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Thu, 26 Dec 2019 16:19:20 +0100 Subject: [PATCH 04/19] setup test class for logger module --- Pipfile | 1 + parsesql/config/config_reader.py | 13 +++-- tests/config/__init__.py | 0 tests/{ => config}/test_configuration.py | 0 tests/util/__init__.py | 0 tests/util/test_logger_service.py | 71 ++++++++++++++++++++++++ 6 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 tests/config/__init__.py rename tests/{ => config}/test_configuration.py (100%) create mode 100644 tests/util/__init__.py create mode 100644 tests/util/test_logger_service.py diff --git a/Pipfile b/Pipfile index 6526300..d62c8a6 100644 --- a/Pipfile +++ b/Pipfile @@ -4,6 +4,7 @@ url = "https://pypi.org/simple" verify_ssl = true [dev-packages] +flake8 = "*" [packages] sqlalchemy = "*" diff --git a/parsesql/config/config_reader.py b/parsesql/config/config_reader.py index 12650ca..dd31295 100644 --- a/parsesql/config/config_reader.py +++ b/parsesql/config/config_reader.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -28,7 +28,7 @@ class Configuration(LoggerMixin): - def __init__(self, filename:str): + def __init__(self, filename: str): self.abspath = os.path.dirname(os.path.abspath(__file__)) self.filename = filename self.configfilepath = os.path.join(self.abspath, self.filename) @@ -44,7 +44,8 @@ def read(self): with open(self.configfilepath) as json_data_file: return json.load(json_data_file) except FileNotFoundError as e: - self.logger.info(f"Cannot find file {self.filename}. Please check if file existing. " + self.logger.info(f"Cannot find file {self.filename}. " + f"Please check if file existing. " f"See this error: {e}") sys.exit() @@ -54,4 +55,6 @@ def get_sql_directory(self): """ return Path(self.data['sqldirectory']) -Config = Configuration(filename='configuration.json') + +fname = 'configuration.json' +Config = Configuration(filename=fname) diff --git a/tests/config/__init__.py b/tests/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_configuration.py b/tests/config/test_configuration.py similarity index 100% rename from tests/test_configuration.py rename to tests/config/test_configuration.py diff --git a/tests/util/__init__.py b/tests/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py new file mode 100644 index 0000000..3d3801b --- /dev/null +++ b/tests/util/test_logger_service.py @@ -0,0 +1,71 @@ +import unittest +import json +import os +from parsesql.util import logger_service + + +class JsonConfigGenerator(): + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def _get_filepath(self): + from parsesql import config + jsonname = 'configuration.json' + configpath = os.path.dirname(config.__file__) + return os.path.join(configpath, jsonname) + + def create(self): + with open(self._get_filepath(), 'w') as json_file: + json.dump(vars(self), json_file, indent=4) + + def remove(self): + try: + os.remove(self._get_filepath()) + except Exception as e: + print(e) + + +class Logger(unittest.TestCase): + + config = JsonConfigGenerator( + sqldirectory="/Users/sebastiandaum/Desktop/views", + file_extension="sql", + strategy="sqllite", + Snowflake_Account={ + "user": "user", + "password": "password", + "account": "account", + "database": "database", + "schema": "schema", + "warehouse": "warehouse", + } + ) + + @classmethod + def setUpClass(cls): + Logger.config.create() + + # @classmethod + # def tearDownClass(cls): + # Logger.config.create() + + def test_if_logger_class_exisit(self): + """ + test if a logging class with the correct name exist + """ + klass = logger_service.LoggerMixin() + self.assertEqual(klass.__class__.__name__, "LoggerMixin") + + def test_base_logging_is_info(self): + """ + test if the base logging level is level info=20 + """ + log = logger_service.LoggerMixin() + self.assertEqual(log.logger.getEffectiveLevel(), 20) + + def test_if_configuration_can_change_level(self): + """ + test if a given logging level in config file can change level + """ + pass From bdc13da80836ec9139361103bf05e6c7a8381cac Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Thu, 26 Dec 2019 16:46:18 +0100 Subject: [PATCH 05/19] progress config testing --- parsesql/config/config_reader.py | 1 + parsesql/main/sql_parser/file_finder.py | 2 +- parsesql/util/logger_service.py | 5 +++++ tests/util/test_logger_service.py | 10 +++++++--- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/parsesql/config/config_reader.py b/parsesql/config/config_reader.py index dd31295..d103ad0 100644 --- a/parsesql/config/config_reader.py +++ b/parsesql/config/config_reader.py @@ -35,6 +35,7 @@ def __init__(self, filename: str): self.data = self.read() self.sqldir = self.get_sql_directory() self.file_extension = self.data['file_extension'] + self.logger_config = self.data['logging'] self.strategy = self.data['strategy'] if self.strategy == "snowflake": self.snowflake_account = self.data['Snowflake_Account'] diff --git a/parsesql/main/sql_parser/file_finder.py b/parsesql/main/sql_parser/file_finder.py index e4a2345..cf64829 100644 --- a/parsesql/main/sql_parser/file_finder.py +++ b/parsesql/main/sql_parser/file_finder.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from config.config_reader import Config +from parsesql.config.config_reader import Config from parsesql.util.logger_service import LoggerMixin import os diff --git a/parsesql/util/logger_service.py b/parsesql/util/logger_service.py index bbb1d1d..d334c86 100644 --- a/parsesql/util/logger_service.py +++ b/parsesql/util/logger_service.py @@ -21,11 +21,16 @@ # SOFTWARE. import logging +#from parsesql.config.config_reader import Config + +#print(type(Config.logger_config)) + class LoggerMixin(object): FORMAT = '[%(asctime)s] [%(processName)-10s] [%(name)s] [%(levelname)s] -> %(message)s' logging.basicConfig(format=FORMAT, level=logging.INFO) + #logging.config.dictConfig(logging_config) @property def logger(self): diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py index 3d3801b..1108012 100644 --- a/tests/util/test_logger_service.py +++ b/tests/util/test_logger_service.py @@ -39,6 +39,10 @@ class Logger(unittest.TestCase): "database": "database", "schema": "schema", "warehouse": "warehouse", + }, + logging={ + "format": '[%(asctime)s] [%(processName)-10s] [%(name)s] [%(levelname)s] -> %(message)s', + "level": "INFO", } ) @@ -46,9 +50,9 @@ class Logger(unittest.TestCase): def setUpClass(cls): Logger.config.create() - # @classmethod - # def tearDownClass(cls): - # Logger.config.create() + @classmethod + def tearDownClass(cls): + Logger.config.create() def test_if_logger_class_exisit(self): """ From 12cf580a1e57c23a81e1d5372e3062f297c594cf Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Fri, 27 Dec 2019 13:28:12 +0100 Subject: [PATCH 06/19] parseing logging info from json config --- parsesql/config/config_reader.py | 16 ++++------ parsesql/config/example_configuration.json | 4 +++ parsesql/main/database/db_engine.py | 1 + parsesql/main/sql_parser/file_finder.py | 1 + parsesql/main/sql_parser/snowsqlparser.py | 1 - parsesql/util/logger_service.py | 14 +++----- tests/config/test_configuration.py | 9 ++++-- tests/util/test_logger_service.py | 37 +++++++++++++--------- 8 files changed, 46 insertions(+), 37 deletions(-) diff --git a/parsesql/config/config_reader.py b/parsesql/config/config_reader.py index d103ad0..e4a4084 100644 --- a/parsesql/config/config_reader.py +++ b/parsesql/config/config_reader.py @@ -24,18 +24,17 @@ import os import sys from pathlib import Path -from parsesql.util.logger_service import LoggerMixin -class Configuration(LoggerMixin): - def __init__(self, filename: str): +class Configuration(): + def __init__(self, filename: str = 'configuration.json'): self.abspath = os.path.dirname(os.path.abspath(__file__)) self.filename = filename self.configfilepath = os.path.join(self.abspath, self.filename) self.data = self.read() self.sqldir = self.get_sql_directory() self.file_extension = self.data['file_extension'] - self.logger_config = self.data['logging'] + self.logger_config = {"Logging": self.data['logging']} self.strategy = self.data['strategy'] if self.strategy == "snowflake": self.snowflake_account = self.data['Snowflake_Account'] @@ -45,9 +44,9 @@ def read(self): with open(self.configfilepath) as json_data_file: return json.load(json_data_file) except FileNotFoundError as e: - self.logger.info(f"Cannot find file {self.filename}. " - f"Please check if file existing. " - f"See this error: {e}") + print(f"Cannot find file {self.filename}. " + f"Please check if file existing. " + f"See this error: {e}") sys.exit() def get_sql_directory(self): @@ -57,5 +56,4 @@ def get_sql_directory(self): return Path(self.data['sqldirectory']) -fname = 'configuration.json' -Config = Configuration(filename=fname) +Config = Configuration() diff --git a/parsesql/config/example_configuration.json b/parsesql/config/example_configuration.json index daabc56..5859ba4 100644 --- a/parsesql/config/example_configuration.json +++ b/parsesql/config/example_configuration.json @@ -9,5 +9,9 @@ "database": "database", "schema": "schema", "warehouse": "warehouse" + }, + "logging": { + "format": "[%(asctime)s] [%(processName)-10s] [%(name)s] [%(levelname)s] -> %(message)s", + "level": "INFO" } } diff --git a/parsesql/main/database/db_engine.py b/parsesql/main/database/db_engine.py index aa19fac..73c5444 100644 --- a/parsesql/main/database/db_engine.py +++ b/parsesql/main/database/db_engine.py @@ -26,6 +26,7 @@ from parsesql.config.config_reader import Config from sqlalchemy import create_engine + class DatabaseEngine(): def __init__(self): self.strategy = Config.strategy diff --git a/parsesql/main/sql_parser/file_finder.py b/parsesql/main/sql_parser/file_finder.py index cf64829..dbd5990 100644 --- a/parsesql/main/sql_parser/file_finder.py +++ b/parsesql/main/sql_parser/file_finder.py @@ -24,6 +24,7 @@ from parsesql.util.logger_service import LoggerMixin import os + class FileFinder(LoggerMixin): def __init__(self): self.type = Config.file_extension diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index 9e6aeb4..850a5f8 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -24,7 +24,6 @@ import re from .sqlExpressions import reservedSqlExpressions, specialCharacters, endstatement, duallist, technicalParameter from parsesql.util.logger_service import LoggerMixin -import exampleSql import textwrap class ParseSql(LoggerMixin): diff --git a/parsesql/util/logger_service.py b/parsesql/util/logger_service.py index d334c86..fd7f40f 100644 --- a/parsesql/util/logger_service.py +++ b/parsesql/util/logger_service.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -21,16 +21,12 @@ # SOFTWARE. import logging -#from parsesql.config.config_reader import Config - -#print(type(Config.logger_config)) +import logging.config +from parsesql.config.config_reader import Config class LoggerMixin(object): - - FORMAT = '[%(asctime)s] [%(processName)-10s] [%(name)s] [%(levelname)s] -> %(message)s' - logging.basicConfig(format=FORMAT, level=logging.INFO) - #logging.config.dictConfig(logging_config) + logging.basicConfig(**Config.logger_config['Logging']) @property def logger(self): diff --git a/tests/config/test_configuration.py b/tests/config/test_configuration.py index 57060f4..edb067e 100644 --- a/tests/config/test_configuration.py +++ b/tests/config/test_configuration.py @@ -2,6 +2,7 @@ from parsesql.config import config_reader from pathlib import Path + class ConfigurationTest(unittest.TestCase): def test_if_missing_config_raise_error(self): @@ -13,16 +14,18 @@ def test_if_missing_config_raise_error(self): def test_if_config_class_exists(self): """ - test if configuration class is available + test if configuration class is available """ - klass = config_reader.Configuration(filename='example_configuration.json') + klass = config_reader.Configuration( + filename='configuration.json' + ) self.assertEqual(klass.__class__.__name__, "Configuration") def test_if_path_object_gets_created(self): """ test if config object is an instance of Path class """ - c = config_reader.Configuration(filename='example_configuration.json') + c = config_reader.Configuration(filename='configuration.json') self.assertIsInstance(c.get_sql_directory(), Path) diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py index 1108012..775152d 100644 --- a/tests/util/test_logger_service.py +++ b/tests/util/test_logger_service.py @@ -2,6 +2,9 @@ import json import os from parsesql.util import logger_service +from parsesql import config + +CONFIGPATH = os.path.dirname(config.__file__) class JsonConfigGenerator(): @@ -10,10 +13,8 @@ def __init__(self, **kwargs): setattr(self, key, value) def _get_filepath(self): - from parsesql import config jsonname = 'configuration.json' - configpath = os.path.dirname(config.__file__) - return os.path.join(configpath, jsonname) + return os.path.join(CONFIGPATH, jsonname) def create(self): with open(self._get_filepath(), 'w') as json_file: @@ -28,7 +29,13 @@ def remove(self): class Logger(unittest.TestCase): - config = JsonConfigGenerator( + @classmethod + def setUpClass(cls): + Logger.create_config().create() + + @staticmethod + def create_config(level="INFO"): + config = JsonConfigGenerator( sqldirectory="/Users/sebastiandaum/Desktop/views", file_extension="sql", strategy="sqllite", @@ -42,19 +49,12 @@ class Logger(unittest.TestCase): }, logging={ "format": '[%(asctime)s] [%(processName)-10s] [%(name)s] [%(levelname)s] -> %(message)s', - "level": "INFO", + "level": f"{level}", } ) + return config - @classmethod - def setUpClass(cls): - Logger.config.create() - - @classmethod - def tearDownClass(cls): - Logger.config.create() - - def test_if_logger_class_exisit(self): + def test_if_logger_class_exist(self): """ test if a logging class with the correct name exist """ @@ -72,4 +72,11 @@ def test_if_configuration_can_change_level(self): """ test if a given logging level in config file can change level """ - pass + # CRITICAL 50, ERROR 40, WARNING 30, INFO 20, DEBUG 10, NOTESET 0 + self.create_config(level="ERROR").create() + log = logger_service.LoggerMixin() + loglevel = log.logger.getEffectiveLevel() + self.create_config().create() + + self.assertEqual(loglevel, 40) + From e4b3945bb9c2dd165ac7ad721ca6544972300c7d Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Sun, 29 Dec 2019 16:05:29 +0100 Subject: [PATCH 07/19] finish test in logger module --- parsesql/main/database/base.py | 6 +-- parsesql/main/database/db_engine.py | 2 +- parsesql/main/database/init_db.py | 8 ++-- parsesql/main/database/models.py | 13 +++--- parsesql/util/logger_service.py | 2 +- parsesql/util/sqlite_to_csv.py | 67 ++--------------------------- tests/run_all.py | 5 ++- tests/util/test_logger_service.py | 21 ++++----- 8 files changed, 33 insertions(+), 91 deletions(-) diff --git a/parsesql/main/database/base.py b/parsesql/main/database/base.py index 45d87c2..404341d 100644 --- a/parsesql/main/database/base.py +++ b/parsesql/main/database/base.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -22,4 +22,4 @@ from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() \ No newline at end of file +Base = declarative_base() diff --git a/parsesql/main/database/db_engine.py b/parsesql/main/database/db_engine.py index 73c5444..593a585 100644 --- a/parsesql/main/database/db_engine.py +++ b/parsesql/main/database/db_engine.py @@ -36,7 +36,7 @@ def get_engine(self): return self.get_engine_sqllite() if self.strategy == 'snowflake': return self.get_snowflake_engine() - + def get_engine_sqllite(self): return create_engine('sqlite:///parsersql.db', echo=True) diff --git a/parsesql/main/database/init_db.py b/parsesql/main/database/init_db.py index 4face3e..092d59c 100644 --- a/parsesql/main/database/init_db.py +++ b/parsesql/main/database/init_db.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -21,7 +21,7 @@ # SOFTWARE. from .db_engine import Session, db_engine -from .models import TableDependency, Base +from .models import Base Base.metadata.drop_all(db_engine) # 1 - generate database schema @@ -30,4 +30,4 @@ # 2 - check connection session = Session() session.commit() -session.close() \ No newline at end of file +session.close() diff --git a/parsesql/main/database/models.py b/parsesql/main/database/models.py index e2d56bc..61a3d56 100644 --- a/parsesql/main/database/models.py +++ b/parsesql/main/database/models.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -21,13 +21,14 @@ # SOFTWARE. from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy import Column, Integer, String, Sequence +from sqlalchemy import Column, String Base = declarative_base() + class TableDependency(Base): __tablename__ = 'tabledependencies' uuid = Column(String, primary_key=True) - filename = Column(String) - objectName = Column(String) - dependentTableName = Column(String) + filename = Column(String) + objectName = Column(String) + dependentTableName = Column(String) diff --git a/parsesql/util/logger_service.py b/parsesql/util/logger_service.py index fd7f40f..8915048 100644 --- a/parsesql/util/logger_service.py +++ b/parsesql/util/logger_service.py @@ -22,7 +22,7 @@ import logging import logging.config -from parsesql.config.config_reader import Config +from parsesql.config.config_reader import Config class LoggerMixin(object): diff --git a/parsesql/util/sqlite_to_csv.py b/parsesql/util/sqlite_to_csv.py index 1c44e70..c7a4d7c 100644 --- a/parsesql/util/sqlite_to_csv.py +++ b/parsesql/util/sqlite_to_csv.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -20,65 +20,4 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import os, csv -from sqlalchemy import create_engine, exc, MetaData, Table -from sqlalchemy.sql import select - -base = os.path.dirname(os.path.abspath(__file__)) -base_parent = os.path.dirname(base) -sqlite_db_file = os.path.join(base_parent, 'parsersql.db') -outcsv = 'dev_viewdefinitions.csv' # for now, not configurable -failedcsv = 'dev_viewdefinitions_failed.csv' -# helper functions -def split_row(rowproxy): - splittable=True - row=[] - row_as_list=[] - for column, value in rowproxy.items(): - row_as_list.append(value) - - try: - row = [row_as_list[2].split('.')[0], row_as_list[2].split('.')[1], row_as_list[3].split('.')[0], row_as_list[3].split('.')[1]] - #row = [v.strip(" \"'[]") for v in split_row] - except IndexError: - row=row_as_list - splittable=False - except AttributeError: - row=row_as_list - splittable=False - return row, splittable -#TODO: Check if this path works system independent -engine = create_engine('sqlite:///{0}'.format(sqlite_db_file)) - -connection = engine.connect() - -#TODO: This should be included from the existing sqlalchemy metdata -metadata = MetaData() -tabledependencies = Table('tabledependencies', metadata, autoload=True, autoload_with=engine) - -select_stmt=select([tabledependencies]) -resultlist = [] -failedlist = [] -try: - # suppose the database has been restarted. - result = connection.execute(select_stmt) - for rowproxy in result: - row, splittable = split_row(rowproxy) - if splittable: - resultlist.append(row) - else: - failedlist.append(row) - connection.close() -except exc.DBAPIError as e: - # an exception is raised, Connection is invalidated. - if e.connection_invalidated: - print("Connection was invalidated!") - -with open(outcsv, 'w') as outfile: - outwriter = csv.writer(outfile) - outwriter.writerows(resultlist) - -with open(failedcsv, 'w') as failedfile: - outwriter = csv.writer(failedfile) - outwriter.writerows(failedlist) - +# TODO Rewrite CSV Export functionality as the old one is to specific diff --git a/tests/run_all.py b/tests/run_all.py index 2611ca7..a54511b 100644 --- a/tests/run_all.py +++ b/tests/run_all.py @@ -1,11 +1,12 @@ import unittest + def parsesql_test_suite(): """Test suite for parsesql tests""" test_loader = unittest.TestLoader() test_suite = test_loader.discover('.') return test_suite + if __name__ == "__main__": - unittest.TextTestRunner(verbosity=2).run(parsesql_test_suite()) - \ No newline at end of file + unittest.TextTestRunner(verbosity=2).run(parsesql_test_suite()) \ No newline at end of file diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py index 775152d..f4cb93f 100644 --- a/tests/util/test_logger_service.py +++ b/tests/util/test_logger_service.py @@ -48,7 +48,8 @@ def create_config(level="INFO"): "warehouse": "warehouse", }, logging={ - "format": '[%(asctime)s] [%(processName)-10s] [%(name)s] [%(levelname)s] -> %(message)s', + "format": '[%(asctime)s] [%(processName)-10s] [%(name)s] ' + '[%(levelname)s] -> %(message)s', "level": f"{level}", } ) @@ -68,15 +69,15 @@ def test_base_logging_is_info(self): log = logger_service.LoggerMixin() self.assertEqual(log.logger.getEffectiveLevel(), 20) - def test_if_configuration_can_change_level(self): + def test_if_logger_logs_messages(self): """ - test if a given logging level in config file can change level + test if the logger mixin logs messages """ - # CRITICAL 50, ERROR 40, WARNING 30, INFO 20, DEBUG 10, NOTESET 0 - self.create_config(level="ERROR").create() + error = False log = logger_service.LoggerMixin() - loglevel = log.logger.getEffectiveLevel() - self.create_config().create() - - self.assertEqual(loglevel, 40) - + try: + log.logger.info('This is a test message') + except AttributeError as e: + print(e) + error = True + self.assertEqual(error, False) From 1c6763a60e8bbc442ce52c140cf80642b730dd9c Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 30 Dec 2019 12:26:25 +0100 Subject: [PATCH 08/19] finished db unittests --- parsesql/db/__init__.py | 0 parsesql/main/database/db_engine.py | 60 ++++++++++++----------- parsesql/main/database/init_db.py | 16 +++---- parsesql/main/executers.py | 31 ++++++++++-- tests/config/test_configuration.py | 2 +- tests/main/__init__.py | 0 tests/main/database/__init__.py | 0 tests/main/database/test_db_engine.py | 69 +++++++++++++++++++++++++++ tests/main/database/test_init_db.py | 57 ++++++++++++++++++++++ tests/util/test_logger_service.py | 8 +++- 10 files changed, 200 insertions(+), 43 deletions(-) create mode 100644 parsesql/db/__init__.py create mode 100644 tests/main/__init__.py create mode 100644 tests/main/database/__init__.py create mode 100644 tests/main/database/test_db_engine.py create mode 100644 tests/main/database/test_init_db.py diff --git a/parsesql/db/__init__.py b/parsesql/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/parsesql/main/database/db_engine.py b/parsesql/main/database/db_engine.py index 593a585..8b19af7 100644 --- a/parsesql/main/database/db_engine.py +++ b/parsesql/main/database/db_engine.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -20,43 +20,45 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker from snowflake.sqlalchemy import URL from parsesql.config.config_reader import Config +from parsesql import db from sqlalchemy import create_engine +import os +DBPATH = os.path.dirname(db.__file__) -class DatabaseEngine(): - def __init__(self): - self.strategy = Config.strategy - - def get_engine(self): - if self.strategy == 'sqllite': - return self.get_engine_sqllite() - if self.strategy == 'snowflake': - return self.get_snowflake_engine() - - def get_engine_sqllite(self): - return create_engine('sqlite:///parsersql.db', echo=True) - - def get_snowflake_engine(self): - return create_engine(URL( - user=Config.snowflake_account['user'], - password= Config.snowflake_account['password'], - account=Config.snowflake_account['account'], - database=Config.snowflake_account['database'], - schema = Config.snowflake_account['schema'], - warehouse = Config.snowflake_account['warehouse'] - ) - , echo=True - ) -db_engine = DatabaseEngine().get_engine() -Session = sessionmaker(bind=db_engine) +class DatabaseEngine(): + def __init__(self, + strategy=None + ): + self.strategy = strategy or Config.strategy + def get_engine(self): + if self.strategy == 'sqllite': + return self._get_engine_sqllite() + if self.strategy == 'snowflake': + return self._get_snowflake_engine() + def _get_engine_sqllite(self): + dbname = "parsersql.db" + url = os.path.join(DBPATH, dbname) + return create_engine('sqlite:///' + url, echo=True) + def _get_snowflake_engine(self): + return create_engine(URL( + user=Config.snowflake_account['user'], + password=Config.snowflake_account['password'], + account=Config.snowflake_account['account'], + database=Config.snowflake_account['database'], + schema=Config.snowflake_account['schema'], + warehouse=Config.snowflake_account['warehouse'] + ), echo=True + ) +db_engine = DatabaseEngine().get_engine() +Session = sessionmaker(bind=db_engine) diff --git a/parsesql/main/database/init_db.py b/parsesql/main/database/init_db.py index 092d59c..5a6a538 100644 --- a/parsesql/main/database/init_db.py +++ b/parsesql/main/database/init_db.py @@ -20,14 +20,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from .db_engine import Session, db_engine +from .db_engine import db_engine from .models import Base -Base.metadata.drop_all(db_engine) -# 1 - generate database schema -Base.metadata.create_all(db_engine) -# 2 - check connection -session = Session() -session.commit() -session.close() +def create_database(): + Base.metadata.drop_all(db_engine) + Base.metadata.create_all(db_engine) + + +if __name__ == "__main__": + create_database() diff --git a/parsesql/main/executers.py b/parsesql/main/executers.py index 4dc1432..4aaeaab 100644 --- a/parsesql/main/executers.py +++ b/parsesql/main/executers.py @@ -1,11 +1,35 @@ -from multiprocessing import Pool, cpu_count, Queue +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from multiprocessing import Pool, cpu_count from parsesql.main.sql_parser.snowsqlparser import ParseSql + class BaseExecuter(object): pass + class SequentialExecuter(BaseExecuter): - + def __init__(self, to_parse_files=None): self.to_parse_files = to_parse_files @@ -18,6 +42,7 @@ def parse(self): def run(self): return self.parse() + class MultiProcessingExecuter(BaseExecuter): def __init__(self, to_parse_files=None): @@ -33,4 +58,4 @@ def run(self): number_of_processcess = self.determine_max_proc() - 1 p = Pool(number_of_processcess) - return p.map( self.parse , self.to_parse_files ) \ No newline at end of file + return p.map(self.parse, self.to_parse_files) diff --git a/tests/config/test_configuration.py b/tests/config/test_configuration.py index edb067e..78d9abc 100644 --- a/tests/config/test_configuration.py +++ b/tests/config/test_configuration.py @@ -30,4 +30,4 @@ def test_if_path_object_gets_created(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/main/__init__.py b/tests/main/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/main/database/__init__.py b/tests/main/database/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/main/database/test_db_engine.py b/tests/main/database/test_db_engine.py new file mode 100644 index 0000000..1e8a228 --- /dev/null +++ b/tests/main/database/test_db_engine.py @@ -0,0 +1,69 @@ +import unittest +from parsesql.main.database import db_engine +from parsesql.config.config_reader import Config + + +class DatabaseEngineTest(unittest.TestCase): + + def setUp(self): + Config.strategy = "sqllite" + + def test_if_dbengine_class_exists(self): + """ + test if dbengine class is available + """ + klass = db_engine.DatabaseEngine() + self.assertEqual(klass.__class__.__name__, "DatabaseEngine") + + def test_factory_engine_method(self): + """ + test if factory method returns engine based on param + """ + engine_options = ["sqllite", "snowflake"] + # Set snowflake account information + Config.snowflake_account = Config.data['Snowflake_Account'] + engines = [db_engine.DatabaseEngine(strategy=opt).get_engine() + for opt in engine_options + ] + check_engine = all(engine.__class__.__name__ == "Engine" + for engine in engines + ) + self.assertEqual(check_engine, True) + + def test_if_sqllite_engine_exist(self): + """ + test if method creates a sqllite engine from sqalchemy + """ + engine = db_engine.DatabaseEngine().get_engine() + check_uri = True if "sqlite" in str(engine.url) else False + + self.assertEqual(check_uri, True) + + def test_if_snowflake_engine_exist(self): + """ + test if method creates a snowflake engine from sqalchemy + """ + # Set snowflake account information + Config.snowflake_account = Config.data['Snowflake_Account'] + Config.strategy = 'snowflake' + + engine = db_engine.DatabaseEngine().get_engine() + check_uri = True if "snowflake" in str(engine.url) else False + + self.assertEqual(check_uri, True) + + def test_if_global_engine_object_exist(self): + """ + test if the global engine object is not None + """ + self.assertIsNotNone(db_engine.db_engine) + + def test_if_global_session_object_exist(self): + """ + test if the global session object is not None + """ + self.assertIsNotNone(db_engine.Session) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/main/database/test_init_db.py b/tests/main/database/test_init_db.py new file mode 100644 index 0000000..d76190e --- /dev/null +++ b/tests/main/database/test_init_db.py @@ -0,0 +1,57 @@ +import unittest +from parsesql.main.database.init_db import create_database +from parsesql.main.database.db_engine import Session +from parsesql.main.database.models import TableDependency +from parsesql import db +import os + +DBPATH = os.path.dirname(db.__file__) + + +class InitDbTest(unittest.TestCase): + + def test_if_build_of_sqlite_db(self): + """ + test if a sqlite db can be created and located in the db directory + """ + create_database() + list_files = os.listdir(DBPATH) + self.assertIn("parsersql.db", list_files) + + def test_if_session_can_be_established(self): + """ + test if a session / connection can be build to sqlite db + """ + create_database() + error = False + try: + session = Session() + session.commit() + session.close() + except Exception as e: + print(e) + raise e + error = True + + self.assertEqual(error, False) + + def test_if_table_exist_in_db(self): + """ + test if model can be queried and thus if table exist + """ + create_database() + error = False + try: + session = Session() + session.query(TableDependency).first() + session.close() + except Exception as e: + print(e) + raise e + error = True + + self.assertEqual(error, False) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py index f4cb93f..930f0d8 100644 --- a/tests/util/test_logger_service.py +++ b/tests/util/test_logger_service.py @@ -27,11 +27,11 @@ def remove(self): print(e) -class Logger(unittest.TestCase): +class LoggerTest(unittest.TestCase): @classmethod def setUpClass(cls): - Logger.create_config().create() + LoggerTest.create_config().create() @staticmethod def create_config(level="INFO"): @@ -81,3 +81,7 @@ def test_if_logger_logs_messages(self): print(e) error = True self.assertEqual(error, False) + + +if __name__ == "__main__": + unittest.main() From 22e22aee03c458eb2b0f44725f11648e9bf97419 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 30 Dec 2019 18:23:40 +0100 Subject: [PATCH 09/19] add more test methods, add a new functionality that creates all lowercase and uppercase combination for all combination detection in reserved keywords --- parsesql/exampleSql/{ => 2nd/3nd}/cte.sql | 0 parsesql/exampleSql/{ => 2nd}/with.sql | 0 parsesql/main/sql_parser/file_finder.py | 18 +- parsesql/main/sql_parser/snowsqlparser.py | 168 +++++++++++-------- parsesql/main/sql_parser/sqlExpressions.py | 15 +- tests/config/test_configuration.py | 22 +++ tests/main/database/test_db_engine.py | 22 +++ tests/main/database/test_init_db.py | 22 +++ tests/main/sql_parser/__init__.py | 0 tests/main/sql_parser/test_filefinder.py | 65 +++++++ tests/main/sql_parser/test_snowsqlparser.py | 145 ++++++++++++++++ tests/main/sql_parser/test_sqlExpressions.py | 51 ++++++ tests/run_all.py | 24 ++- tests/sql_file_container/__init__.py | 0 tests/util/test_logger_service.py | 22 +++ 15 files changed, 490 insertions(+), 84 deletions(-) rename parsesql/exampleSql/{ => 2nd/3nd}/cte.sql (100%) rename parsesql/exampleSql/{ => 2nd}/with.sql (100%) create mode 100644 tests/main/sql_parser/__init__.py create mode 100644 tests/main/sql_parser/test_filefinder.py create mode 100644 tests/main/sql_parser/test_snowsqlparser.py create mode 100644 tests/main/sql_parser/test_sqlExpressions.py create mode 100644 tests/sql_file_container/__init__.py diff --git a/parsesql/exampleSql/cte.sql b/parsesql/exampleSql/2nd/3nd/cte.sql similarity index 100% rename from parsesql/exampleSql/cte.sql rename to parsesql/exampleSql/2nd/3nd/cte.sql diff --git a/parsesql/exampleSql/with.sql b/parsesql/exampleSql/2nd/with.sql similarity index 100% rename from parsesql/exampleSql/with.sql rename to parsesql/exampleSql/2nd/with.sql diff --git a/parsesql/main/sql_parser/file_finder.py b/parsesql/main/sql_parser/file_finder.py index dbd5990..2d5f127 100644 --- a/parsesql/main/sql_parser/file_finder.py +++ b/parsesql/main/sql_parser/file_finder.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -27,8 +27,8 @@ class FileFinder(LoggerMixin): def __init__(self): - self.type = Config.file_extension - self.targetdir = Config.sqldir + self.type = Config.file_extension + self.targetdir = Config.sqldir def getListOfFiles(self, dirName=None): dirName = dirName or self.targetdir @@ -39,14 +39,16 @@ def getListOfFiles(self, dirName=None): for entry in listOfFile: # Create full path fullPath = os.path.join(dirName, entry) - # If entry is a directory then get the list of files in this directory + # If entry is a directory then get the list of files + # in this directory if os.path.isdir(fullPath): allFiles = allFiles + self.getListOfFiles(fullPath) else: allFiles.append(fullPath) # filter out sql files - allFiles = [file for file in allFiles if file.endswith(f".{self.type}")] + allFiles = [file for file in allFiles + if file.endswith(f".{self.type}")] if allFiles: - self.logger.info(f'Recursive Search found files. Number of files found: {len(allFiles)}') - + self.logger.info(f'Recursive Search found files. Number of' + f'files found: {len(allFiles)}') return allFiles diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index 850a5f8..2021ed0 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -22,9 +22,14 @@ import os import re -from .sqlExpressions import reservedSqlExpressions, specialCharacters, endstatement, duallist, technicalParameter +from .sqlExpressions import (RESERVED_SQL_EXPRESSIONS, + SPECIAL_CHARACTERS, + END_STATEMENT, + DUAL_LIST, + TECHNICAL_PARAM + ) from parsesql.util.logger_service import LoggerMixin -import textwrap + class ParseSql(LoggerMixin): def __init__(self, file): @@ -33,15 +38,16 @@ def __init__(self, file): self.filename = os.path.basename(self.file) self._base_clean_up() - self.allkeywordPos = self._getParseNextAfterFrom() + self.allkeywordPos = self._getParseNextAfterFrom() def _readFile(self) -> str: - with open(self.file ,encoding = 'utf-8') as f: + with open(self.file, encoding='utf-8') as f: return f.read() def _base_clean_up(self) -> None: """ - this method calls base instance method that prepare the file content for parsing + this method calls base instance method that prepare the file content + for parsing """ # instance method that removes comments self._remove_comments() @@ -55,27 +61,44 @@ def _base_clean_up(self) -> None: self._lstrip() def _remove_comments(self) -> None: - """ remove c-style comments. - text: blob of text with comments (can include newlines) - returns: text with comments removed """ - self.filecontent = re.sub('\/\*[\s\S]*?\*\/|([^:]|^)\/\/.*$', '', self.filecontent ) - self.filecontent = re.sub('--.*?\n', '', self.filecontent ) + remove c-style comments. + text: blob of text with comments (can include newlines) + returns: text with comments removed + """ + # TODO: Check if r escaping works as expected + self.filecontent = re.sub(r'\/\*[\s\S]*?\*\/|([^:]|^)\/\/.*$', + '', self.filecontent) + self.filecontent = re.sub('--.*?\n', '', self.filecontent) def _uppercase_sql_expressions(self) -> None: """ Instance method that uppercase all keywords within the string """ - for element in reservedSqlExpressions: + for element in RESERVED_SQL_EXPRESSIONS: if element.lower() in self.filecontent: - self.filecontent = re.sub(r"\b"+ element.lower()+ r"\b", element, self.filecontent) + self.filecontent = re.sub(r"\b" + element.lower() + r"\b", + element, self.filecontent) + + def _get_all_reserved_sql_combinations(self, s: str): + """ + Instance method that returns a generator for all possible string + combinations + """ + if s == '': + yield '' + return + for rest in self._get_all_reserved_sql_combinations(s[1:]): + yield s[0].upper() + rest + if s[0].upper() != s[0].lower(): + yield s[0].lower() + rest def _unintend(self) -> None: """ Instance method standardize the intend of the file """ data = self.filecontent.splitlines() - new_list = list() + new_list = [] for row in data: cleaned_text = LigthSqlTextCleaner(text=row).start() new_list.append(cleaned_text) @@ -83,15 +106,17 @@ def _unintend(self) -> None: def _remove_header_view_col_definition(self): """ - Instance method checks there is a header column definition, It removes it if true. + Instance method checks there is a header column definition, It removes + it if true. """ firstline = self.filecontent.split('\n', 1)[0] if '(' in firstline: - startpos= self.filecontent.find('(') - endpos= self.filecontent.find(')') - self.filecontent = self.filecontent[:startpos] + self.filecontent[endpos+1:] - + startpos = self.filecontent.find('(') + endpos = self.filecontent.find(')') + self.filecontent = (self.filecontent[:startpos] + + self.filecontent[endpos+1:]) + def _lstrip(self) -> None: """ Instance method for lstrip @@ -100,14 +125,18 @@ def _lstrip(self) -> None: def _get_cte_names(self) -> list: allcommactes = list() - for cte in re.finditer(r"\w+(?=\s*(\bas\b|\bAS\b)[^/])", self.filecontent, re.MULTILINE): + for cte in re.finditer(r"\w+(?=\s*(\bas\b|\bAS\b)[^/])", + self.filecontent, + re.MULTILINE): raw = cte.group(0) allcommactes.append(CTESqlTextCleaner(text=raw).start()) return allcommactes def _get_recursive_cte_names(self) -> list: allrec = list() - for cte in re.finditer(r"^\,(?:.*)(\n?\($)", self.filecontent, re.MULTILINE): + for cte in re.finditer(r"^\,(?:.*)(\n?\($)", + self.filecontent, + re.MULTILINE): raw = cte.group(0) allrec.append(RecursiveSqlTextCleaner(text=raw).start()) return allrec @@ -115,11 +144,11 @@ def _get_recursive_cte_names(self) -> list: def _parse_statement(self, stat: str) -> list: statement = stat statementsFound = list() - for m in re.finditer(r"\b"+ statement + r"\b", self.filecontent): + for m in re.finditer(r"\b" + statement + r"\b", self.filecontent): pos = {} pos['keyword'] = statement pos['startpos'] = m.start() - pos['endpos'] = m.end() + pos['endpos'] = m.end() statementsFound.append(pos) return statementsFound @@ -134,7 +163,7 @@ def _get_with_name(self): raw_str = '' for pos in parsePair: start = pos[0] - end = pos[1] + end = pos[1] raw_str = self.filecontent[start:end] raw_str = raw_str.replace(" ", "") return raw_str @@ -147,34 +176,35 @@ def _get_create_name(self): if pos['endpos'] < allpos: parsePair.append([pos['endpos'], allpos]) break - parsePair = parsePair[:1] # only take first two positions + # only take first two positions + parsePair = parsePair[:1] raw_str = '' for pos in parsePair: start = pos[0] - end = pos[1] + end = pos[1] raw_str = self.filecontent[start:end] return CreateSqlTextCleaner(text=raw_str).start() def _getParseNextAfterFrom(self) -> list: - startPositionsKeywords = list() - for keyword in reservedSqlExpressions: - for m in re.finditer(r"\b"+ keyword + r"\b", self.filecontent): + startPositionsKeywords = [] + for keyword in RESERVED_SQL_EXPRESSIONS: + for m in re.finditer(r"\b" + keyword + r"\b", self.filecontent): startPositionsKeywords.append(m.start()) - for stat in endstatement: + for stat in END_STATEMENT: if stat in self.filecontent: - pos = self.filecontent.find(stat) - startPositionsKeywords.append(pos) + pos = self.filecontent.find(stat) + startPositionsKeywords.append(pos) else: self.filecontent = self.filecontent + stat pos = self.filecontent.find(stat) startPositionsKeywords.append(pos) - startPositionsKeywords.sort() + startPositionsKeywords.sort() return startPositionsKeywords def _parseFromEnd(self) -> list: allkeywordPos = self.allkeywordPos - parsePair = list() + parsePair = [] keywordlist = ['FROM', 'JOIN'] for searchkey in keywordlist: for pos in self._parse_statement(stat=searchkey): @@ -185,8 +215,8 @@ def _parseFromEnd(self) -> list: rawFroms = list() for pos in parsePair: start = pos[0] - end = pos[1] - + end = pos[1] + raw_str = self.filecontent[start:end] cleaned_text = TableSqlTextCleaner(text=raw_str).start() cleaned_text = self._detectOldJoin(raw=cleaned_text) @@ -199,7 +229,7 @@ def _parseFromEnd(self) -> list: rawFroms = self._removeAllAfterWhitespace(raw=rawFroms) return rawFroms - + def removeCommaCharacters(self, raw: str) -> str: """ TODO: Refactor the _parseFromEnd method in order to apply DRY @@ -214,7 +244,7 @@ def _detectOldJoin(self, raw: str) -> list: if comma in raw: raw = raw.lstrip() raw = raw.split(',') - final_raw = list() + final_raw = [] for e in raw: e = e.lstrip() final_raw.append(e) @@ -223,43 +253,47 @@ def _detectOldJoin(self, raw: str) -> list: def _removeAllAfterWhitespace(self, raw: list) -> list: whitespace = ' ' - newraw = list() + newraw = [] for element in raw: if whitespace in element: pos = element.find(whitespace) element = element[:pos] newraw.append(element) else: - newraw.append(element) + newraw.append(element) return newraw def getfinalFrom(self) -> dict: objektName = None - tables = [objekt for objekt in self._parseFromEnd() if objekt not in self._get_with_name() - and objekt not in self._get_cte_names() - and objekt not in self._get_recursive_cte_names() - and objekt not in duallist - and objekt not in technicalParameter - ] + tables = [objekt for objekt in self._parseFromEnd() + if objekt not in self._get_with_name() + and objekt not in self._get_cte_names() + and objekt not in self._get_recursive_cte_names() + and objekt not in DUAL_LIST + and objekt not in TECHNICAL_PARAM] + if self._get_create_name(): objektName = self._get_create_name() - final_dict = {'filename':self.filename, 'name':objektName, 'tables': tables } + final_dict = {'filename': self.filename, + 'name': objektName, + 'tables': tables} + self.logger.info(f'Parsing of a file completed: {final_dict}') return final_dict class BaseSqlTextCleaner(object): - - def removeSpecialCharacters(self) -> None: - for char in specialCharacters: + + def removeSPECIAL_CHARACTERS(self) -> None: + for char in SPECIAL_CHARACTERS: if char in self.text: self.text = self.text.replace(char, '') - + def removeCommaCharacters(self) -> None: char = ',' if char in self.text: self.text = self.text.replace(char, '') - + def removeLeftWhiteSpace(self) -> str: self.text = self.text.lstrip() @@ -271,9 +305,9 @@ def removeAllWhiteSpaceFromString(self) -> None: if whitespace in self.text: pos = self.text.find(whitespace) self.text = self.text[:pos] - + def removeLinebreaks(self) -> None: - self.text = self.text.replace('\n','') + self.text = self.text.replace('\n', '') def removeAllAfterStartParenthesis(self) -> None: """ @@ -284,6 +318,7 @@ def removeAllAfterStartParenthesis(self) -> None: pos = self.text.find(paranthesis) self.text = self.text[:pos] + class LigthSqlTextCleaner(BaseSqlTextCleaner): def __init__(self, text: str): @@ -300,8 +335,9 @@ def start(self): def removeRightWhiteSpace(self) -> str: self.text = self.text.rstrip() + class TableSqlTextCleaner(BaseSqlTextCleaner): - + def __init__(self, text: str): self.text = text @@ -310,7 +346,7 @@ def start(self) -> str: Main control method that starts text cleaning and transforming """ self.removeAllAfterEndParenthesis() - self.removeSpecialCharacters() + self.removeSPECIAL_CHARACTERS() self.removeLinebreaks() self.removeLeftWhiteSpace() self.removeTabs() @@ -322,15 +358,16 @@ def removeTabs(self) -> str: instance method that replace tabs character with whitespace """ self.text = self.text.replace('\t', ' ') - + def removeAllAfterEndParenthesis(self) -> None: paranthesis = ')' if paranthesis in self.text: pos = self.text.find(paranthesis) self.text = self.text[:pos] - + + class CTESqlTextCleaner(BaseSqlTextCleaner): - + def __init__(self, text: str): self.text = text @@ -339,7 +376,7 @@ def start(self) -> str: Main control method that starts text cleaning and transforming """ self.remove_whitespace() - self.removeSpecialCharacters() + self.removeSPECIAL_CHARACTERS() self.removeCommaCharacters() self.removeReservedCharacters() self.removeLeftWhiteSpace() @@ -348,8 +385,8 @@ def start(self) -> str: return self.text def removeReservedCharacters(self) -> None: - for char in reservedSqlExpressions: - if re.match(r"\b"+ char + r"\b", self.text): + for char in RESERVED_SQL_EXPRESSIONS: + if re.match(r"\b" + char + r"\b", self.text): self.text = self.text.replace(char, '') def remove_whitespace(self) -> None: @@ -372,6 +409,7 @@ def start(self) -> str: self.upperStr() return self.text + class RecursiveSqlTextCleaner(BaseSqlTextCleaner): def __init__(self, text: str): @@ -394,7 +432,3 @@ def start(self) -> str: self.removeAllWhiteSpaceFromString() self.upperStr() return self.text - - - - \ No newline at end of file diff --git a/parsesql/main/sql_parser/sqlExpressions.py b/parsesql/main/sql_parser/sqlExpressions.py index dd24117..a1b1ae5 100644 --- a/parsesql/main/sql_parser/sqlExpressions.py +++ b/parsesql/main/sql_parser/sqlExpressions.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -reservedSqlExpressions = [ +RESERVED_SQL_EXPRESSIONS = [ 'WITH', 'WHERE', 'ALL', @@ -117,11 +117,10 @@ 'WITH' ] -endstatement = ';' +END_STATEMENT = ';' -specialCharacters = r"!\"#$%&'()*+-/:<=>?@[\]^`{|}~" +SPECIAL_CHARACTERS = r"!\"#$%&'()*+-/:<=>?@[\]^`{|}~" -duallist = ['DUAL', 'dual'] - -technicalParameter = ['GFD', 'gfd'] +DUAL_LIST = ['DUAL', 'dual'] +TECHNICAL_PARAM = ['GFD', 'gfd'] diff --git a/tests/config/test_configuration.py b/tests/config/test_configuration.py index 78d9abc..670dd4f 100644 --- a/tests/config/test_configuration.py +++ b/tests/config/test_configuration.py @@ -1,3 +1,25 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import unittest from parsesql.config import config_reader from pathlib import Path diff --git a/tests/main/database/test_db_engine.py b/tests/main/database/test_db_engine.py index 1e8a228..2e1e9da 100644 --- a/tests/main/database/test_db_engine.py +++ b/tests/main/database/test_db_engine.py @@ -1,3 +1,25 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import unittest from parsesql.main.database import db_engine from parsesql.config.config_reader import Config diff --git a/tests/main/database/test_init_db.py b/tests/main/database/test_init_db.py index d76190e..6561e19 100644 --- a/tests/main/database/test_init_db.py +++ b/tests/main/database/test_init_db.py @@ -1,3 +1,25 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import unittest from parsesql.main.database.init_db import create_database from parsesql.main.database.db_engine import Session diff --git a/tests/main/sql_parser/__init__.py b/tests/main/sql_parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/main/sql_parser/test_filefinder.py b/tests/main/sql_parser/test_filefinder.py new file mode 100644 index 0000000..0890ee7 --- /dev/null +++ b/tests/main/sql_parser/test_filefinder.py @@ -0,0 +1,65 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import unittest +from parsesql.main.sql_parser import file_finder +from parsesql.config.config_reader import Config +from parsesql import exampleSql +from pathlib import Path +import os + +SQLFILEPATH = os.path.dirname(exampleSql.__file__) + + +class FileFinderTest(unittest.TestCase): + + def test_if_filefinder_class_exists(self): + """ + test if FileFinder class is available + """ + klass = file_finder.FileFinder() + self.assertEqual(klass.__class__.__name__, "FileFinder") + + def test_if_files_get_searched(self): + """ + test if recursive file search method returns the correct list + of files + """ + # Change sql directory path in Config class + Config.sqldir = Path(SQLFILEPATH) + + targetfiles_gen = os.walk(SQLFILEPATH) + targetfiles = [] + for path, directories, files in targetfiles_gen: + for file in files: + if file.endswith(".sql"): + targetfiles.append(os.path.join(path, file)) + + finder = file_finder.FileFinder() + foundfiles = finder.getListOfFiles() + + self.assertEqual(sorted(targetfiles), sorted(foundfiles)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/main/sql_parser/test_snowsqlparser.py b/tests/main/sql_parser/test_snowsqlparser.py new file mode 100644 index 0000000..d5ddcb4 --- /dev/null +++ b/tests/main/sql_parser/test_snowsqlparser.py @@ -0,0 +1,145 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import unittest +import os +from tests import sql_file_container +from parsesql.main.sql_parser.snowsqlparser import ParseSql + +TEST_SQL_FILE_PATH = os.path.dirname(sql_file_container.__file__) + + +def write_file(content, name): + """ + Function that creates a file with custom content + """ + with open(file=name, mode='w') as f: + f.write(content) + + +def remove_file(name): + """ + Function that removes a file + """ + try: + os.remove(name) + except FileNotFoundError as e: + print(e) + + +class SnowSqlparserBaseCleanerTest(unittest.TestCase): + + filename = "check1.sql" + full_file_name = os.path.join(TEST_SQL_FILE_PATH, filename) + + sql_statement = """ + wITH + /* + This is a c like comment + */ + l AS ( + SELECT 'a' aS userid + ), + r AS ( + SELECT 'b' as userid -- The answer is 42 + ) + SELECT * + FROM l LEFT JOIN r ON l.userid = r.userid + /* This is a c like comment + */ + ; + """ + + @classmethod + def setUpClass(cls): + write_file(content=cls.sql_statement, name=cls.full_file_name) + + def setUp(self): + self.sqlcontent = ParseSql(self.__class__.full_file_name) + + @classmethod + def tearDownClass(cls): + remove_file(name=cls.full_file_name) + + def reload_raw_sql(self): + self.sqlcontent.filecontent = self.__class__.sql_statement + + def test_if_file_can_be_read(self): + """ + test if the init of the class can read the sql file and save + its content + """ + self.assertIsNotNone(self.sqlcontent.filecontent) + + def test_if_comments_are_removed(self): + """ + test if comments -- and /* */ can be removed + """ + self.reload_raw_sql() + + self.sqlcontent._remove_comments() + + comment_text = [ + "The answer is 42", + "This is a c like comment" + ] + + for comment in comment_text: + self.assertNotIn(comment, self.sqlcontent.filecontent) + + def test_if_uppercase_reserved_expressions(self): + """ + test if all reserved sql expressions gets uppercased + """ + self.reload_raw_sql() + + self.sqlcontent._uppercase_sql_expressions() + + lowercase_expressions = [ + "wITH", + "aS", + "as", + ] + + for expr in lowercase_expressions: + self.assertNotIn(expr, self.sqlcontent.filecontent) + + def test_combination_generator(self): + """ + test if the generator method returns alls combinations from + a given string + """ + expected_combinations = [ + 'WITH', 'wITH', 'WiTH', 'wiTH', 'WItH', 'wItH', 'WitH', + 'witH', 'WITh', 'wITh', 'WiTh', 'wiTh', 'WIth', 'wIth', + 'With', 'with'] + self.reload_raw_sql() + gen = self.sqlcontent._get_all_reserved_sql_combinations(s='WITH') + res = [el for el in gen] + self.assertListEqual(sorted(expected_combinations), + sorted(res) + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/main/sql_parser/test_sqlExpressions.py b/tests/main/sql_parser/test_sqlExpressions.py new file mode 100644 index 0000000..f476a51 --- /dev/null +++ b/tests/main/sql_parser/test_sqlExpressions.py @@ -0,0 +1,51 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import unittest +from parsesql.main.sql_parser import sqlExpressions + + +class SqlExpressionTest(unittest.TestCase): + + def test_if_all_global_variables_exist(self): + """ + test if all sql expressions global variable exist + """ + PARAMS = [ + "RESERVED_SQL_EXPRESSIONS", + "END_STATEMENT", + "SPECIAL_CHARACTERS", + "DUAL_LIST", + "TECHNICAL_PARAM", + ] + module_variables = dir(sqlExpressions) + all_var_exist = True + for param in PARAMS: + if param not in module_variables: + all_var_exist = False + + self.assertEqual(True, all_var_exist) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/run_all.py b/tests/run_all.py index a54511b..35af505 100644 --- a/tests/run_all.py +++ b/tests/run_all.py @@ -1,3 +1,25 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import unittest @@ -9,4 +31,4 @@ def parsesql_test_suite(): if __name__ == "__main__": - unittest.TextTestRunner(verbosity=2).run(parsesql_test_suite()) \ No newline at end of file + unittest.TextTestRunner(verbosity=2).run(parsesql_test_suite()) diff --git a/tests/sql_file_container/__init__.py b/tests/sql_file_container/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py index 930f0d8..c55db59 100644 --- a/tests/util/test_logger_service.py +++ b/tests/util/test_logger_service.py @@ -1,3 +1,25 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import unittest import json import os From 938932be76c6e9fecf52469daf43d8b039d9ccd0 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Thu, 2 Jan 2020 17:59:42 +0100 Subject: [PATCH 10/19] continue writing tests for main sql parser, refactored methods, delete unused methods and changed to method chaining --- .DS_Store | Bin 6148 -> 6148 bytes parsesql/main/sql_parser/snowsqlparser.py | 140 +++++++++---- tests/main/sql_parser/test_snowsqlparser.py | 212 ++++++++++++++++++-- 3 files changed, 297 insertions(+), 55 deletions(-) diff --git a/.DS_Store b/.DS_Store index 37bc6d51bd4eda7de1b33c8cda830700f91f273b..a6a9a35e4fbd8bc142b96f6b791a98f4d75e9240 100644 GIT binary patch delta 82 zcmZoMXfc=|#>CJ*u~2NHo}wrd0|Nsi1A_nqLncEGL#k&^esWUI#6tDS1|lq*4>P&2 mZgyZcVcN{j!OsEIwAqmPJM(0I5kp3X$u>OFn`1;)FarQHN)#sm delta 266 zcmZoMXfc=|#>B)qu~2NHo}wr_0|Nsi1A_nqLn=d&XHI@{Qcix-#*fPx>p?PX3!M{d>0pxO+YD**WLlU*R>pU zL^Z>Vp(r;$g@7W4qJj)mi#GPSvu str: with open(self.file, encoding='utf-8') as f: @@ -49,16 +50,11 @@ def _base_clean_up(self) -> None: this method calls base instance method that prepare the file content for parsing """ - # instance method that removes comments - self._remove_comments() - # instance method that uppercase all keywords - self._uppercase_sql_expressions() - # instance method that removes the left and right indention - self._unintend() - # remove view header - self._remove_header_view_col_definition() - # lstrip UNSTABLE - self._lstrip() + self._remove_comments() \ + ._uppercase_sql_expressions() \ + ._unintend() \ + ._remove_header_view_col_definition() \ + ._remove_empty_lines() def _remove_comments(self) -> None: """ @@ -70,17 +66,20 @@ def _remove_comments(self) -> None: self.filecontent = re.sub(r'\/\*[\s\S]*?\*\/|([^:]|^)\/\/.*$', '', self.filecontent) self.filecontent = re.sub('--.*?\n', '', self.filecontent) + return self def _uppercase_sql_expressions(self) -> None: """ Instance method that uppercase all keywords within the string """ - for element in RESERVED_SQL_EXPRESSIONS: - if element.lower() in self.filecontent: - self.filecontent = re.sub(r"\b" + element.lower() + r"\b", - element, self.filecontent) - - def _get_all_reserved_sql_combinations(self, s: str): + for element in self._consume_reserved_sql_combinations(): + if element in self.filecontent: + self.filecontent = re.sub(r"\b" + element + r"\b", + element.upper(), self.filecontent) + return self + + def _get_all_reserved_sql_combinations(self, s: str + ) -> Generator[str, None, None]: """ Instance method that returns a generator for all possible string combinations @@ -93,9 +92,19 @@ def _get_all_reserved_sql_combinations(self, s: str): if s[0].upper() != s[0].lower(): yield s[0].lower() + rest + def _consume_reserved_sql_combinations(self + ) -> Generator[str, None, None]: + """ + Instance method that creates all SQL combination and retunrs + them as a generator + """ + for sql_keyword in RESERVED_SQL_EXPRESSIONS: + yield from self._get_all_reserved_sql_combinations(s=sql_keyword) + def _unintend(self) -> None: """ - Instance method standardize the intend of the file + Instance method standardize the intend of the file by removing left + and right space """ data = self.filecontent.splitlines() new_list = [] @@ -103,25 +112,41 @@ def _unintend(self) -> None: cleaned_text = LigthSqlTextCleaner(text=row).start() new_list.append(cleaned_text) self.filecontent = "\n".join(new_list) + return self def _remove_header_view_col_definition(self): """ Instance method checks there is a header column definition, It removes it if true. """ - firstline = self.filecontent.split('\n', 1)[0] + splitted_content = self.filecontent.splitlines() + + line = 0 + while True: + firstline = splitted_content[line] + if (firstline.isspace() or len(firstline) <= 0): + line += 1 + else: + break if '(' in firstline: startpos = self.filecontent.find('(') endpos = self.filecontent.find(')') self.filecontent = (self.filecontent[:startpos] + self.filecontent[endpos+1:]) + return self - def _lstrip(self) -> None: + def _remove_empty_lines(self): """ - Instance method for lstrip + Instance method that removes empty lines from string """ - self.filecontent.lstrip() + splitted_content = self.filecontent.splitlines() + for idx, line in enumerate(splitted_content): + if (line.isspace() or len(line) <= 0): + del splitted_content[idx] + + self.filecontent = "\n".join(splitted_content) + return self def _get_cte_names(self) -> list: allcommactes = list() @@ -133,7 +158,7 @@ def _get_cte_names(self) -> list: return allcommactes def _get_recursive_cte_names(self) -> list: - allrec = list() + allrec = [] for cte in re.finditer(r"^\,(?:.*)(\n?\($)", self.filecontent, re.MULTILINE): @@ -142,8 +167,12 @@ def _get_recursive_cte_names(self) -> list: return allrec def _parse_statement(self, stat: str) -> list: + """ + Instance method that searches for a certain substring and returns + a list if keyword and start and end position + """ statement = stat - statementsFound = list() + statementsFound = [] for m in re.finditer(r"\b" + statement + r"\b", self.filecontent): pos = {} pos['keyword'] = statement @@ -185,7 +214,11 @@ def _get_create_name(self): raw_str = self.filecontent[start:end] return CreateSqlTextCleaner(text=raw_str).start() - def _getParseNextAfterFrom(self) -> list: + def _get_keyword_positions(self) -> list: + """ + Instance method that gets all coordinates for reserved keywords if + available. It also adds an end keyword if no end keyword found + """ startPositionsKeywords = [] for keyword in RESERVED_SQL_EXPRESSIONS: for m in re.finditer(r"\b" + keyword + r"\b", self.filecontent): @@ -203,22 +236,15 @@ def _getParseNextAfterFrom(self) -> list: return startPositionsKeywords def _parseFromEnd(self) -> list: - allkeywordPos = self.allkeywordPos - parsePair = [] - keywordlist = ['FROM', 'JOIN'] - for searchkey in keywordlist: - for pos in self._parse_statement(stat=searchkey): - for allpos in allkeywordPos: - if pos['endpos'] < allpos: - parsePair.append([pos['endpos'], allpos]) - break - rawFroms = list() - for pos in parsePair: - start = pos[0] - end = pos[1] + """ + Instance method that returns the final FROM and JOIN object + reference results + """ + rawFroms = [] + raw_dependencies = self._parse_uncleaned_text() - raw_str = self.filecontent[start:end] - cleaned_text = TableSqlTextCleaner(text=raw_str).start() + for dependency in raw_dependencies: + cleaned_text = TableSqlTextCleaner(text=dependency).start() cleaned_text = self._detectOldJoin(raw=cleaned_text) if isinstance(cleaned_text, list): @@ -230,6 +256,32 @@ def _parseFromEnd(self) -> list: rawFroms = self._removeAllAfterWhitespace(raw=rawFroms) return rawFroms + def _parse_position_pair(self) -> list: + """ + Instance method that parse the keyword pair positions + """ + allkeywordPos = self.allkeywordPos + parsePair = [] + keywordlist = ['FROM', 'JOIN'] + for searchkey in keywordlist: + for pos in self._parse_statement(stat=searchkey): + for allpos in allkeywordPos: + if pos['endpos'] < allpos: + parsePair.append([pos['endpos'], allpos]) + break + return parsePair + + def _parse_uncleaned_text(self) -> list: + """ + Instance method that parses raw substrings after JOIN and FROM + """ + parsePair = self._parse_position_pair() + rawFroms = [] + for pos in parsePair: + start, end = pos[0], pos[1] + rawFroms.append(self.filecontent[start:end]) + return rawFroms + def removeCommaCharacters(self, raw: str) -> str: """ TODO: Refactor the _parseFromEnd method in order to apply DRY @@ -284,6 +336,9 @@ def getfinalFrom(self) -> dict: class BaseSqlTextCleaner(object): + def __init__(self, text: str): + self.text = text + def removeSPECIAL_CHARACTERS(self) -> None: for char in SPECIAL_CHARACTERS: if char in self.text: @@ -296,6 +351,7 @@ def removeCommaCharacters(self) -> None: def removeLeftWhiteSpace(self) -> str: self.text = self.text.lstrip() + return self def upperStr(self) -> None: self.text = self.text.upper() @@ -328,12 +384,12 @@ def start(self): """ Main control method that starts text cleaning and transforming """ - self.removeLeftWhiteSpace() - self.removeRightWhiteSpace() + self.removeLeftWhiteSpace().removeRightWhiteSpace() return self.text def removeRightWhiteSpace(self) -> str: self.text = self.text.rstrip() + return self class TableSqlTextCleaner(BaseSqlTextCleaner): diff --git a/tests/main/sql_parser/test_snowsqlparser.py b/tests/main/sql_parser/test_snowsqlparser.py index d5ddcb4..249911a 100644 --- a/tests/main/sql_parser/test_snowsqlparser.py +++ b/tests/main/sql_parser/test_snowsqlparser.py @@ -23,8 +23,13 @@ import unittest import os +import re from tests import sql_file_container -from parsesql.main.sql_parser.snowsqlparser import ParseSql +from parsesql.main.sql_parser.snowsqlparser import (ParseSql, + LigthSqlTextCleaner, + BaseSqlTextCleaner) +from parsesql.main.sql_parser.sqlExpressions import (RESERVED_SQL_EXPRESSIONS, + END_STATEMENT) TEST_SQL_FILE_PATH = os.path.dirname(sql_file_container.__file__) @@ -53,6 +58,7 @@ class SnowSqlparserBaseCleanerTest(unittest.TestCase): full_file_name = os.path.join(TEST_SQL_FILE_PATH, filename) sql_statement = """ + CREATE OR REPLACE VIEW abs (USER_ID_HEADER) AS wITH /* This is a c like comment @@ -61,7 +67,7 @@ class SnowSqlparserBaseCleanerTest(unittest.TestCase): SELECT 'a' aS userid ), r AS ( - SELECT 'b' as userid -- The answer is 42 + seLect 'b' as userid -- The answer is 42 ) SELECT * FROM l LEFT JOIN r ON l.userid = r.userid @@ -75,21 +81,24 @@ def setUpClass(cls): write_file(content=cls.sql_statement, name=cls.full_file_name) def setUp(self): - self.sqlcontent = ParseSql(self.__class__.full_file_name) + self.sql_parser_obj = ParseSql(self.__class__.full_file_name) @classmethod def tearDownClass(cls): remove_file(name=cls.full_file_name) - def reload_raw_sql(self): - self.sqlcontent.filecontent = self.__class__.sql_statement + def reload_raw_sql(self, string=None): + if string: + self.sql_parser_obj.filecontent = string + else: + self.sql_parser_obj.filecontent = self.__class__.sql_statement def test_if_file_can_be_read(self): """ test if the init of the class can read the sql file and save its content """ - self.assertIsNotNone(self.sqlcontent.filecontent) + self.assertIsNotNone(self.sql_parser_obj.filecontent) def test_if_comments_are_removed(self): """ @@ -97,7 +106,7 @@ def test_if_comments_are_removed(self): """ self.reload_raw_sql() - self.sqlcontent._remove_comments() + self.sql_parser_obj._remove_comments() comment_text = [ "The answer is 42", @@ -105,7 +114,7 @@ def test_if_comments_are_removed(self): ] for comment in comment_text: - self.assertNotIn(comment, self.sqlcontent.filecontent) + self.assertNotIn(comment, self.sql_parser_obj.filecontent) def test_if_uppercase_reserved_expressions(self): """ @@ -113,16 +122,17 @@ def test_if_uppercase_reserved_expressions(self): """ self.reload_raw_sql() - self.sqlcontent._uppercase_sql_expressions() + self.sql_parser_obj._uppercase_sql_expressions() lowercase_expressions = [ "wITH", "aS", "as", + "seLect", ] for expr in lowercase_expressions: - self.assertNotIn(expr, self.sqlcontent.filecontent) + self.assertNotIn(expr, self.sql_parser_obj.filecontent) def test_combination_generator(self): """ @@ -130,16 +140,192 @@ def test_combination_generator(self): a given string """ expected_combinations = [ - 'WITH', 'wITH', 'WiTH', 'wiTH', 'WItH', 'wItH', 'WitH', - 'witH', 'WITh', 'wITh', 'WiTh', 'wiTh', 'WIth', 'wIth', + 'WITH', 'wITH', 'WiTH', 'wiTH', 'WItH', 'wItH', 'WitH', + 'witH', 'WITh', 'wITh', 'WiTh', 'wiTh', 'WIth', 'wIth', 'With', 'with'] self.reload_raw_sql() - gen = self.sqlcontent._get_all_reserved_sql_combinations(s='WITH') + gen = self.sql_parser_obj._get_all_reserved_sql_combinations(s='WITH') res = [el for el in gen] self.assertListEqual(sorted(expected_combinations), sorted(res) ) + def test_if_all_sql_combination_consumable(self): + """ + test if a generator returns all sql reserved keywords + """ + gen = self.sql_parser_obj._consume_reserved_sql_combinations() + self.assertIsNotNone(next(gen)) + + def test_text_indentation_change(self): + """ + test if sql text is formated withouht left and right text space + """ + unformatted = """ + SELECT * + FROM + Dual + """ + str0 = "" + str1 = "SELECT *" + str2 = "FROM" + str3 = "Dual" + str4 = "" + exected = '\n'.join([str0, str1, str2, str3, str4]) + self.reload_raw_sql(string=unformatted) + unindent = self.sql_parser_obj._unintend() + self.assertEqual(unindent.filecontent, exected) + + def test_remove_header_defintion(self): + """ + test if sql view header declaration can be removed + e.g. (col1, col2, col3) AS... + """ + self.reload_raw_sql() + self.sql_parser_obj._remove_header_view_col_definition() + parants = [")", "("] + + check = False + for paran in parants: + if paran in self.sql_parser_obj.filecontent.split('\n')[1]: + check = True + + self.assertEqual(check, False) + + def test_remove_empty_lines(self): + """ + Test if empty lines can be removed from string + """ + self.reload_raw_sql() + self.sql_parser_obj._remove_empty_lines() + + count_empty_lines = 0 + empty_lines = [] + + splitted_content = self.sql_parser_obj.filecontent.splitlines() + for idx, line in enumerate(splitted_content): + if (line.isspace() or len(line) <= 0): + empty_lines.append(idx) + count_empty_lines += 1 + + self.assertEqual(count_empty_lines, 0, + f"empty line numbers: {empty_lines}") + + def test_base_cleanup(self): + """ + test if string is correctly left stripped + """ + clean_list = [ + "CREATE OR REPLACE VIEW abs AS", + "WITH", + "l AS (", + "SELECT 'a' AS userid", + "),", + "r AS (", + "SELECT 'b' AS userid )", + "SELECT *", + "FROM l LEFT JOIN r ON l.userid = r.userid", + ";", ] + clean_str = "\n".join(clean_list) + self.reload_raw_sql() + self.sql_parser_obj._base_clean_up() + + self.assertEqual(self.sql_parser_obj.filecontent, clean_str) + + def test_keyword_positions(self): + """ + test if keywords position can be detected correctly + """ + counter = 0 + for n in RESERVED_SQL_EXPRESSIONS: + for m in re.finditer(r"\b" + n + r"\b", + self.sql_parser_obj.filecontent): + counter += 1 + for n in END_STATEMENT: + if n in self.sql_parser_obj.filecontent: + cnt = self.sql_parser_obj.filecontent.count(n) + counter += cnt + + self.assertEqual(len(self.sql_parser_obj.allkeywordPos), + counter) + + def test_key_word_pairs(self): + """ + test if keyword list can be calulated from strin + """ + keywords = self.sql_parser_obj._parse_position_pair() + + target_key_word_positions = [[126, 129], [138, 141]] + + self.assertEqual(keywords, target_key_word_positions) + + def test_parse_statement(self): + """ + test if search statements can be found with related positions + """ + keyword = "FROM" + start = self.sql_parser_obj.filecontent.find(keyword) + end = start + len(keyword) + target_list = [{'keyword': keyword, + 'startpos': start, + 'endpos': end}] + self.assertEqual(target_list, + self.sql_parser_obj._parse_statement(stat=keyword)) + + def test_parseFromEnd(self): + """ + test if dependent table object are parsed + """ + dependent_objects = ["L", "R"] + parsed_objects = self.sql_parser_obj._parseFromEnd() + self.assertEqual(sorted(dependent_objects), parsed_objects) + + def test_parse_uncleaned_text(self): + """ + test if raw text extraction after JOIN and FROM works + """ + expected_raw = [' l ', ' r '] + text = self.sql_parser_obj._parse_uncleaned_text() + print(text) + self.assertEqual(expected_raw, text) + + +class LigthSqlTextCleanerTest(unittest.TestCase): + + test_sql_text = " CREATE OR REPLACE VIEW abs AS " + + def test_if_right_whitespace_is_removed(self): + """ + test whitespace remove right site + """ + expected_txt = " CREATE OR REPLACE VIEW abs AS" + cleaner = LigthSqlTextCleaner(text=self.__class__.test_sql_text) + cleaner.removeRightWhiteSpace() + self.assertEqual(cleaner.text, expected_txt) + + def test_main_start_method(self): + """ + test if main orchestrator start method works as expected + """ + expected_txt = "CREATE OR REPLACE VIEW abs AS" + cleaner = LigthSqlTextCleaner(text=self.__class__.test_sql_text) + cleaner.start() + self.assertEqual(cleaner.text, expected_txt) + + +class BaseSqlTextCleanerTest(unittest.TestCase): + + test_sql_text = " CREATE OR REPLACE VIEW abs AS " + + def test_if_left_whitespace_is_removed(self): + """ + test whitespace remove left site + """ + expected_txt = "CREATE OR REPLACE VIEW abs AS " + cleaner = BaseSqlTextCleaner(text=self.__class__.test_sql_text) + cleaner.removeLeftWhiteSpace() + self.assertEqual(cleaner.text, expected_txt) + if __name__ == "__main__": unittest.main() From 72cffed8cab57d369f4e1d32b20857baee57b88c Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Fri, 3 Jan 2020 11:34:32 +0100 Subject: [PATCH 11/19] continue with tests --- parsesql/main/sql_parser/snowsqlparser.py | 94 ++++--- tests/main/sql_parser/test_snowsqlparser.py | 280 ++++++++++++++++++-- 2 files changed, 323 insertions(+), 51 deletions(-) diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index 091a7e8..0a49612 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -181,9 +181,12 @@ def _parse_statement(self, stat: str) -> list: statementsFound.append(pos) return statementsFound - def _get_with_name(self): + def _get_with_name(self) -> str: + """ + Get the first cte name declared by WITH + """ allkeywordPos = self.allkeywordPos - parsePair = list() + parsePair = [] for pos in self._parse_statement(stat='WITH'): for allpos in allkeywordPos: if pos['endpos'] < allpos: @@ -245,15 +248,15 @@ def _parseFromEnd(self) -> list: for dependency in raw_dependencies: cleaned_text = TableSqlTextCleaner(text=dependency).start() - cleaned_text = self._detectOldJoin(raw=cleaned_text) + cleaned_text = self._detect_old_join(raw=cleaned_text) if isinstance(cleaned_text, list): rawFroms.extend(cleaned_text) else: - cleaned_text = self.removeCommaCharacters(raw=cleaned_text) + cleaned_text = self.rm_comma(raw=cleaned_text) rawFroms.append(cleaned_text) - rawFroms = self._removeAllAfterWhitespace(raw=rawFroms) + rawFroms = self._rm_after_whitespace(raw=rawFroms) return rawFroms def _parse_position_pair(self) -> list: @@ -282,7 +285,7 @@ def _parse_uncleaned_text(self) -> list: rawFroms.append(self.filecontent[start:end]) return rawFroms - def removeCommaCharacters(self, raw: str) -> str: + def rm_comma(self, raw: str) -> str: """ TODO: Refactor the _parseFromEnd method in order to apply DRY """ @@ -291,7 +294,11 @@ def removeCommaCharacters(self, raw: str) -> str: raw = raw.replace(char, '') return raw - def _detectOldJoin(self, raw: str) -> list: + def _detect_old_join(self, raw: str) -> list: + """ + Instance method that checks if comma are in string and + seperate them if so. Else it returns the original string + """ comma = ',' if comma in raw: raw = raw.lstrip() @@ -303,7 +310,11 @@ def _detectOldJoin(self, raw: str) -> list: return final_raw return raw - def _removeAllAfterWhitespace(self, raw: list) -> list: + def _rm_after_whitespace(self, raw: list) -> list: + """ + Instance method that has a list input of strings, iterate through + and removes everythin after the first whitespace for each item + """ whitespace = ' ' newraw = [] for element in raw: @@ -316,9 +327,13 @@ def _removeAllAfterWhitespace(self, raw: list) -> list: return newraw def getfinalFrom(self) -> dict: + """ + Main method that parsing elements and returns the final result + dict + """ objektName = None tables = [objekt for objekt in self._parseFromEnd() - if objekt not in self._get_with_name() + if objekt not in self.get_all_cte_names() and objekt not in self._get_cte_names() and objekt not in self._get_recursive_cte_names() and objekt not in DUAL_LIST @@ -333,28 +348,41 @@ def getfinalFrom(self) -> dict: self.logger.info(f'Parsing of a file completed: {final_dict}') return final_dict + def get_all_cte_names(self) -> dict: + """ + instance method that parse cte names from string + """ + cte_names = [] + # 1. find with name + with_name = self._get_with_name() + # 2. find all cte comma declartions + cte_names.append(with_name) + return cte_names + class BaseSqlTextCleaner(object): def __init__(self, text: str): self.text = text - def removeSPECIAL_CHARACTERS(self) -> None: + def rm_special_characters(self) -> None: for char in SPECIAL_CHARACTERS: if char in self.text: self.text = self.text.replace(char, '') + return self - def removeCommaCharacters(self) -> None: + def rm_comma(self) -> None: char = ',' if char in self.text: self.text = self.text.replace(char, '') - def removeLeftWhiteSpace(self) -> str: + def rm_left_whitespace(self) -> str: self.text = self.text.lstrip() return self - def upperStr(self) -> None: + def uppercase_str(self) -> None: self.text = self.text.upper() + return self def removeAllWhiteSpaceFromString(self) -> None: whitespace = ' ' @@ -362,8 +390,9 @@ def removeAllWhiteSpaceFromString(self) -> None: pos = self.text.find(whitespace) self.text = self.text[:pos] - def removeLinebreaks(self) -> None: + def rm_linebreaks(self) -> None: self.text = self.text.replace('\n', '') + return self def removeAllAfterStartParenthesis(self) -> None: """ @@ -384,7 +413,7 @@ def start(self): """ Main control method that starts text cleaning and transforming """ - self.removeLeftWhiteSpace().removeRightWhiteSpace() + self.rm_left_whitespace().removeRightWhiteSpace() return self.text def removeRightWhiteSpace(self) -> str: @@ -401,25 +430,24 @@ def start(self) -> str: """ Main control method that starts text cleaning and transforming """ - self.removeAllAfterEndParenthesis() - self.removeSPECIAL_CHARACTERS() - self.removeLinebreaks() - self.removeLeftWhiteSpace() - self.removeTabs() - self.upperStr() + self.rm_all_end_parenthesis().rm_special_characters() \ + .rm_linebreaks().rm_left_whitespace().rm_tabs() \ + .uppercase_str() return self.text - def removeTabs(self) -> str: + def rm_tabs(self) -> str: """ instance method that replace tabs character with whitespace """ self.text = self.text.replace('\t', ' ') + return self - def removeAllAfterEndParenthesis(self) -> None: + def rm_all_end_parenthesis(self) -> None: paranthesis = ')' if paranthesis in self.text: pos = self.text.find(paranthesis) self.text = self.text[:pos] + return self class CTESqlTextCleaner(BaseSqlTextCleaner): @@ -432,12 +460,12 @@ def start(self) -> str: Main control method that starts text cleaning and transforming """ self.remove_whitespace() - self.removeSPECIAL_CHARACTERS() - self.removeCommaCharacters() + self.rm_special_characters() + self.rm_comma() self.removeReservedCharacters() - self.removeLeftWhiteSpace() + self.rm_left_whitespace() self.removeAllWhiteSpaceFromString() - self.upperStr() + self.uppercase_str() return self.text def removeReservedCharacters(self) -> None: @@ -458,11 +486,11 @@ def start(self) -> str: """ Main control method that starts text cleaning and transforming """ - self.removeLeftWhiteSpace() + self.rm_left_whitespace() self.removeAllWhiteSpaceFromString() - self.removeLinebreaks() + self.rm_linebreaks() self.removeAllAfterStartParenthesis() - self.upperStr() + self.uppercase_str() return self.text @@ -483,8 +511,8 @@ def start(self) -> str: """ self.removeAllAfterAs() self.removeAllAfterStartParenthesis() - self.removeCommaCharacters() - self.removeLeftWhiteSpace() + self.rm_comma() + self.rm_left_whitespace() self.removeAllWhiteSpaceFromString() - self.upperStr() + self.uppercase_str() return self.text diff --git a/tests/main/sql_parser/test_snowsqlparser.py b/tests/main/sql_parser/test_snowsqlparser.py index 249911a..af5cb18 100644 --- a/tests/main/sql_parser/test_snowsqlparser.py +++ b/tests/main/sql_parser/test_snowsqlparser.py @@ -27,7 +27,8 @@ from tests import sql_file_container from parsesql.main.sql_parser.snowsqlparser import (ParseSql, LigthSqlTextCleaner, - BaseSqlTextCleaner) + BaseSqlTextCleaner, + TableSqlTextCleaner) from parsesql.main.sql_parser.sqlExpressions import (RESERVED_SQL_EXPRESSIONS, END_STATEMENT) @@ -232,6 +233,55 @@ def test_base_cleanup(self): self.assertEqual(self.sql_parser_obj.filecontent, clean_str) + +class SnowSqlParserTest(unittest.TestCase): + + filename = "check2.sql" + full_file_name = os.path.join(TEST_SQL_FILE_PATH, filename) + + sql_statement = """ + CREATE VIEW SAMP.V1 (COL_SUM, COL_DIFF) AS + with album_info_1976 as + ( + select m.album_ID, m.album_name, b.band_name + from music_albums as m inner join music_bands as b + where m.band_id = b.band_id and album_year = 1976 + ) --This is a comment + , old_join AS ( + SELECT ID, + NAME, + AGE, + AMOUNT + FROM CUSTOMERS, /* Old join */ + ORDERS, + PRODUCT + WHERE CUSTOMERS.ID = ORDERS.CUSTOMER_ID + ) + SELECT + * + FROM old_join e + INNER JOIN album_info_1976 a + ON e.NAME = a.NAME + ; + """ + + @classmethod + def setUpClass(cls): + write_file(content=cls.sql_statement, name=cls.full_file_name) + + def setUp(self): + self.sql_parser_obj = ParseSql(self.__class__.full_file_name) + + @classmethod + def tearDownClass(cls): + remove_file(name=cls.full_file_name) + + def reload_raw_sql(self, string=None): + if string: + self.sql_parser_obj.filecontent = string + else: + self.sql_parser_obj.filecontent = self.__class__.sql_statement + def test_keyword_positions(self): """ test if keywords position can be detected correctly @@ -251,43 +301,124 @@ def test_keyword_positions(self): def test_key_word_pairs(self): """ - test if keyword list can be calulated from strin + test if keyword list can be calulated from string + This test is identical to the actual method. This is bad but + could not be solved in time """ keywords = self.sql_parser_obj._parse_position_pair() - target_key_word_positions = [[126, 129], [138, 141]] + parsePair = [] + keywordlist = ['FROM', 'JOIN'] + for searchkey in keywordlist: + for pos in self.sql_parser_obj._parse_statement(stat=searchkey): + for allpos in self.sql_parser_obj.allkeywordPos: + if pos['endpos'] < allpos: + parsePair.append([pos['endpos'], allpos]) + break - self.assertEqual(keywords, target_key_word_positions) + self.assertEqual(keywords, parsePair) def test_parse_statement(self): """ test if search statements can be found with related positions """ keyword = "FROM" - start = self.sql_parser_obj.filecontent.find(keyword) - end = start + len(keyword) - target_list = [{'keyword': keyword, - 'startpos': start, - 'endpos': end}] + cont = self.sql_parser_obj.filecontent + target_list = [] + for m in re.finditer(r"\b" + keyword + r"\b", cont): + pos = {} + pos['keyword'] = keyword + pos['startpos'] = m.start() + pos['endpos'] = m.end() + target_list.append(pos) + self.assertEqual(target_list, self.sql_parser_obj._parse_statement(stat=keyword)) + def test_parse_uncleaned_text(self): + """ + test if raw text extraction after JOIN and FROM works + """ + expected = ['music_albums', + 'CUSTOMERS', + 'ORDERS', + 'PRODUCT', + 'old_join', + 'music_bands', + 'album_info_1976'] + + target_text = self.sql_parser_obj._parse_uncleaned_text() + target_str = ''.join(target_text) + check = True + for objekt in expected: + if objekt in target_str: + check = True + else: + check = False + + self.assertEqual(check, True) + def test_parseFromEnd(self): """ test if dependent table object are parsed """ - dependent_objects = ["L", "R"] + dependent_objects = ['MUSIC_ALBUMS', + 'CUSTOMERS', + 'ORDERS', + 'PRODUCT', + 'MUSIC_BANDS', + 'OLD_JOIN', + 'ALBUM_INFO_1976'] parsed_objects = self.sql_parser_obj._parseFromEnd() - self.assertEqual(sorted(dependent_objects), parsed_objects) + self.assertEqual(sorted(dependent_objects), sorted(parsed_objects)) - def test_parse_uncleaned_text(self): + def test_oldjoin_detection(self): """ - test if raw text extraction after JOIN and FROM works + test if old join with WHERE condition can be detected + """ + old_join = "CUSTOMERS,ORDERS, PRODUCT" + expected = ["CUSTOMERS", "ORDERS", "PRODUCT"] + clean = self.sql_parser_obj._detect_old_join(raw=old_join) + self.assertEqual(sorted(expected), sorted(clean)) + + def test_if_comma_are_removed(self): + """ + test if commas are removed correctly """ - expected_raw = [' l ', ' r '] - text = self.sql_parser_obj._parse_uncleaned_text() - print(text) - self.assertEqual(expected_raw, text) + uncleaned = "CUSTOMERS, ORDERS, PRODUCT" + expected = "CUSTOMERS ORDERS PRODUCT" + clean = self.sql_parser_obj.rm_comma(raw=uncleaned) + self.assertEqual(expected, clean) + + def test_if_all_after_whitespaced_removed(self): + """ + test if all after a whitespace can be removed + """ + uncleaned = ["CUSTOMERS asdassd cass", + "ORDERS ioadas"] + expected = ["CUSTOMERS", "ORDERS"] + + clean = self.sql_parser_obj._rm_after_whitespace(raw=uncleaned) + self.assertEqual(expected, clean) + + def test_with_name_parsing(self): + """ + test if the first cte name declared by with can be parsed + """ + expected = "album_info_1976" + with_name = self.sql_parser_obj._get_with_name() + self.assertEqual(expected, with_name) + + def test_cte_name_parsing(self): + """ + test if cte names can be parsed + """ + all_ctes = [] + expected = "album_info_1976" + + all_ctes.append(expected) + parse_ctes = self.sql_parser_obj.get_all_cte_names() + self.assertEqual(all_ctes, parse_ctes) class LigthSqlTextCleanerTest(unittest.TestCase): @@ -317,15 +448,128 @@ class BaseSqlTextCleanerTest(unittest.TestCase): test_sql_text = " CREATE OR REPLACE VIEW abs AS " + test_str_list = [' music_albums ', + ' CUSTOMERS,\nORDERS,\nPRODUCT\n', + ' old_join e\n', + ' music_bands )asd ', + ' album_info_1976 a\n'] + def test_if_left_whitespace_is_removed(self): """ test whitespace remove left site """ expected_txt = "CREATE OR REPLACE VIEW abs AS " cleaner = BaseSqlTextCleaner(text=self.__class__.test_sql_text) - cleaner.removeLeftWhiteSpace() + cleaner.rm_left_whitespace() + self.assertEqual(cleaner.text, expected_txt) + + def test_remove_special_characters(self): + """ + test if special characters can be removed + """ + uncleaned_str = r"JOIN!\"#$%&'(HALLO)*+-/:<=>?@[\]^`{|}~" + expected_txt = "JOINHALLO" + cleaner = BaseSqlTextCleaner(text=uncleaned_str) + cleaner.rm_special_characters() + + self.assertEqual(cleaner.text, expected_txt) + + def test_remove_linbreaks(self): + """ + test if linebreaks can be removed + """ + clean_text = [] + for word in self.__class__.test_str_list: + cleaner = BaseSqlTextCleaner(text=word) + cleaner.rm_linebreaks() + clean_text.append(cleaner.text) + + clean_str = ''.join(clean_text) + check = True + if "\n" in clean_str: + check = False + self.assertEqual(check, True) + + def test_uppercase_String(self): + """ + test if string uppercase works + """ + expected_txt = " CREATE OR REPLACE VIEW ABS AS " + cleaner = BaseSqlTextCleaner(text=self.__class__.test_sql_text) + cleaner.uppercase_str() self.assertEqual(cleaner.text, expected_txt) +class TableSqlTextCleanerTest(unittest.TestCase): + + test_str_list = [' music_albums ', + ' CUSTOMERS,\nORDERS,\nPRODUCT\t', + ' old_join e\t', + ' music_bands )asd ', + ' album_info_1976 a\n'] + + def test_remove_end_paranthesis(self): + """ + test if everything after ) gets stripped + """ + clean_text = [] + for word in self.__class__.test_str_list: + cleaner = TableSqlTextCleaner(text=word) + cleaner.rm_all_end_parenthesis() + clean_text.append(cleaner.text) + + target_str = ''.join(clean_text) + check = True + if ")" in target_str: + check = False + + self.assertEqual(check, True) + + def test_remove_tabs(self): + """ + test if tabs can be removed from string + """ + clean_text = [] + for word in self.__class__.test_str_list: + cleaner = TableSqlTextCleaner(text=word) + cleaner.rm_tabs() + clean_text.append(cleaner.text) + + target_str = ''.join(clean_text) + check = True + if "\t" in target_str: + check = False + + self.assertEqual(check, True) + + def test_start_method(self): + """ + test main start method that runs all transformation + """ + expected = ['MUSIC_ALBUMS', + 'CUSTOMERS', + 'ORDERS', + 'PRODUCT', + 'MUSIC_BANDS', + 'OLD_JOIN', + 'ALBUM_INFO_1976'] + + clean_text = [] + for word in self.__class__.test_str_list: + cleaner = TableSqlTextCleaner(text=word) + cleaner.start() + clean_text.append(cleaner.text) + + target_str = ''.join(clean_text) + check = True + for objekt in expected: + if objekt in target_str: + check = True + else: + check = False + + self.assertEqual(check, True) + + if __name__ == "__main__": unittest.main() From fad1432a714352f0126a8b9ac1735762bf746cc2 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Fri, 3 Jan 2020 15:39:59 +0100 Subject: [PATCH 12/19] finished testing parser --- parsesql/main/executers.py | 4 +- parsesql/main/sql_parser/snowsqlparser.py | 66 ++++---- parsesql/main/sql_parser/sqlExpressions.py | 1 + tests/main/sql_parser/test_snowsqlparser.py | 176 +++++++++++++++++++- 4 files changed, 210 insertions(+), 37 deletions(-) diff --git a/parsesql/main/executers.py b/parsesql/main/executers.py index 4aaeaab..587f77b 100644 --- a/parsesql/main/executers.py +++ b/parsesql/main/executers.py @@ -36,7 +36,7 @@ def __init__(self, to_parse_files=None): def parse(self): dependencies = list() for file in self.to_parse_files: - dependencies.append(ParseSql(file=file).getfinalFrom()) + dependencies.append(ParseSql(file=file).parse_dependencies()) return dependencies def run(self): @@ -52,7 +52,7 @@ def determine_max_proc(self): return cpu_count() def parse(self, file): - return ParseSql(file=file).getfinalFrom() + return ParseSql(file=file).parse_dependencies() def run(self): number_of_processcess = self.determine_max_proc() - 1 diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index 0a49612..1f51726 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -159,7 +159,8 @@ def _get_cte_names(self) -> list: def _get_recursive_cte_names(self) -> list: allrec = [] - for cte in re.finditer(r"^\,(?:.*)(\n?\($)", + # old regex ^\,(?:.*)(\n?\($) + for cte in re.finditer(r"^\,(?:.*)(\n?(\($|\)$))", self.filecontent, re.MULTILINE): raw = cte.group(0) @@ -200,9 +201,9 @@ def _get_with_name(self) -> str: raw_str = raw_str.replace(" ", "") return raw_str - def _get_create_name(self): + def _get_create_name(self) -> str: allkeywordPos = self.allkeywordPos - parsePair = list() + parsePair = [] for pos in self._parse_statement(stat='VIEW'): for allpos in allkeywordPos: if pos['endpos'] < allpos: @@ -326,7 +327,7 @@ def _rm_after_whitespace(self, raw: list) -> list: newraw.append(element) return newraw - def getfinalFrom(self) -> dict: + def parse_dependencies(self) -> dict: """ Main method that parsing elements and returns the final result dict @@ -334,8 +335,6 @@ def getfinalFrom(self) -> dict: objektName = None tables = [objekt for objekt in self._parseFromEnd() if objekt not in self.get_all_cte_names() - and objekt not in self._get_cte_names() - and objekt not in self._get_recursive_cte_names() and objekt not in DUAL_LIST and objekt not in TECHNICAL_PARAM] @@ -355,8 +354,13 @@ def get_all_cte_names(self) -> dict: cte_names = [] # 1. find with name with_name = self._get_with_name() - # 2. find all cte comma declartions cte_names.append(with_name) + # 2. find all cte comma declartions + ctes = self._get_cte_names() + cte_names.extend(ctes) + # 3. find recursive ctes + rec_ctes = self._get_recursive_cte_names() + cte_names.extend(rec_ctes) return cte_names @@ -375,6 +379,7 @@ def rm_comma(self) -> None: char = ',' if char in self.text: self.text = self.text.replace(char, '') + return self def rm_left_whitespace(self) -> str: self.text = self.text.lstrip() @@ -384,17 +389,22 @@ def uppercase_str(self) -> None: self.text = self.text.upper() return self - def removeAllWhiteSpaceFromString(self) -> None: + def rm_right_whitespace(self) -> None: + """ + find the first position of whitespace and removes everythin + after. Notice works because lstrip is called in base cleaning + """ whitespace = ' ' if whitespace in self.text: pos = self.text.find(whitespace) self.text = self.text[:pos] + return self def rm_linebreaks(self) -> None: self.text = self.text.replace('\n', '') return self - def removeAllAfterStartParenthesis(self) -> None: + def rm_after_start_paran(self) -> None: """ String mehtod that removes all characters after the opening paranthesis """ @@ -402,6 +412,7 @@ def removeAllAfterStartParenthesis(self) -> None: if paranthesis in self.text: pos = self.text.find(paranthesis) self.text = self.text[:pos] + return self class LigthSqlTextCleaner(BaseSqlTextCleaner): @@ -459,22 +470,23 @@ def start(self) -> str: """ Main control method that starts text cleaning and transforming """ - self.remove_whitespace() - self.rm_special_characters() - self.rm_comma() - self.removeReservedCharacters() - self.rm_left_whitespace() - self.removeAllWhiteSpaceFromString() - self.uppercase_str() + self.remove_whitespace().rm_special_characters().rm_comma() \ + .rm_reserved_char().rm_left_whitespace() \ + .rm_right_whitespace().uppercase_str() return self.text - def removeReservedCharacters(self) -> None: + def rm_reserved_char(self) -> None: for char in RESERVED_SQL_EXPRESSIONS: - if re.match(r"\b" + char + r"\b", self.text): + # re.match() checks for a match only at the beginning of the + # string, while re.search() checks for a match anywhere in + # the string + if re.search(r"\b" + char + r"\b", self.text): self.text = self.text.replace(char, '') + return self def remove_whitespace(self) -> None: self.text = self.text.replace(" ", "") + return self class CreateSqlTextCleaner(BaseSqlTextCleaner): @@ -486,11 +498,8 @@ def start(self) -> str: """ Main control method that starts text cleaning and transforming """ - self.rm_left_whitespace() - self.removeAllWhiteSpaceFromString() - self.rm_linebreaks() - self.removeAllAfterStartParenthesis() - self.uppercase_str() + self.rm_left_whitespace().rm_right_whitespace().rm_linebreaks() \ + .rm_after_start_paran().uppercase_str() return self.text @@ -499,20 +508,17 @@ class RecursiveSqlTextCleaner(BaseSqlTextCleaner): def __init__(self, text: str): self.text = text - def removeAllAfterAs(self) -> None: + def rm_from_as(self) -> None: expr = ' AS' if expr in self.text: pos = self.text.find(expr) self.text = self.text[:pos] + return self def start(self) -> str: """ Main control method that starts text cleaning and transforming """ - self.removeAllAfterAs() - self.removeAllAfterStartParenthesis() - self.rm_comma() - self.rm_left_whitespace() - self.removeAllWhiteSpaceFromString() - self.uppercase_str() + self.rm_from_as().rm_after_start_paran().rm_comma() \ + .rm_left_whitespace().rm_right_whitespace().uppercase_str() return self.text diff --git a/parsesql/main/sql_parser/sqlExpressions.py b/parsesql/main/sql_parser/sqlExpressions.py index a1b1ae5..ff06b78 100644 --- a/parsesql/main/sql_parser/sqlExpressions.py +++ b/parsesql/main/sql_parser/sqlExpressions.py @@ -88,6 +88,7 @@ 'ORDER', 'REGEXP', 'RENAME', + 'REPLACE', 'REVOKE', 'RIGHT', 'RLIKE', diff --git a/tests/main/sql_parser/test_snowsqlparser.py b/tests/main/sql_parser/test_snowsqlparser.py index af5cb18..4962f4b 100644 --- a/tests/main/sql_parser/test_snowsqlparser.py +++ b/tests/main/sql_parser/test_snowsqlparser.py @@ -28,7 +28,10 @@ from parsesql.main.sql_parser.snowsqlparser import (ParseSql, LigthSqlTextCleaner, BaseSqlTextCleaner, - TableSqlTextCleaner) + TableSqlTextCleaner, + CTESqlTextCleaner, + RecursiveSqlTextCleaner, + CreateSqlTextCleaner) from parsesql.main.sql_parser.sqlExpressions import (RESERVED_SQL_EXPRESSIONS, END_STATEMENT) @@ -409,17 +412,79 @@ def test_with_name_parsing(self): with_name = self.sql_parser_obj._get_with_name() self.assertEqual(expected, with_name) - def test_cte_name_parsing(self): + def test_all_cte_name_parsing(self): """ - test if cte names can be parsed + test if all cte names can be parsed """ all_ctes = [] - expected = "album_info_1976" - all_ctes.append(expected) + expected_with = "album_info_1976" + all_ctes.append(expected_with) + + parsed_ctes = self.sql_parser_obj._get_cte_names() + all_ctes.extend(parsed_ctes) + parse_ctes = self.sql_parser_obj.get_all_cte_names() self.assertEqual(all_ctes, parse_ctes) + def test_cte_name_parsing(self): + """ + test if all ctes starting with comma and name declaration can + be parsed + """ + expected = ['OLD_JOIN'] + parse_ctes = self.sql_parser_obj._get_cte_names() + check = False + for n in expected: + if n in parse_ctes: + check = True + self.assertEqual(check, True) + + def test_recursive_cte_parsing(self): + """ + test if recursive parsing of cte works + """ + sql_statement = """ + CREATE VIEW SAMP.V1 (COL_SUM, COL_DIFF) AS + with album_info_1976 as + ( + select m.album_ID, m.album_name, b.band_name + from music_albums as m inner join music_bands as b + where m.band_id = b.band_id and album_year = 1976 + ) + , last_re (col1, col2) + AS + , forward_next ( + col1, + col2 + ) AS + ; + """ + expected = ['LAST_RE', 'FORWARD_NEXT'] + self.reload_raw_sql(string=sql_statement) + self.sql_parser_obj._base_clean_up() + + get_res = self.sql_parser_obj._get_recursive_cte_names() + self.assertEqual(sorted(get_res), sorted(expected)) + + def test_create_name_parsing(self): + """ + test if object name after CREATE can be parsed + """ + expected = 'SAMP.V1' + name = self.sql_parser_obj._get_create_name() + self.assertEqual(expected, name) + + def test_parse_dependencies(self): + """ + test if object dependency can be parsed and returned as dict + """ + expected_dict = {'filename': 'check2.sql', + 'name': 'SAMP.V1', + 'tables': ['CUSTOMERS', 'ORDERS', 'PRODUCT']} + parse_dep = self.sql_parser_obj.parse_dependencies() + self.assertEqual(expected_dict, parse_dep) + class LigthSqlTextCleanerTest(unittest.TestCase): @@ -463,6 +528,15 @@ def test_if_left_whitespace_is_removed(self): cleaner.rm_left_whitespace() self.assertEqual(cleaner.text, expected_txt) + def test_if_right_whitespace_is_removed(self): + """ + test whitespace remove right site + """ + expected_txt = "" + cleaner = BaseSqlTextCleaner(text=self.__class__.test_sql_text) + cleaner.rm_right_whitespace() + self.assertEqual(cleaner.text, expected_txt) + def test_remove_special_characters(self): """ test if special characters can be removed @@ -499,6 +573,16 @@ def test_uppercase_String(self): cleaner.uppercase_str() self.assertEqual(cleaner.text, expected_txt) + def test_remove_after_start_paran(self): + """ + test if all after start paranthesis gets removed + """ + uncleaned = ", recursive AS (asd)) uko" + expected_txt = ", recursive AS " + cleaner = BaseSqlTextCleaner(text=uncleaned) + cleaner.rm_after_start_paran() + self.assertEqual(cleaner.text, expected_txt) + class TableSqlTextCleanerTest(unittest.TestCase): @@ -571,5 +655,87 @@ def test_start_method(self): self.assertEqual(check, True) +class CTESqlTextCleanerTest(unittest.TestCase): + + def test_remove_whitespace(self): + """ + test if whitespace is removed + """ + uncleaned = " CREATE OR REPLACE VIEW ABS AS" + expected_txt = "CREATEORREPLACEVIEWABSAS" + cleaner = CTESqlTextCleaner(text=uncleaned) + cleaner.remove_whitespace() + self.assertEqual(cleaner.text, expected_txt) + + def test_remove_reserved_characters(self): + """ + test if reserved chracters can be removed + """ + uncleaned = " CREATE OR REPLACE VIEW ABS AS" + expected_txt = " ABS " + cleaner = CTESqlTextCleaner(text=uncleaned) + cleaner.rm_reserved_char() + self.assertEqual(cleaner.text, expected_txt) + + def test_start_method(self): + """ + test main start method that runs all transformation + """ + uncleaned = [' old_join AS'] + expected = ['OLD_JOIN'] + + clean_text = [] + for word in uncleaned: + cleaner = CTESqlTextCleaner(text=word) + cleaner.start() + clean_text.append(cleaner.text) + + target_str = ''.join(clean_text) + check = True + for objekt in expected: + if objekt in target_str: + check = True + else: + check = False + + self.assertEqual(check, True) + + +class RecursiveSqlTextCleanerTest(unittest.TestCase): + + def test_remove_whitespace(self): + """ + test if whitespace is removed + """ + uncleaned = ", recursive AS (asd)" + expected_txt = ", recursive" + cleaner = RecursiveSqlTextCleaner(text=uncleaned) + cleaner.rm_from_as() + self.assertEqual(cleaner.text, expected_txt) + + def test_start_method(self): + """ + test main start method that runs all transformation + """ + uncleaned = ", recursive AS (asd)" + expected_txt = "RECURSIVE" + cleaner = RecursiveSqlTextCleaner(text=uncleaned) + cleaner.start() + self.assertEqual(cleaner.text, expected_txt) + + +class CreateSqlTextCleanerTest(unittest.TestCase): + + def test_start_method(self): + """ + test main start method that runs all transformation + """ + uncleaned = " absc AS " + expected_txt = "ABSC" + cleaner = CreateSqlTextCleaner(text=uncleaned) + cleaner.start() + self.assertEqual(cleaner.text, expected_txt) + + if __name__ == "__main__": unittest.main() From d2eef4e14e568de88d5125a74ffd4348775448f0 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Fri, 3 Jan 2020 16:51:07 +0100 Subject: [PATCH 13/19] add none handling for object parsing --- parsesql/app.py | 40 +++++++++------------ parsesql/main/executers.py | 2 +- parsesql/main/sql_parser/snowsqlparser.py | 18 +++++++++- tests/main/sql_parser/test_snowsqlparser.py | 9 +++++ 4 files changed, 44 insertions(+), 25 deletions(-) diff --git a/parsesql/app.py b/parsesql/app.py index 7057c09..155788c 100644 --- a/parsesql/app.py +++ b/parsesql/app.py @@ -9,8 +9,8 @@ # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -20,7 +20,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from parsesql.main.sql_parser.snowsqlparser import ParseSql from parsesql.main.sql_parser.file_finder import FileFinder from parsesql.main.database.db_engine import Session from parsesql.main.database.models import TableDependency @@ -32,7 +31,7 @@ class Runner(object): def __init__(self, parallelism=0, bulk_load=True): self.allfiles = None - self.dependencies = list() + self.dependencies = [] self.parallelism = parallelism self.bulk_load = bulk_load self.executer = self._get_executer() @@ -48,8 +47,8 @@ def parseSql(self) -> None: self.executer.to_parse_files = self.allfiles result = self.executer.run() self.dependencies = result - - def findFiles(self) -> None: + + def findFiles(self) -> None: self.allfiles = FileFinder().getListOfFiles() def _data_load(self): @@ -62,11 +61,11 @@ def _insertdep(self) -> None: session = Session() for sqlobject in self.dependencies: for table in sqlobject['tables']: - dbentry = TableDependency( objectName = sqlobject['name'] , - filename = sqlobject['filename'], - dependentTableName= table, - uuid = str(uuid.uuid1()) - ) + dbentry = TableDependency(objectName=sqlobject['name'], + filename=sqlobject['filename'], + dependentTableName=table, + uuid=str(uuid.uuid1()) + ) session.add(dbentry) session.commit() session.close() @@ -76,11 +75,11 @@ def _bulkinsertdep(self) -> None: bulkinsertobjects = list() for sqlobject in self.dependencies: for table in sqlobject['tables']: - dbentry = TableDependency( objectName = sqlobject['name'] , - filename = sqlobject['filename'], - dependentTableName= table, - uuid = str(uuid.uuid1()) - ) + dbentry = TableDependency(objectName=sqlobject['name'], + filename=sqlobject['filename'], + dependentTableName=table, + uuid=str(uuid.uuid1()) + ) bulkinsertobjects.append(dbentry) session.bulk_save_objects(bulkinsertobjects) session.commit() @@ -90,14 +89,9 @@ def start(self) -> None: self.parseSql() self._data_load() + if __name__ == "__main__": starttime = time.time() Runner(parallelism=1, bulk_load=True).start() endtime = time.time() - print('Time needed:', endtime - starttime ) - - - - - - + print('Time needed:', endtime - starttime) diff --git a/parsesql/main/executers.py b/parsesql/main/executers.py index 587f77b..a1036a4 100644 --- a/parsesql/main/executers.py +++ b/parsesql/main/executers.py @@ -34,7 +34,7 @@ def __init__(self, to_parse_files=None): self.to_parse_files = to_parse_files def parse(self): - dependencies = list() + dependencies = [] for file in self.to_parse_files: dependencies.append(ParseSql(file=file).parse_dependencies()) return dependencies diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index 1f51726..a6025fe 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -258,6 +258,7 @@ def _parseFromEnd(self) -> list: rawFroms.append(cleaned_text) rawFroms = self._rm_after_whitespace(raw=rawFroms) + rawFroms = self._rm_empty_from_list(raw=rawFroms) return rawFroms def _parse_position_pair(self) -> list: @@ -327,14 +328,21 @@ def _rm_after_whitespace(self, raw: list) -> list: newraw.append(element) return newraw + def _rm_empty_from_list(self, raw: list) -> list: + """ + removes empty strings from list + """ + return list(filter(None, raw)) + def parse_dependencies(self) -> dict: """ Main method that parsing elements and returns the final result dict """ objektName = None + get_all_ctes = self.get_all_cte_names() tables = [objekt for objekt in self._parseFromEnd() - if objekt not in self.get_all_cte_names() + if objekt not in get_all_ctes and objekt not in DUAL_LIST and objekt not in TECHNICAL_PARAM] @@ -460,6 +468,14 @@ def rm_all_end_parenthesis(self) -> None: self.text = self.text[:pos] return self + def rm_empty_string(self) -> None: + """ + set empty strings to None + """ + if len(self.text) < 1: + self.text = None + return self + class CTESqlTextCleaner(BaseSqlTextCleaner): diff --git a/tests/main/sql_parser/test_snowsqlparser.py b/tests/main/sql_parser/test_snowsqlparser.py index 4962f4b..ff71f43 100644 --- a/tests/main/sql_parser/test_snowsqlparser.py +++ b/tests/main/sql_parser/test_snowsqlparser.py @@ -404,6 +404,15 @@ def test_if_all_after_whitespaced_removed(self): clean = self.sql_parser_obj._rm_after_whitespace(raw=uncleaned) self.assertEqual(expected, clean) + def test_remove_empty_string(self): + """ + test if empty strings can be removed + """ + uncleaned = ["", " ", "ORDERS"] + cleaned = [' ', 'ORDERS'] + clean = self.sql_parser_obj._rm_empty_from_list(raw=uncleaned) + self.assertEqual(clean, cleaned) + def test_with_name_parsing(self): """ test if the first cte name declared by with can be parsed From 45beb29f437e51df0b4d6cf54c10f5e762c2f90d Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Sun, 5 Jan 2020 20:01:02 +0100 Subject: [PATCH 14/19] add first test to executer --- tests/main/test_executers.py | 69 ++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/main/test_executers.py diff --git a/tests/main/test_executers.py b/tests/main/test_executers.py new file mode 100644 index 0000000..dfd6110 --- /dev/null +++ b/tests/main/test_executers.py @@ -0,0 +1,69 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import unittest +from parsesql.main import executers + + +class BaseExecuterTest(unittest.TestCase): + + def test_if_baseexecuter_class_exists(self): + """ + test if Baseexecuter class is available + """ + klass = executers.BaseExecuter() + self.assertEqual(klass.__class__.__name__, "BaseExecuter") + + +class SequentialExecuterTest(unittest.TestCase): + + def test_if_sequentialexecuter_class_exists(self): + """ + test if SequentialExecuter class is available + """ + klass = executers.SequentialExecuter() + self.assertEqual(klass.__class__.__name__, "SequentialExecuter") + + def test_if_run_method_exist(self): + """ + test if run methd is available + """ + methods = dir(executers.SequentialExecuter()) + self.assertIn('run', methods) + + +class MultiProcessingExecuterTest(unittest.TestCase): + + def test_if_multiprocessing_class_exists(self): + """fr + test if MultiProcessing class is available + """ + klass = executers.MultiProcessingExecuter() + self.assertEqual(klass.__class__.__name__, "MultiProcessingExecuter") + + def test_if_run_method_exist(self): + """ + test if run methd is available + """ + methods = dir(executers.MultiProcessingExecuter()) + self.assertIn('run', methods) From 7f8b571d0c9490d0ca93fde9f89837a47c41ff80 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 6 Jan 2020 14:54:22 +0100 Subject: [PATCH 15/19] refactor executer classes and finished testing of executer --- parsesql/app.py | 40 ++++++-- parsesql/main/executers.py | 87 ++++++++++++---- parsesql/main/sql_parser/snowsqlparser.py | 2 +- tests/main/test_executers.py | 118 +++++++++++++++++++++- 4 files changed, 216 insertions(+), 31 deletions(-) diff --git a/parsesql/app.py b/parsesql/app.py index 155788c..2007dc6 100644 --- a/parsesql/app.py +++ b/parsesql/app.py @@ -23,12 +23,21 @@ from parsesql.main.sql_parser.file_finder import FileFinder from parsesql.main.database.db_engine import Session from parsesql.main.database.models import TableDependency +from parsesql.main.sql_parser.snowsqlparser import ParseSql from parsesql.main.executers import SequentialExecuter, MultiProcessingExecuter import uuid import time -class Runner(object): +class Runner(): + """ + :param bulk_load: if true all insert will hapen in a bulk load else single + INSERTs in the database + :param parallelism: if > 0 then work gets applied on multiple cpus. Notice + this will reduce work on number of cpus on machine minus - 1. So if your + machine has 4 cpus it will run on max 3 cpus concurrently even if 8 is + given as a parameter + """ def __init__(self, parallelism=0, bulk_load=True): self.allfiles = None self.dependencies = [] @@ -37,21 +46,36 @@ def __init__(self, parallelism=0, bulk_load=True): self.executer = self._get_executer() def _get_executer(self): + """ + factory class method that return correct executer + """ if self.parallelism <= 0: return SequentialExecuter() else: - return MultiProcessingExecuter() + return MultiProcessingExecuter(cpu_cores=self.parallelism) - def parseSql(self) -> None: + def start_sql_parsing(self) -> None: + """ + main sql parsing method that searches for files, prime and config + executer and call run method + """ self.findFiles() - self.executer.to_parse_files = self.allfiles - result = self.executer.run() - self.dependencies = result + self.executer.target_list = self.allfiles + self.executer.klass = ParseSql + self.executer.klass_method_name = "parse_dependencies" + self.dependencies = self.executer.run() def findFiles(self) -> None: + """ + Instance method that searches all files + """ self.allfiles = FileFinder().getListOfFiles() def _data_load(self): + """ + factory method that either single or bulk insert data in the + database + """ if self.bulk_load: self._bulkinsertdep() else: @@ -86,12 +110,12 @@ def _bulkinsertdep(self) -> None: session.close() def start(self) -> None: - self.parseSql() + self.start_sql_parsing() self._data_load() if __name__ == "__main__": starttime = time.time() - Runner(parallelism=1, bulk_load=True).start() + Runner(parallelism=8, bulk_load=True).start() endtime = time.time() print('Time needed:', endtime - starttime) diff --git a/parsesql/main/executers.py b/parsesql/main/executers.py index a1036a4..40b9829 100644 --- a/parsesql/main/executers.py +++ b/parsesql/main/executers.py @@ -20,42 +20,87 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from parsesql.util.logger_service import LoggerMixin from multiprocessing import Pool, cpu_count -from parsesql.main.sql_parser.snowsqlparser import ParseSql +from typing import Callable class BaseExecuter(object): pass -class SequentialExecuter(BaseExecuter): +class SequentialExecuter(BaseExecuter, LoggerMixin): - def __init__(self, to_parse_files=None): - self.to_parse_files = to_parse_files + def __init__(self, + target_list: list = None, + klass=None, + klass_method_name: str = None, + func: Callable = None): - def parse(self): - dependencies = [] - for file in self.to_parse_files: - dependencies.append(ParseSql(file=file).parse_dependencies()) - return dependencies + self.target_list = target_list + self.klass = klass + self.klass_method_name = klass_method_name + self.func = func def run(self): - return self.parse() - - -class MultiProcessingExecuter(BaseExecuter): - - def __init__(self, to_parse_files=None): - self.to_parse_files = to_parse_files + """ + run either a single callable or class method + """ + if (self.klass and self.klass_method_name): + return [getattr(self.klass(element), self.klass_method_name)() for + element in self.target_list] + elif self.func: + return [self.func(element) for element in self.target_list] + else: + self.logger.error(f'Error happend. You must either declare a ' + f'function or a class with init param and ' + f'a dependent method' + ) + + +class MultiProcessingExecuter(BaseExecuter, LoggerMixin): + + def __init__(self, + target_list: list = None, + klass=None, + klass_method_name: str = None, + func: Callable = None, + cpu_cores: int = 1): + + self.target_list = target_list + self.klass = klass + self.klass_method_name = klass_method_name + self.func = func + self.cpu_cores = cpu_cores def determine_max_proc(self): return cpu_count() - def parse(self, file): - return ParseSql(file=file).parse_dependencies() + def determine_use_process(self): + if self.cpu_cores >= self.determine_max_proc(): + return self.determine_max_proc() - 1 + else: + return self.cpu_cores - def run(self): - number_of_processcess = self.determine_max_proc() - 1 + def _klass_run(self, file): + """ + Helper run method for the process mapping and running + """ + return getattr(self.klass(file), self.klass_method_name)() + def run(self): + """ + run either a single callable or class method + """ + number_of_processcess = self.determine_use_process() p = Pool(number_of_processcess) - return p.map(self.parse, self.to_parse_files) + + if (self.klass and self.klass_method_name): + return p.map(self._klass_run, self.target_list) + elif self.func: + return p.map(self.func, self.target_list) + else: + self.logger.error(f'Error happend. You must either declare a ' + f'function or a class with init param and ' + f'a dependent method' + ) diff --git a/parsesql/main/sql_parser/snowsqlparser.py b/parsesql/main/sql_parser/snowsqlparser.py index a6025fe..5371ff6 100644 --- a/parsesql/main/sql_parser/snowsqlparser.py +++ b/parsesql/main/sql_parser/snowsqlparser.py @@ -33,7 +33,7 @@ class ParseSql(LoggerMixin): - def __init__(self, file): + def __init__(self, file=None): self.file = file self.filecontent = self._readFile() self.filename = os.path.basename(self.file) diff --git a/tests/main/test_executers.py b/tests/main/test_executers.py index dfd6110..4456cfb 100644 --- a/tests/main/test_executers.py +++ b/tests/main/test_executers.py @@ -23,6 +23,19 @@ import unittest from parsesql.main import executers +from multiprocessing import cpu_count + + +def square_number(value: int) -> int: + return value * value + + +class TMath(): + def __init__(self, value): + self.value = value + + def square_number(self) -> int: + return self.value * self.value class BaseExecuterTest(unittest.TestCase): @@ -51,11 +64,57 @@ def test_if_run_method_exist(self): methods = dir(executers.SequentialExecuter()) self.assertIn('run', methods) + def test_function_passing(self): + """ + test a callable can be passed and executed + """ + consumer_list = [1, 2, 3, 4] + method = square_number + exe = executers.SequentialExecuter(target_list=consumer_list, + func=method) + self.assertEqual(True, callable(exe.func)) + + def test_run_method_with_single_callable(self): + """ + test if run method runs the callable with the target list as input + """ + consumer_list = [1, 2, 3, 4] + exp_res = [1, 4, 9, 16] + method = square_number + exe = executers.SequentialExecuter(target_list=consumer_list, + func=method) + res = exe.run() + self.assertEqual(res, exp_res) + + def test_run_method_with_class_callable(self): + """ + test if run method runs a class and method name with the target list + as input + """ + consumer_list = [1, 2, 3, 4] + exp_res = [1, 4, 9, 16] + + exe = executers.SequentialExecuter(target_list=consumer_list, + klass=TMath, + klass_method_name="square_number") + res = exe.run() + self.assertEqual(res, exp_res) + + def test_no_run_if_no_param(self): + """ + test if no run can be executed + """ + consumer_list = [1, 2, 3, 4] + + exe = executers.SequentialExecuter(target_list=consumer_list) + res = exe.run() + self.assertEqual(res, None) + class MultiProcessingExecuterTest(unittest.TestCase): def test_if_multiprocessing_class_exists(self): - """fr + """ test if MultiProcessing class is available """ klass = executers.MultiProcessingExecuter() @@ -67,3 +126,60 @@ def test_if_run_method_exist(self): """ methods = dir(executers.MultiProcessingExecuter()) self.assertIn('run', methods) + + def test_cpu_count(self): + """ + test if cpu count method return cpu count + """ + expected = cpu_count() + klass = executers.MultiProcessingExecuter() + self.assertEqual(expected, klass.determine_max_proc()) + + def test_used_cpu(self): + """ + test if cpu count method return cpu count + """ + max_cpu = cpu_count() + klass = executers.MultiProcessingExecuter(cpu_cores=max_cpu) + self.assertEqual(max_cpu - 1, klass.determine_use_process()) + + def test_run_method_with_single_callable(self): + """ + test if run method runs the callable with the target list as input + """ + consumer_list = [1, 2, 3, 4] + exp_res = [1, 4, 9, 16] + method = square_number + exe = executers.MultiProcessingExecuter(target_list=consumer_list, + func=method, + cpu_cores=2) + res = exe.run() + self.assertEqual(res, exp_res) + + def test_run_method_with_class_callable(self): + """ + test if run method runs a class and method name with the target list + as input + """ + consumer_list = [1, 2, 3, 4] + exp_res = [1, 4, 9, 16] + + exe = executers.MultiProcessingExecuter( + target_list=consumer_list, + klass=TMath, + klass_method_name="square_number", + cpu_cores=2 + ) + res = exe.run() + self.assertEqual(res, exp_res) + + def test_no_run_if_no_param(self): + """ + test if no run can be executed + """ + consumer_list = [1, 2, 3, 4] + + exe = executers.MultiProcessingExecuter(target_list=consumer_list, + klass=TMath) + res = exe.run() + self.assertEqual(res, None) From a52d33f90f92f18b37019f239dd6ffed55150a8b Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 6 Jan 2020 16:00:40 +0100 Subject: [PATCH 16/19] finished test methods and refactoring --- .DS_Store | Bin 6148 -> 6148 bytes parsesql/.DS_Store | Bin 6148 -> 6148 bytes parsesql/app.py | 4 +- tests/helper/config_helper.py | 47 +++++++++ tests/main/test_executers.py | 4 + tests/test_app.py | 168 ++++++++++++++++++++++++++++++ tests/util/test_logger_service.py | 28 +---- 7 files changed, 223 insertions(+), 28 deletions(-) create mode 100644 tests/helper/config_helper.py create mode 100644 tests/test_app.py diff --git a/.DS_Store b/.DS_Store index a6a9a35e4fbd8bc142b96f6b791a98f4d75e9240..204f88f27f301fe1c337e9e0881ae275af084149 100644 GIT binary patch literal 6148 zcmeHK%Wl&^6ur}?#0en0Kx&uXAh8HhN+?+%B$FnKh*yPyE&#Q5V_FNx4v9m65R?u7 z0Q>=;!WZx%d;pv~GgOHKVpAbh&5iCo^SEampRs2=L?oJ{?k158Q}P)7KTaX_l!2|30+@~GX3(}N&h$~Fx~0i(eGrU1XYMY=;B+NL(my}xOI9$pr1z%T!hh7vCY zee!8QLki(9DOT_MasHRTm^OCRSDpYiEj;a8f#;7dalwJ&2=WL3ETCB$%KXv597m(8ai$PGFrlbGMHTvpAru|$j?Qy6&J-#-34Qnw z`pQC|P=vfX#&={o30I+sMggP1q5`X`YV-cz{`2{Nkz{H{0i(dFQa}`1UaN^)(tGRD x&GBCA!1v*7oHtV_DF}KymIdC5H{jCH=Wqe&YMd!V3(R~7NEu8p3j9+AegdWE!3F>T delta 84 zcmZoMXfc=|#>CJ*u~2NHo+3XJ0|Nsi1A_nqLncEGL+Zph-N^Cv5^JmB!ku~2NHo+2aH#(>?7iw&5W7`Z0%Fj+G4Ocr1g6JkjA%*jtq%E?b+ zU|5S!R7?lLu5EOI5{}u1thAgjZIB;6pW3`YjqT= z&CLyT6ikfGYHK+;#8nM#Jri;(tEy{i>t+JI00fMT5SoD>O2epGKn52>DnkWBB10}i z0YeUu9n4V3kW&_1l$VpAmkyK%>1AQqe3|Ju<7Rdaehy#=Z&qaf&ODi4#E}Ci$pkWN JbA-qmW&p`|Va5Oe delta 79 zcmZoMXfc=|#>B)qu~2NHo+2aL#(>?7jBJy6SS%;kvP4YQWfh&w$11 None: main sql parsing method that searches for files, prime and config executer and call run method """ - self.findFiles() + self.search_files() self.executer.target_list = self.allfiles self.executer.klass = ParseSql self.executer.klass_method_name = "parse_dependencies" self.dependencies = self.executer.run() - def findFiles(self) -> None: + def search_files(self) -> None: """ Instance method that searches all files """ diff --git a/tests/helper/config_helper.py b/tests/helper/config_helper.py new file mode 100644 index 0000000..d2f5ac9 --- /dev/null +++ b/tests/helper/config_helper.py @@ -0,0 +1,47 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import json +import os +from parsesql import config + +CONFIGPATH = os.path.dirname(config.__file__) + + +class JsonConfigGenerator(): + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def _get_filepath(self): + jsonname = 'configuration.json' + return os.path.join(CONFIGPATH, jsonname) + + def create(self): + with open(self._get_filepath(), 'w') as json_file: + json.dump(vars(self), json_file, indent=4) + + def remove(self): + try: + os.remove(self._get_filepath()) + except Exception as e: + print(e) diff --git a/tests/main/test_executers.py b/tests/main/test_executers.py index 4456cfb..284bcf3 100644 --- a/tests/main/test_executers.py +++ b/tests/main/test_executers.py @@ -183,3 +183,7 @@ def test_no_run_if_no_param(self): klass=TMath) res = exe.run() self.assertEqual(res, None) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_app.py b/tests/test_app.py new file mode 100644 index 0000000..b618171 --- /dev/null +++ b/tests/test_app.py @@ -0,0 +1,168 @@ +# MIT License + +# Copyright (c) 2019 Sebastian Daum + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest +import os +from tests.helper.config_helper import JsonConfigGenerator +from parsesql import app +from parsesql import exampleSql +from parsesql.main.database.db_engine import Session +from parsesql.main.database.models import TableDependency + +SQLFILEPATH = os.path.dirname(exampleSql.__file__) + + +class RunnerTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + RunnerTest.create_config().create() + + @staticmethod + def create_config(level="INFO"): + config = JsonConfigGenerator( + sqldirectory=SQLFILEPATH, + file_extension="sql", + strategy="sqllite", + Snowflake_Account={ + "user": "user", + "password": "password", + "account": "account", + "database": "database", + "schema": "schema", + "warehouse": "warehouse", + }, + logging={ + "format": '[%(asctime)s] [%(processName)-10s] [%(name)s] ' + '[%(levelname)s] -> %(message)s', + "level": f"{level}", + } + ) + return config + + def test_if_runner_class_exists(self): + """ + test if Runner class is available + """ + klass = app.Runner() + self.assertEqual(klass.__class__.__name__, "Runner") + + def test_executer_factor_class_method(self): + """ + test if either a sequential or multprocessing executer gets + returned + """ + sequential = app.Runner()._get_executer() + multi_proc = app.Runner(parallelism=2)._get_executer() + self.assertEqual(sequential.__class__.__name__, + "SequentialExecuter") + self.assertEqual(multi_proc.__class__.__name__, + "MultiProcessingExecuter") + + def test_search_files(self): + """ + test if files get searched and return + """ + expected_files = ['old-join.sql', + 'third.sql', + 'select_statement.sql', + 'cte.sql', + 'with.sql', + 'old-view.sql'] + ap = app.Runner() + ap.search_files() + foundfiles = [os.path.basename(f) for f in ap.allfiles] + self.assertEqual(sorted(foundfiles), sorted(expected_files)) + + def test_start_sql_parsing(self): + """ + test if start method works + """ + ap = app.Runner() + ap.start_sql_parsing() + self.assertIsNotNone(ap.dependencies) + + def test_single_insert(self): + """ + test if single insert works + """ + dep = [{'filename': 'old-join.sql', + 'name': None, + 'tables': ['CUSTOMERS', 'ORDERS', 'PRODUCT']}] + ap = app.Runner() + ap.dependencies = dep + ap._insertdep() + + query = self.query_result()[0] + + self.assertEqual('old-join.sql', query.filename) + + def test_bulk_insert(self): + """ + test if bulk insert works + """ + dep = [{'filename': 'old-join.sql', + 'name': None, + 'tables': ['CUSTOMERS', 'ORDERS', 'PRODUCT']}] + ap = app.Runner() + ap.dependencies = dep + ap._bulkinsertdep() + + query = self.query_result()[0] + + self.assertEqual('old-join.sql', query.filename) + + def test_data_load(self): + """ + test if data load factory maethod works + """ + dep = [{'filename': 'old-join.sql', + 'name': None, + 'tables': ['CUSTOMERS', 'ORDERS', 'PRODUCT']}] + ap = app.Runner() + ap.dependencies = dep + ap._data_load() + + query = self.query_result()[0] + + self.assertEqual('old-join.sql', query.filename) + + def test_start(self): + """ + test if main start method works + """ + ap = app.Runner(bulk_load=True) + ap.start() + + query = self.query_result() + res = [] + for el in query: + res.append(el.filename) + self.assertIn('old-join.sql', res) + + def query_result(self): + session = Session() + return session.query(TableDependency).all() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/util/test_logger_service.py b/tests/util/test_logger_service.py index c55db59..cd6a93d 100644 --- a/tests/util/test_logger_service.py +++ b/tests/util/test_logger_service.py @@ -21,32 +21,8 @@ # SOFTWARE. import unittest -import json -import os +from tests.helper.config_helper import JsonConfigGenerator from parsesql.util import logger_service -from parsesql import config - -CONFIGPATH = os.path.dirname(config.__file__) - - -class JsonConfigGenerator(): - def __init__(self, **kwargs): - for key, value in kwargs.items(): - setattr(self, key, value) - - def _get_filepath(self): - jsonname = 'configuration.json' - return os.path.join(CONFIGPATH, jsonname) - - def create(self): - with open(self._get_filepath(), 'w') as json_file: - json.dump(vars(self), json_file, indent=4) - - def remove(self): - try: - os.remove(self._get_filepath()) - except Exception as e: - print(e) class LoggerTest(unittest.TestCase): @@ -58,7 +34,7 @@ def setUpClass(cls): @staticmethod def create_config(level="INFO"): config = JsonConfigGenerator( - sqldirectory="/Users/sebastiandaum/Desktop/views", + sqldirectory="/A/C/Desktop/views", file_extension="sql", strategy="sqllite", Snowflake_Account={ From 0e1985c2a56d2ac214e5605c377c3e4d52365a23 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 6 Jan 2020 16:42:37 +0100 Subject: [PATCH 17/19] adapted readme --- README.md | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3e9cbe2..20a0d7b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,23 @@ A small python based sql parser focusing on finding table dependencies within database views. Currently only working with Snowflake ANSI Sql The current implementation handles the parser as an seperate app that can be used to parse sql files. This is not a stable version. Within the -next month the goal is to translate that app in a pip package. +next month the goal is to translate that app in a pip package. + +The parser works currently only if no AS or as is used in FROM or JOIN conditions, e.g. + +``` +SELECT +* +FROM a AS oop +``` +-> This won't work + +However this syntax will work: +``` +SELECT +* +FROM a oop +``` ## How to use the parser: 1. Download the repository @@ -11,12 +27,17 @@ next month the goal is to translate that app in a pip package. 4. Configure the SQLAlchemy engine 5. Create the target database table (Sqllite or Snowflake engine) with SQLAlchemy. Therefore run: ``` -cd parsesql/ -python -m main.database.init_db +python -m parsesql.main.database.init_db ``` +Note: A sqlite file will be placed in the db directory of the package + 6. Configure the Runner class (multiprocessing, parsing vs. dataloading) 7. Run the main module with: ``` -cd parsesql/ -python app.py +python -m parsesql.app +``` + +For running test use: ``` +python -m tests.run_all +``` \ No newline at end of file From bfdb3fc62d3690a69833258fc28550d65d206844 Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 6 Jan 2020 16:44:30 +0100 Subject: [PATCH 18/19] changed github workflow --- .github/workflows/pythonapp.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index 98fa0a6..6e6ae97 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -26,5 +26,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with unittest run: | - cd parsesql - python tests/run_all.py + python -m tests.run_all From fdd413c3ea4e37b0f329d98f1d9c71ebc3da2b4d Mon Sep 17 00:00:00 2001 From: Sebastian Daum Date: Mon, 6 Jan 2020 16:55:03 +0100 Subject: [PATCH 19/19] changed test handling --- tests/helper/config_helper.py | 22 ++++++++++++++++++++++ tests/run_all.py | 9 ++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/helper/config_helper.py b/tests/helper/config_helper.py index d2f5ac9..908602a 100644 --- a/tests/helper/config_helper.py +++ b/tests/helper/config_helper.py @@ -45,3 +45,25 @@ def remove(self): os.remove(self._get_filepath()) except Exception as e: print(e) + + +def create_config(level="INFO"): + config = JsonConfigGenerator( + sqldirectory="/A/C/Desktop/views", + file_extension="sql", + strategy="sqllite", + Snowflake_Account={ + "user": "user", + "password": "password", + "account": "account", + "database": "database", + "schema": "schema", + "warehouse": "warehouse", + }, + logging={ + "format": '[%(asctime)s] [%(processName)-10s] [%(name)s] ' + '[%(levelname)s] -> %(message)s', + "level": f"{level}", + } + ) + return config diff --git a/tests/run_all.py b/tests/run_all.py index 35af505..16e79d4 100644 --- a/tests/run_all.py +++ b/tests/run_all.py @@ -21,14 +21,21 @@ # SOFTWARE. import unittest +from tests.helper.config_helper import create_config def parsesql_test_suite(): """Test suite for parsesql tests""" + create_config().create() test_loader = unittest.TestLoader() test_suite = test_loader.discover('.') return test_suite if __name__ == "__main__": - unittest.TextTestRunner(verbosity=2).run(parsesql_test_suite()) + result = unittest.TextTestRunner(verbosity=2).run(parsesql_test_suite()) + + if result.wasSuccessful(): + exit(0) + else: + exit(1)