Skip to content

Commit

Permalink
removed reliance on is_dapla()-fn (#261)
Browse files Browse the repository at this point in the history
  • Loading branch information
skars82 authored Nov 13, 2023
1 parent 01c1f78 commit 00fc8bf
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 85 deletions.
46 changes: 29 additions & 17 deletions src/altinn/file.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""This module contains the main function for running the Altinn application."""

import os

from dapla import FileClient
from defusedxml.ElementTree import ParseError
from defusedxml.minidom import parseString

from .utils import is_dapla
from .utils import is_gcs


def main() -> None:
Expand All @@ -16,48 +18,58 @@ def main() -> None:


class FileInfo:
"""This class represents an Altinn application."""
"""This class represents file information handling."""

def __init__(self, file_path: str) -> None:
"""Initialize an XmlFile object with the given file path.
Args:
file_path (str): The path to the XML file.
"""
self.file_path = file_path
if not is_dapla():
print(
"""FileInfo class can only be instantiated in a Dapla JupyterLab
environment."""
)
# Store the original file path
self.original_file_path = file_path
# Expand the path to support '~' for home directory
self.expanded_file_path = os.path.expanduser(file_path)

def _read_file(self):
    """Return the raw content of the file this object points at.

    Paths recognised by ``is_gcs`` are fetched through the Dapla GCS file
    system; every other path is read from the local filesystem in binary
    mode, using the '~'-expanded path.

    Returns:
        bytes: The content of the file.
    """
    if not is_gcs(self.original_file_path):
        with open(self.expanded_file_path, "rb") as local_file:
            return local_file.read()

    gcs = FileClient.get_gcs_file_system()
    return gcs.cat_file(self.original_file_path)

def filename(self) -> str:
"""Get the name of the XML file.
Returns:
str: The name of the XML file.
"""
split_path = self.file_path.split("/")
split_path = self.expanded_file_path.split("/")
return split_path[-1][:-4]

def pretty_print(self) -> None:
    """Print formatted version of an XML file.

    The content is obtained through ``_read_file``, so both local files and
    Google Cloud Storage paths are supported.
    """
    document = parseString(self._read_file())
    print(document.toprettyxml(indent=" "))

def print(self) -> None:
    """Print unformatted version of an XML file.

    The content is obtained through ``_read_file``, so both local files and
    Google Cloud Storage paths are supported.
    """
    raw_content = self._read_file()
    # decode() without arguments interprets the bytes as UTF-8.
    print(raw_content.decode())

def validate(self) -> bool:
"""Validate the XML file."""
fs = FileClient.get_gcs_file_system()
try:
parseString(fs.cat_file(self.file_path))
xml_content = self._read_file()
parseString(xml_content)
return True

except ParseError:
Expand Down
9 changes: 5 additions & 4 deletions src/altinn/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""This module contains the main function for running the Altinn application."""

import os
from typing import Any
from typing import Dict
from typing import Optional
Expand All @@ -8,7 +8,7 @@
from dapla import FileClient
from defusedxml import ElementTree

from .utils import is_dapla
from .utils import is_gcs
from .utils import is_valid_xml


Expand All @@ -29,7 +29,8 @@ def __init__(self, file_path: str) -> None:
Args:
file_path (str): The path to the XML file.
"""
self.file_path = file_path
expanded_path = os.path.expanduser(file_path)
self.file_path = expanded_path
if not is_valid_xml(self.file_path):
print("""File is not a valid XML-file.""")

Expand Down Expand Up @@ -107,7 +108,7 @@ def to_dataframe(self) -> pd.DataFrame:
Returns:
pd.DataFrame: A DataFrame representation of the XML file.
"""
if is_dapla():
if is_gcs(self.file_path):
root = self.get_root_from_dapla()
else:
root = self.get_root_from_filesystem()
Expand Down
29 changes: 16 additions & 13 deletions src/altinn/utils.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,48 @@
"""Utilities for working with Altinn-data in Python."""

import os
from typing import Optional

from dapla import FileClient
from defusedxml.ElementTree import ParseError
from defusedxml.minidom import parseString


def is_dapla() -> bool:
    """Check whether the current environment is running a Dapla JupyterLab instance.

    Returns:
        bool: True if the JUPYTER_IMAGE_SPEC environment variable is set and
            contains "jupyterlab-dapla", False otherwise.
    """
    # An unset variable is treated like an empty one: neither can match.
    image_spec = os.environ.get("JUPYTER_IMAGE_SPEC") or ""
    return "jupyterlab-dapla" in image_spec


def is_gcs(file_path: str) -> bool:
    """Check whether the given file path is a Google Cloud Storage path.

    Args:
        file_path (str): The file path to check.

    Returns:
        bool: True if the file path starts with "gs://", False otherwise.
    """
    return file_path.startswith("gs://")


def is_valid_xml(file_path: str) -> bool:
    """Check whether the file is valid XML.

    Args:
        file_path (str): The path to the XML file; either a "gs://" Google
            Cloud Storage path or a local path (a leading '~' is expanded).

    Returns:
        bool: True if the file could be read and parsed as XML, False otherwise.
    """
    # minidom reports malformed documents as expat's ExpatError, which is not
    # a subclass of ElementTree's ParseError, so it has to be caught as well.
    from xml.parsers.expat import ExpatError

    try:
        if is_gcs(file_path):
            # Read the raw bytes from Google Cloud Storage
            fs = FileClient.get_gcs_file_system()
            content = fs.cat_file(file_path)
        else:
            # Expand the path to support '~' for home directory
            expanded_path = os.path.expanduser(file_path)
            # Binary mode lets the parser apply the encoding declared in the
            # XML prolog instead of the platform's default text encoding.
            with open(expanded_path, "rb") as file:
                content = file.read()
        parseString(content)
    except (ParseError, ExpatError, OSError):
        # Unreadable files and malformed documents both count as "not valid".
        return False
    return True
47 changes: 24 additions & 23 deletions tests/test_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""This module contains the tests for the file function."""
"""This module contains the tests for the FileInfo functions."""

from unittest.mock import MagicMock
from unittest.mock import mock_open

from dapla import FileClient

Expand All @@ -10,44 +11,44 @@
class TestFileInfo:
    """A test class for the FileInfo class."""

    # Sample document shared by the pretty_print tests.
    XML_STRING = """<?xml version="1.0" encoding="UTF-8"?>
<root>
<child>Hello, world!</child>
</root>
"""

    def test_filename(self) -> None:
        """Check that filename() returns the file name without the extension."""
        xml_file = FileInfo("file.xml")
        assert xml_file.filename() == "file"

    def test_filename_nested(self) -> None:
        """Check that filename() ignores the directories leading to the file."""
        xml_file = FileInfo("path/to/file.xml")
        assert xml_file.filename() == "file"

    def test_pretty_print_local(self, monkeypatch, capsys) -> None:
        """Test pretty_print method for local files in FileInfo class."""
        # Mock open for local file handling; binary reads yield bytes
        mock_file = mock_open(read_data=self.XML_STRING.encode())

        # Keep the patches scoped to the call under test so that replacing
        # builtins.open cannot interfere with anything pytest does afterwards.
        with monkeypatch.context() as patch:
            patch.setattr("builtins.open", mock_file)
            # Make path expansion a no-op so the expected path is predictable
            patch.setattr("os.path.expanduser", lambda path: path)

            local_file_info = FileInfo("path/to/local_file.xml")
            local_file_info.pretty_print()

        mock_file.assert_any_call("path/to/local_file.xml", "rb")
        assert "<child>Hello, world!</child>" in capsys.readouterr().out

    def test_pretty_print_gcs(self, monkeypatch, capsys) -> None:
        """Test pretty_print method for GCS files in FileInfo class."""
        # Patch FileClient.get_gcs_file_system to return a mock file system
        # whose cat_file method returns the XML as bytes
        gcs_file_system = MagicMock()
        gcs_file_system.cat_file.return_value = self.XML_STRING.encode()
        get_gcs_file_system_mock = MagicMock(return_value=gcs_file_system)
        monkeypatch.setattr(FileClient, "get_gcs_file_system", get_gcs_file_system_mock)

        gcs_file_info = FileInfo("gs://path/to/gcs_file.xml")
        gcs_file_info.pretty_print()

        gcs_file_system.cat_file.assert_called_once_with("gs://path/to/gcs_file.xml")
        assert "<child>Hello, world!</child>" in capsys.readouterr().out
28 changes: 0 additions & 28 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1 @@
"""This module contains the tests for the utils functions."""

import os

import pytest
from pytest import MonkeyPatch

from altinn.utils import is_dapla


class TestIsDapla:
    """A test class for the is_dapla() function."""

    @pytest.fixture(autouse=True)
    def setup_method(self, monkeypatch: MonkeyPatch) -> None:
        """A fixture that runs before every test method.

        It deletes the JUPYTER_IMAGE_SPEC environment variable if it exists.
        """
        monkeypatch.delenv("JUPYTER_IMAGE_SPEC", raising=False)

    def test_is_dapla_true(self, monkeypatch: MonkeyPatch) -> None:
        """Test function to check is_dapla().

        Returns True when the JUPYTER_IMAGE_SPEC
        environment variable contains 'jupyterlab-dapla:latest'.
        """
        # Set the variable through monkeypatch so it is restored after the
        # test; a direct os.environ assignment would leak into later tests.
        monkeypatch.setenv("JUPYTER_IMAGE_SPEC", "jupyterlab-dapla:latest")
        assert is_dapla()

    def test_is_dapla_false(self) -> None:
        """Test function to check is_dapla().

        Returns False when the JUPYTER_IMAGE_SPEC
        environment variable is not set.
        """
        assert not is_dapla()

0 comments on commit 00fc8bf

Please sign in to comment.