Skip to content

Commit

Permalink
Merge branch 'main' into fix-integration
Browse files Browse the repository at this point in the history
  • Loading branch information
gabrielcocenza authored Sep 24, 2024
2 parents f4865e9 + 0f62edc commit 82d6ede
Show file tree
Hide file tree
Showing 10 changed files with 588 additions and 137 deletions.
26 changes: 20 additions & 6 deletions .github/workflows/check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.8", "3.10"]
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -53,7 +53,7 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.8", "3.10"]
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -83,19 +83,19 @@ jobs:
fail-fast: false
matrix:
runs-on: [[ubuntu-22.04], [Ubuntu_ARM64_4C_16G_01]]
test-command: ['tox -e func -- --series focal --keep-models', 'tox -e func -- --series jammy --keep-models']
test-command: ['tox -e func -- -v --series focal --keep-models', 'tox -e func -- -v --series jammy --keep-models']
juju-channel: ["3.4/stable"]
steps:

- uses: actions/checkout@v4
with:
submodules: true

# arm64 runners don't have make or gcc installed by default
# arm64 runners don't have gcc installed by default
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y make gcc
sudo apt install -y gcc
- name: Setup Python
uses: actions/setup-python@v5
Expand All @@ -116,8 +116,22 @@ jobs:
echo "TEST_MODEL_CONSTRAINTS=arch=arm64" >> "$GITHUB_ENV"
fi
- name: Build the charm
run: charmcraft -v pack

- name: Run tests
run: ${{ matrix.test-command }}
run: |
# These variables are for a consistent method to find the charm file(s) across all projects.
# It is designed to work both with charms that output one file per base,
# and charms that output a single file to run on all bases.
# Not all charms will use them, and for some charms the variables will resolve to the same file.
export CHARM_PATH_NOBLE="$(pwd)/$(ls | grep '.*24.04.*\.charm$')"
echo "$CHARM_PATH_NOBLE"
export CHARM_PATH_JAMMY="$(pwd)/$(ls | grep '.*22.04.*\.charm$')"
echo "$CHARM_PATH_JAMMY"
export CHARM_PATH_FOCAL="$(pwd)/$(ls | grep '.*20.04.*\.charm$')"
echo "$CHARM_PATH_FOCAL"
${{ matrix.test-command }}
env:
TEST_JUJU3: "1" # https://github.com/openstack-charmers/zaza/pull/653
TEST_JUJU_CHANNEL: ${{ matrix.juju-channel }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runs-on: [[ubuntu-22.04]]
runs-on: [[ubuntu-22.04], [Ubuntu_ARM64_4C_16G_01]]
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down
55 changes: 26 additions & 29 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
# This is a template `.gitignore` file for ops charms
# This file is managed by bootstack-charms-spec and should not be modified
# within individual charm repos. https://launchpad.net/bootstack-charms-spec
# This file is centrally managed as a template file in https://github.com/canonical/solutions-engineering-automation
# To update the file:
# - Edit it in the canonical/solutions-engineering-automation repository.
# - Open a PR with the changes.
# - When the PR merges, the soleng-terraform bot will open a PR to the target repositories with the changes.

# Juju files
.unit-state.db

# Byte-compiled / optimized / DLL files
# Python Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Tests files and dir
# Test files and directories
.pytest_cache/
.coverage
.tox
.venv
reports/
**/report/
htmlcov/
.mypy_cache

# Log files
*.log

# IDEs
.idea/
.vscode/

# vi
.*.swp

# version data
repo-info
version
# python virtual environments (for local dev)
.venv
venv
env

# Python builds
# Build artefacts
output/
.build/
build/
*.charm
*.snap
# python build artefacts
deb_dist/
dist/
*.egg-info/

# Snaps
*.snap
# Log files
*.log

# Builds
.build/
build/
*.charm
# general backup files
*~
*.bak

# Note: for editor-specific files, please don't add them here, as they are specific to your environment, not the project.
# Instead, consider using a global gitignore on your workstation.
6 changes: 6 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ options:
description: |
Start the prometheus smartctl exporter at "smartctl-exporter-port". By default,
it will start at port 10201.
dcgm-snap-channel:
type: string
default: "latest/stable"
description: |
Channel from which to install the DCGM snap if the hardware has an NVIDIA GPU. By default,
it is installed from latest/stable.
exporter-log-level:
type: string
default: "INFO"
Expand Down
8 changes: 6 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus

from hw_tools import HWTool, HWToolHelper, detect_available_tools
from service import BaseExporter, ExporterError, HardwareExporter, SmartCtlExporter
from service import BaseExporter, DCGMExporter, ExporterError, HardwareExporter, SmartCtlExporter

logger = logging.getLogger(__name__)

Expand All @@ -37,6 +37,7 @@ def __init__(self, *args: Any) -> None:
metrics_endpoints=[
{"path": "/metrics", "port": int(self.model.config["hardware-exporter-port"])},
{"path": "/metrics", "port": int(self.model.config["smartctl-exporter-port"])},
{"path": "/metrics", "port": 9400},
],
# Setting scrape_timeout as collect_timeout in the `duration` format specified in
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/#duration
Expand Down Expand Up @@ -82,6 +83,9 @@ def exporters(self) -> List[BaseExporter]:
if stored_tools & SmartCtlExporter.hw_tools():
exporters.append(SmartCtlExporter(self.charm_dir, self.model.config))

if stored_tools & DCGMExporter.hw_tools():
exporters.append(DCGMExporter(self.model.config))

return exporters

def get_stored_tools(self) -> Set[HWTool]:
Expand Down Expand Up @@ -226,7 +230,7 @@ def _on_config_changed(self, event: EventBase) -> None:
self.model.unit.status = BlockedStatus(message)
return
for exporter in self.exporters:
success = exporter.render_config()
success = exporter.configure()
if success:
exporter.restart()
else:
Expand Down
59 changes: 55 additions & 4 deletions src/hw_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import requests
import urllib3
from charms.operator_libs_linux.v0 import apt
from charms.operator_libs_linux.v2 import snap
from ops.model import ModelError, Resources

import apt_helpers
Expand Down Expand Up @@ -187,6 +188,57 @@ def remove(self) -> None:
"""Remove details."""


class SnapStrategy(StrategyABC):
    """Strategy for tools that are installed and removed as snaps.

    The snap name is read from ``self._name.value`` and the channel from
    ``self.channel``; both are expected to be provided by subclasses.
    """

    channel: str

    @property
    def snap(self) -> str:
        """Return the snap name, i.e. the value of ``self._name``."""
        tool = self._name
        return tool.value

    def install(self) -> None:
        """Install the snap from ``self.channel``.

        Any failure is logged and then re-raised to the caller.
        """
        # Catching snap.SnapError by name results in:
        # TypeError: catching classes that do not inherit from BaseException is not allowed
        # hence the broad ``except Exception`` below.
        try:
            snap.add(self.snap, channel=self.channel)
            logger.info("Installed %s from channel: %s", self.snap, self.channel)
        except Exception as err:  # pylint: disable=broad-except
            logger.error("Failed to install %s from channel: %s: %s", self.snap, self.channel, err)
            raise err

    def remove(self) -> None:
        """Remove the snap.

        Any failure is logged and then re-raised to the caller.
        """
        # Catching snap.SnapError by name results in:
        # TypeError: catching classes that do not inherit from BaseException is not allowed
        # hence the broad ``except Exception`` below.
        try:
            snap.remove([self.snap])
        except Exception as err:  # pylint: disable=broad-except
            logger.error("Failed to remove %s: %s", self.snap, err)
            raise err

    def check(self) -> bool:
        """Return True when every service of the snap reports itself active.

        A service without an "active" entry counts as inactive. When the snap
        exposes no services at all the result is True.
        """
        snap_services = snap.SnapCache()[self.snap].services
        for service in snap_services.values():
            if not service.get("active", False):
                return False
        return True


class DCGMExporterStrategy(SnapStrategy):
    """Snap strategy bound to the ``HWTool.DCGM`` tool."""

    _name = HWTool.DCGM

    def __init__(self, channel: str) -> None:
        """Remember the snap channel this strategy operates on.

        Args:
            channel: value stored as ``self.channel``.
        """
        self.channel = channel


class StorCLIStrategy(TPRStrategyABC):
"""Strategy to install storcli."""

Expand Down Expand Up @@ -689,13 +741,12 @@ def install(self, resources: Resources, hw_available: Set[HWTool]) -> Tuple[bool
if strategy.name not in hw_available:
continue
try:
# TPRStrategy
if isinstance(strategy, TPRStrategyABC):
path = fetch_tools.get(strategy.name) # pylint: disable=W0212
if path:
strategy.install(path)
# APTStrategy
elif isinstance(strategy, APTStrategyABC):

elif isinstance(strategy, (APTStrategyABC, SnapStrategy)):
strategy.install() # pylint: disable=E1120
logger.info("Strategy %s install success", strategy)
except (
Expand All @@ -717,7 +768,7 @@ def remove(self, resources: Resources, hw_available: Set[HWTool]) -> None:
for strategy in self.strategies:
if strategy.name not in hw_available:
continue
if isinstance(strategy, (TPRStrategyABC, APTStrategyABC)):
if isinstance(strategy, (TPRStrategyABC, APTStrategyABC, SnapStrategy)):
strategy.remove()
logger.info("Strategy %s remove success", strategy)

Expand Down
Loading

0 comments on commit 82d6ede

Please sign in to comment.