Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance charm's life-cycle #118

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ options:
default: 10000
description: |
Start the prometheus exporter at "exporter-port". By default, it will
start at port 10000.
start at port 10000. Allowed values are between 1 and 65535 inclusively.
exporter-log-level:
type: string
default: "INFO"
Expand Down
226 changes: 122 additions & 104 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from typing import Any, Dict, Optional

import ops
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
from ops.framework import EventBase, StoredState
from ops.model import ActiveStatus, BlockedStatus
from ops.model import ActiveStatus, BlockedStatus, ErrorStatus, MaintenanceStatus

from config import HWTool
import cos_agent
import service
from config import EXPORTER_RELATION_NAME, HWTool
from hardware import get_bmc_address
from hw_tools import HWToolHelper, bmc_hw_verifier
from service import Exporter
from hw_tools import HWToolHelper

logger = logging.getLogger(__name__)

Expand All @@ -29,144 +29,162 @@ def __init__(self, *args: Any) -> None:
"""Init."""
super().__init__(*args)
self.hw_tool_helper = HWToolHelper()

self.cos_agent_provider = COSAgentProvider(
self.exporter = service.Exporter(self.charm_dir)
self.cos_agent_relation_handler = cos_agent.Handler(
self,
exporter=self.exporter,
relation_name=EXPORTER_RELATION_NAME,
metrics_endpoints=[
{"path": "/metrics", "port": int(self.model.config["exporter-port"])}
{
"path": "/metrics",
"port": int(
self.model.config["exporter-port"],
),
}
],
)
self.exporter = Exporter(self.charm_dir)

self.framework.observe(self.on.config_changed, self._on_config_changed)
self.framework.observe(self.on.install, self._on_install_or_upgrade)
self.framework.observe(self.on.remove, self._on_remove)
self.framework.observe(self.on.update_status, self._on_update_status)
self.framework.observe(self.on.upgrade_charm, self._on_install_or_upgrade)
self.framework.observe(
self.on.cos_agent_relation_joined, self._on_cos_agent_relation_joined
)
self.framework.observe(
self.on.cos_agent_relation_departed, self._on_cos_agent_relation_departed
)

self._stored.set_default(installed=False, config={}, blocked_msg="")

def _on_install_or_upgrade(self, event: ops.InstallEvent) -> None:
"""Install and upgrade."""
port = self.model.config.get("exporter-port", "10000")
level = self.model.config.get("exporter-log-level", "INFO")

redfish_creds = self._get_redfish_creds()

self.exporter.install(port, level, redfish_creds)
installed, msg = self.hw_tool_helper.install(self.model.resources)
self._stored.installed = installed
self._stored.set_default(
config={},
exporter_installed=False,
resource_install_result={},
)

if not installed:
logger.info(msg)
self._stored.blocked_msg = msg
self._on_update_status(event)
def _on_install_or_upgrade(self, _: ops.InstallEvent) -> None:
"""Install or upgrade charm."""
self.model.unit.status = MaintenanceStatus("Installing resources...")
resource_white_list = self.hw_tool_helper.get_resource_white_list(self.model.resources)
resource_install_result = self.hw_tool_helper.install(
resource_white_list,
resource_black_list=self._stored.resource_install_result, # type: ignore[arg-type]
)
self._stored.resource_install_result = resource_install_result
if not all(resource_install_result.values()):
failed_resources = [r for r, s in resource_install_result.items() if not s]
msg = f"Failed to install resources: {', '.join(failed_resources)}"
logger.error(msg)
self.model.unit.status = ErrorStatus(msg)
return

self._stored.installed = True
self._stored.blocked_msg = ""
self.model.unit.status = ActiveStatus("Install complete")
logger.info("Install complete")
if self._stored.exporter_installed is not True:
self.model.unit.status = MaintenanceStatus("Installing exporter...")
success = self.cos_agent_relation_handler.install_exporter()
self._stored.exporter_installed = success
if not success:
msg = "Failed to install exporter, please refer to `juju debug-log`"
logger.error(msg)
self.model.unit.status = ErrorStatus(msg)
return

self.update_status()

def _on_remove(self, _: EventBase) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happen if remove fail?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, there's not remove fail logic in the code

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it just get into error state?

"""Remove everything when charm is being removed."""
logger.info("Start to remove.")
# Remove binary tool
self.hw_tool_helper.remove(self.model.resources)
self.exporter.uninstall()
self._stored.resource_install_result = {}
success = self.cos_agent_relation_handler.uninstall_exporter()
if not success:
msg = "Failed to uninstall exporter, please refer to `juju debug-log`"
logger.warning(msg)
self._stored.exporter_installed = not success
logger.info("Remove complete")

def _on_update_status(self, _: EventBase) -> None:
"""Update the charm's status."""
if self._stored.installed is not True and self._stored.blocked_msg != "":
self.model.unit.status = BlockedStatus(self._stored.blocked_msg) # type: ignore
self.update_status()

def _on_config_changed(self, _: EventBase) -> None:
"""Reconfigure charm."""
change_set = self.update_config_store()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might look better like this:

change_set = self.config_store_changed 

Even though the method updates the config store, I feel like the change_set feature is more important and we could name it accordingly.


if self.cos_agent_relation_handler.exporter_enabled:
options = self.get_exporter_configs()
success, message = self.cos_agent_relation_handler.validate_exporter_configs(options)
if not success:
self.model.unit.status = BlockedStatus(message)
return

success = self.cos_agent_relation_handler.configure_exporter(options, change_set)
if not success:
message = "Failed to configure exporter, please check if the server is healthy."
self.model.unit.status = BlockedStatus(message)
return

self.update_status()

def update_status(self) -> None:
"""Update the charm's status."""
if not self.cos_agent_relation_handler.exporter_enabled:
self.model.unit.status = BlockedStatus(f"Missing relation: [{EXPORTER_RELATION_NAME}]")
return
if not self.model.get_relation("cos-agent"):
self.model.unit.status = BlockedStatus("Missing relation: [cos-agent]")

if self.cos_agent_relation_handler.too_many_relations:
self.model.unit.status = BlockedStatus("Cannot relate to more than one grafana-agent")
return
if not self.exporter.check_health():
self.model.unit.status = BlockedStatus("Exporter is unhealthy")

if (
self.cos_agent_relation_handler.exporter_enabled
and not self.cos_agent_relation_handler.exporter_online
):
error_msg = "Exporter crashed unexpectedly, please refer to systemd logs..."
self.model.unit.status = ErrorStatus(error_msg)
return
if not self.exporter.check_active():
self.model.unit.status = BlockedStatus("Exporter is not running")

hw_tool_ok, error_msg = self.hw_tool_helper.check_status()
if not hw_tool_ok:
self.model.unit.status = ErrorStatus(error_msg)
return

self.model.unit.status = ActiveStatus("Unit is ready")

def _on_config_changed(self, event: EventBase) -> None:
"""Reconfigure charm."""
# Keep track of what model config options + some extra config related
# information are changed. This can be helpful when we want to respond
# to the change of a specific config option.
def update_config_store(self) -> set:
"""Update the config store, and return a set of config options that are changed."""
change_set = set()
model_config: Dict[str, Optional[str]] = dict(self.model.config.items())
for key, value in model_config.items():
if key not in self._stored.config or self._stored.config[key] != value: # type: ignore
logger.info("Setting %s to: %s", key, value)
self._stored.config[key] = value # type: ignore
change_set.add(key)
return change_set

def get_redfish_options(self) -> Dict[str, Any]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to call this get_redfish_configs similar to get_exporter_configs ? I wouldn't mind if both are using options as well, but it would be nice if both are consistent :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's consistent in a way that redfish_options = get_redfish_options(), but not in get_redfish_configs and get_exporter_configs. 😢

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, maybe we could have redfish_configs, get_redfish_configs along with get_exporter_configs? What do you think?

"""Get redfish config options."""
redfish_options = {
"enable": False,
"host": "",
"username": self.model.config.get("redfish-username", ""),
"password": self.model.config.get("redfish-password", ""),
}

if not self._stored.installed: # type: ignore
logging.info( # type: ignore
"Config changed called before install complete, deferring event: %s",
event.handle,
)
event.defer()
return
bmc_address = get_bmc_address()
if bmc_address:
redfish_options["enable"] = True
redfish_options["host"] = f"https://{bmc_address}"

exporter_configs = {
"exporter-port",
"exporter-log-level",
"redfish-host",
"redfish-username",
"redfish-password",
return redfish_options

def get_exporter_configs(self) -> Dict[str, Any]:
"""Get the exporter related config options."""
port = self.model.config.get("exporter-port", "10000")
level = self.model.config.get("exporter-log-level", "INFO")
collectors = self.hw_tool_helper.hw_collector_white_list
redfish_options = {"enable": False}
if HWTool.REDFISH in collectors:
redfish_options = self.get_redfish_options()
return {
"port": port,
"level": level,
"collectors": collectors,
"redfish_options": redfish_options,
}
if exporter_configs.intersection(change_set):
logger.info("Detected changes in exporter config.")
port = self.model.config.get("exporter-port", "10000")
level = self.model.config.get("exporter-log-level", "INFO")

redfish_creds = self._get_redfish_creds()
success = self.exporter.template.render_config(
port=port, level=level, redfish_creds=redfish_creds
)
# First condition prevent the exporter from starting at when the
# charm just installed; the second condition tries to recover the
# exporter from failed status.
if success and self.exporter.check_active() or not self.exporter.check_health():
self.exporter.restart()

self._on_update_status(event)

def _on_cos_agent_relation_joined(self, event: EventBase) -> None:
"""Start the exporter when relation joined."""
self.exporter.start()
self._on_update_status(event)

def _on_cos_agent_relation_departed(self, event: EventBase) -> None:
"""Remove the exporter when relation departed."""
self.exporter.stop()
self._on_update_status(event)

def _get_redfish_creds(self) -> Dict[str, str]:
"""Provide redfish config if redfish is available, else empty dict."""
bmc_tools = bmc_hw_verifier()
if HWTool.REDFISH in bmc_tools:
bmc_address = get_bmc_address()
redfish_creds = {
# Force to use https as default protocol
"host": f"https://{bmc_address}",
"username": self.model.config.get("redfish-username", ""),
"password": self.model.config.get("redfish-password", ""),
}
else:
redfish_creds = {}
return redfish_creds


if __name__ == "__main__": # pragma: nocover
Expand Down
32 changes: 1 addition & 31 deletions src/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,15 @@
#
# For further info, check https://github.com/canonical/charmcraft
"""Checksum definition, check functions and related utils."""
import hashlib
import logging
import typing as t
from dataclasses import dataclass, field
from pathlib import Path

from os_platform import Architecture, UbuntuSeries, get_os_platform
from os_platform import Architecture, UbuntuSeries

logger = logging.getLogger(__name__)


class ResourceChecksumError(Exception):
"""Raise if checksum does not match."""


@dataclass
class ToolVersionInfo:
"""Tool version information for checksum comparison."""
Expand Down Expand Up @@ -209,27 +203,3 @@ class ToolVersionInfo:
sha256_checksum="458d51b030468901fc8a207088070e6ce82db34b181d9190c8f849605f1b9b6d",
),
]


def validate_checksum(support_version_infos: t.List[ToolVersionInfo], path: Path) -> bool:
"""Validate checksum of resource file by checking with supported versions.

Returns True if resource is supported by the charm, architecture, and
checksum validation is successful.
"""
os_platform = get_os_platform()

supported_checksums = []
for info in support_version_infos:
if os_platform.machine in info.supported_architectures and (
info.support_all_series or os_platform.series in info.supported_series
):
supported_checksums.append(info.sha256_checksum)

with open(path, "rb") as f:
sha256_hash = hashlib.sha256(f.read()).hexdigest()

if sha256_hash in supported_checksums:
return True
logger.warning("Checksum validation fail, path: %s hash: %s", path, sha256_hash)
return False
1 change: 1 addition & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
EXPORTER_SERVICE_PATH = Path(f"/etc/systemd/system/{EXPORTER_NAME}.service")
EXPORTER_CONFIG_TEMPLATE = f"{EXPORTER_NAME}-config.yaml.j2"
EXPORTER_SERVICE_TEMPLATE = f"{EXPORTER_NAME}.service.j2"
EXPORTER_RELATION_NAME = "cos-agent"

# Redfish
REDFISH_TIMEOUT = 3
Expand Down
Loading