-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enhance charm's life-cycle #118
Changes from all commits
a8ccbe9
dc8fdbd
6eb3c3e
ff52219
fbbd346
0ea84ed
b6f8a78
fc4a713
cb04478
815b481
a398f2f
afd0028
4089a04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,14 +8,14 @@ | |
from typing import Any, Dict, Optional | ||
|
||
import ops | ||
from charms.grafana_agent.v0.cos_agent import COSAgentProvider | ||
from ops.framework import EventBase, StoredState | ||
from ops.model import ActiveStatus, BlockedStatus | ||
from ops.model import ActiveStatus, BlockedStatus, ErrorStatus, MaintenanceStatus | ||
|
||
from config import HWTool | ||
import cos_agent | ||
import service | ||
from config import EXPORTER_RELATION_NAME, HWTool | ||
from hardware import get_bmc_address | ||
from hw_tools import HWToolHelper, bmc_hw_verifier | ||
from service import Exporter | ||
from hw_tools import HWToolHelper | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -29,144 +29,162 @@ def __init__(self, *args: Any) -> None: | |
"""Init.""" | ||
super().__init__(*args) | ||
self.hw_tool_helper = HWToolHelper() | ||
|
||
self.cos_agent_provider = COSAgentProvider( | ||
self.exporter = service.Exporter(self.charm_dir) | ||
self.cos_agent_relation_handler = cos_agent.Handler( | ||
self, | ||
exporter=self.exporter, | ||
relation_name=EXPORTER_RELATION_NAME, | ||
metrics_endpoints=[ | ||
{"path": "/metrics", "port": int(self.model.config["exporter-port"])} | ||
{ | ||
"path": "/metrics", | ||
"port": int( | ||
self.model.config["exporter-port"], | ||
), | ||
} | ||
], | ||
) | ||
self.exporter = Exporter(self.charm_dir) | ||
|
||
self.framework.observe(self.on.config_changed, self._on_config_changed) | ||
self.framework.observe(self.on.install, self._on_install_or_upgrade) | ||
self.framework.observe(self.on.remove, self._on_remove) | ||
self.framework.observe(self.on.update_status, self._on_update_status) | ||
self.framework.observe(self.on.upgrade_charm, self._on_install_or_upgrade) | ||
self.framework.observe( | ||
self.on.cos_agent_relation_joined, self._on_cos_agent_relation_joined | ||
) | ||
self.framework.observe( | ||
self.on.cos_agent_relation_departed, self._on_cos_agent_relation_departed | ||
) | ||
|
||
self._stored.set_default(installed=False, config={}, blocked_msg="") | ||
|
||
def _on_install_or_upgrade(self, event: ops.InstallEvent) -> None: | ||
"""Install and upgrade.""" | ||
port = self.model.config.get("exporter-port", "10000") | ||
level = self.model.config.get("exporter-log-level", "INFO") | ||
|
||
redfish_creds = self._get_redfish_creds() | ||
|
||
self.exporter.install(port, level, redfish_creds) | ||
installed, msg = self.hw_tool_helper.install(self.model.resources) | ||
self._stored.installed = installed | ||
self._stored.set_default( | ||
config={}, | ||
exporter_installed=False, | ||
resource_install_result={}, | ||
) | ||
|
||
if not installed: | ||
logger.info(msg) | ||
self._stored.blocked_msg = msg | ||
self._on_update_status(event) | ||
def _on_install_or_upgrade(self, _: ops.InstallEvent) -> None: | ||
"""Install or upgrade charm.""" | ||
self.model.unit.status = MaintenanceStatus("Installing resources...") | ||
resource_white_list = self.hw_tool_helper.get_resource_white_list(self.model.resources) | ||
resource_install_result = self.hw_tool_helper.install( | ||
resource_white_list, | ||
resource_black_list=self._stored.resource_install_result, # type: ignore[arg-type] | ||
) | ||
self._stored.resource_install_result = resource_install_result | ||
if not all(resource_install_result.values()): | ||
failed_resources = [r for r, s in resource_install_result.items() if not s] | ||
msg = f"Failed to install resources: {', '.join(failed_resources)}" | ||
logger.error(msg) | ||
self.model.unit.status = ErrorStatus(msg) | ||
return | ||
|
||
self._stored.installed = True | ||
self._stored.blocked_msg = "" | ||
self.model.unit.status = ActiveStatus("Install complete") | ||
logger.info("Install complete") | ||
if self._stored.exporter_installed is not True: | ||
self.model.unit.status = MaintenanceStatus("Installing exporter...") | ||
success = self.cos_agent_relation_handler.install_exporter() | ||
self._stored.exporter_installed = success | ||
if not success: | ||
msg = "Failed to install exporter, please refer to `juju debug-log`" | ||
logger.error(msg) | ||
self.model.unit.status = ErrorStatus(msg) | ||
return | ||
|
||
self.update_status() | ||
|
||
def _on_remove(self, _: EventBase) -> None: | ||
"""Remove everything when charm is being removed.""" | ||
logger.info("Start to remove.") | ||
# Remove binary tool | ||
self.hw_tool_helper.remove(self.model.resources) | ||
self.exporter.uninstall() | ||
self._stored.resource_install_result = {} | ||
success = self.cos_agent_relation_handler.uninstall_exporter() | ||
if not success: | ||
msg = "Failed to uninstall exporter, please refer to `juju debug-log`" | ||
logger.warning(msg) | ||
self._stored.exporter_installed = not success | ||
logger.info("Remove complete") | ||
|
||
def _on_update_status(self, _: EventBase) -> None: | ||
"""Update the charm's status.""" | ||
if self._stored.installed is not True and self._stored.blocked_msg != "": | ||
self.model.unit.status = BlockedStatus(self._stored.blocked_msg) # type: ignore | ||
self.update_status() | ||
|
||
def _on_config_changed(self, _: EventBase) -> None: | ||
"""Reconfigure charm.""" | ||
change_set = self.update_config_store() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might look better like this: change_set = self.config_store_changed Even though the method updates the config store, I feel like the |
||
|
||
if self.cos_agent_relation_handler.exporter_enabled: | ||
options = self.get_exporter_configs() | ||
success, message = self.cos_agent_relation_handler.validate_exporter_configs(options) | ||
if not success: | ||
self.model.unit.status = BlockedStatus(message) | ||
return | ||
|
||
success = self.cos_agent_relation_handler.configure_exporter(options, change_set) | ||
if not success: | ||
message = "Failed to configure exporter, please check if the server is healthy." | ||
self.model.unit.status = BlockedStatus(message) | ||
return | ||
|
||
self.update_status() | ||
|
||
def update_status(self) -> None: | ||
"""Update the charm's status.""" | ||
if not self.cos_agent_relation_handler.exporter_enabled: | ||
self.model.unit.status = BlockedStatus(f"Missing relation: [{EXPORTER_RELATION_NAME}]") | ||
return | ||
if not self.model.get_relation("cos-agent"): | ||
self.model.unit.status = BlockedStatus("Missing relation: [cos-agent]") | ||
|
||
if self.cos_agent_relation_handler.too_many_relations: | ||
self.model.unit.status = BlockedStatus("Cannot relate to more than one grafana-agent") | ||
return | ||
if not self.exporter.check_health(): | ||
self.model.unit.status = BlockedStatus("Exporter is unhealthy") | ||
|
||
if ( | ||
self.cos_agent_relation_handler.exporter_enabled | ||
and not self.cos_agent_relation_handler.exporter_online | ||
): | ||
error_msg = "Exporter crashed unexpectedly, please refer to systemd logs..." | ||
self.model.unit.status = ErrorStatus(error_msg) | ||
return | ||
if not self.exporter.check_active(): | ||
self.model.unit.status = BlockedStatus("Exporter is not running") | ||
|
||
hw_tool_ok, error_msg = self.hw_tool_helper.check_status() | ||
if not hw_tool_ok: | ||
self.model.unit.status = ErrorStatus(error_msg) | ||
return | ||
|
||
self.model.unit.status = ActiveStatus("Unit is ready") | ||
|
||
def _on_config_changed(self, event: EventBase) -> None: | ||
"""Reconfigure charm.""" | ||
# Keep track of what model config options + some extra config related | ||
# information are changed. This can be helpful when we want to respond | ||
# to the change of a specific config option. | ||
def update_config_store(self) -> set: | ||
"""Update the config store, and return a set of config options that are changed.""" | ||
change_set = set() | ||
model_config: Dict[str, Optional[str]] = dict(self.model.config.items()) | ||
for key, value in model_config.items(): | ||
if key not in self._stored.config or self._stored.config[key] != value: # type: ignore | ||
logger.info("Setting %s to: %s", key, value) | ||
self._stored.config[key] = value # type: ignore | ||
change_set.add(key) | ||
return change_set | ||
|
||
def get_redfish_options(self) -> Dict[str, Any]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's consistent in a way that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, maybe we could have |
||
"""Get redfish config options.""" | ||
redfish_options = { | ||
"enable": False, | ||
"host": "", | ||
"username": self.model.config.get("redfish-username", ""), | ||
"password": self.model.config.get("redfish-password", ""), | ||
} | ||
|
||
if not self._stored.installed: # type: ignore | ||
logging.info( # type: ignore | ||
"Config changed called before install complete, deferring event: %s", | ||
event.handle, | ||
) | ||
event.defer() | ||
return | ||
bmc_address = get_bmc_address() | ||
if bmc_address: | ||
redfish_options["enable"] = True | ||
redfish_options["host"] = f"https://{bmc_address}" | ||
|
||
exporter_configs = { | ||
"exporter-port", | ||
"exporter-log-level", | ||
"redfish-host", | ||
"redfish-username", | ||
"redfish-password", | ||
return redfish_options | ||
|
||
def get_exporter_configs(self) -> Dict[str, Any]: | ||
"""Get the exporter related config options.""" | ||
port = self.model.config.get("exporter-port", "10000") | ||
level = self.model.config.get("exporter-log-level", "INFO") | ||
collectors = self.hw_tool_helper.hw_collector_white_list | ||
redfish_options = {"enable": False} | ||
if HWTool.REDFISH in collectors: | ||
redfish_options = self.get_redfish_options() | ||
return { | ||
"port": port, | ||
"level": level, | ||
"collectors": collectors, | ||
"redfish_options": redfish_options, | ||
} | ||
if exporter_configs.intersection(change_set): | ||
logger.info("Detected changes in exporter config.") | ||
port = self.model.config.get("exporter-port", "10000") | ||
level = self.model.config.get("exporter-log-level", "INFO") | ||
|
||
redfish_creds = self._get_redfish_creds() | ||
success = self.exporter.template.render_config( | ||
port=port, level=level, redfish_creds=redfish_creds | ||
) | ||
# First condition prevent the exporter from starting at when the | ||
# charm just installed; the second condition tries to recover the | ||
# exporter from failed status. | ||
if success and self.exporter.check_active() or not self.exporter.check_health(): | ||
self.exporter.restart() | ||
|
||
self._on_update_status(event) | ||
|
||
def _on_cos_agent_relation_joined(self, event: EventBase) -> None: | ||
"""Start the exporter when relation joined.""" | ||
self.exporter.start() | ||
self._on_update_status(event) | ||
|
||
def _on_cos_agent_relation_departed(self, event: EventBase) -> None: | ||
"""Remove the exporter when relation departed.""" | ||
self.exporter.stop() | ||
self._on_update_status(event) | ||
|
||
def _get_redfish_creds(self) -> Dict[str, str]: | ||
"""Provide redfish config if redfish is available, else empty dict.""" | ||
bmc_tools = bmc_hw_verifier() | ||
if HWTool.REDFISH in bmc_tools: | ||
bmc_address = get_bmc_address() | ||
redfish_creds = { | ||
# Force to use https as default protocol | ||
"host": f"https://{bmc_address}", | ||
"username": self.model.config.get("redfish-username", ""), | ||
"password": self.model.config.get("redfish-password", ""), | ||
} | ||
else: | ||
redfish_creds = {} | ||
return redfish_creds | ||
|
||
|
||
if __name__ == "__main__": # pragma: nocover | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happen if remove fail?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, there's not remove fail logic in the code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So it just get into error state?