-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enhance charm's life-cycle #118
Changes from 7 commits
a8ccbe9
dc8fdbd
6eb3c3e
ff52219
fbbd346
0ea84ed
b6f8a78
fc4a713
cb04478
815b481
a398f2f
afd0028
4089a04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,14 +8,13 @@ | |
from typing import Any, Dict, Optional | ||
|
||
import ops | ||
from charms.grafana_agent.v0.cos_agent import COSAgentProvider | ||
from ops.framework import EventBase, StoredState | ||
from ops.model import ActiveStatus, BlockedStatus | ||
from ops.model import ActiveStatus, BlockedStatus, ErrorStatus, MaintenanceStatus | ||
|
||
import exporter | ||
from config import HWTool | ||
from hardware import get_bmc_address | ||
from hw_tools import HWToolHelper, bmc_hw_verifier | ||
from service import Exporter | ||
from hw_tools import HWToolHelper | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -29,144 +28,146 @@ def __init__(self, *args: Any) -> None: | |
"""Init.""" | ||
super().__init__(*args) | ||
self.hw_tool_helper = HWToolHelper() | ||
|
||
self.cos_agent_provider = COSAgentProvider( | ||
self, | ||
metrics_endpoints=[ | ||
{"path": "/metrics", "port": int(self.model.config["exporter-port"])} | ||
], | ||
) | ||
self.exporter = Exporter(self.charm_dir) | ||
self.exporter_observer = exporter.Observer(self) | ||
|
||
self.framework.observe(self.on.config_changed, self._on_config_changed) | ||
self.framework.observe(self.on.install, self._on_install_or_upgrade) | ||
self.framework.observe(self.on.remove, self._on_remove) | ||
self.framework.observe(self.on.update_status, self._on_update_status) | ||
self.framework.observe(self.on.upgrade_charm, self._on_install_or_upgrade) | ||
self.framework.observe( | ||
self.on.cos_agent_relation_joined, self._on_cos_agent_relation_joined | ||
) | ||
self.framework.observe( | ||
self.on.cos_agent_relation_departed, self._on_cos_agent_relation_departed | ||
) | ||
|
||
self._stored.set_default(installed=False, config={}, blocked_msg="") | ||
|
||
def _on_install_or_upgrade(self, event: ops.InstallEvent) -> None: | ||
"""Install and upgrade.""" | ||
port = self.model.config.get("exporter-port", "10000") | ||
level = self.model.config.get("exporter-log-level", "INFO") | ||
|
||
redfish_creds = self._get_redfish_creds() | ||
|
||
self.exporter.install(port, level, redfish_creds) | ||
installed, msg = self.hw_tool_helper.install(self.model.resources) | ||
self._stored.installed = installed | ||
self._stored.set_default( | ||
agent_installed=False, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like this might sound better like So it could be |
||
resource_install_status={}, | ||
config={}, | ||
blocked_msg="", | ||
) | ||
|
||
if not installed: | ||
logger.info(msg) | ||
def _on_install_or_upgrade(self, _: ops.InstallEvent) -> None: | ||
"""Install or upgrade charm.""" | ||
self.model.unit.status = MaintenanceStatus("Installing resources...") | ||
resource_white_list = self.hw_tool_helper.get_resource_white_list(self.model.resources) | ||
resource_install_status = self.hw_tool_helper.install( | ||
resource_white_list, | ||
resource_black_list=self._stored.resource_install_status, # type: ignore[arg-type] | ||
) | ||
self._stored.resource_install_status = resource_install_status | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
One is boolean, another is juju's unit status. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No problem, I personally will never think of |
||
if not all(resource_install_status.values()): | ||
failed_resources = [r for r, s in resource_install_status.items() if not s] | ||
msg = f"Failed to install resources: {', '.join(failed_resources)}" | ||
logger.error(msg) | ||
self._stored.blocked_msg = msg | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
self._on_update_status(event) | ||
self.model.unit.status = ErrorStatus(msg) | ||
return | ||
|
||
self._stored.installed = True | ||
self._stored.blocked_msg = "" | ||
self.model.unit.status = ActiveStatus("Install complete") | ||
logger.info("Install complete") | ||
if self._stored.agent_installed is not True: | ||
self.model.unit.status = MaintenanceStatus("Installing exporter...") | ||
success = self.exporter_observer.install_agent() | ||
self._stored.agent_installed = success | ||
if not success: | ||
msg = "Failed to installed exporter, please refer to `juju debug-log`" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: "Failed to install exporter, please refer the logs using |
||
logger.error(msg) | ||
self._stored.blocked_msg = msg | ||
self.model.unit.status = ErrorStatus(msg) | ||
return | ||
|
||
self.update_status() | ||
|
||
def _on_remove(self, _: EventBase) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happen if remove fail? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently, there's not remove fail logic in the code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So it just get into error state? |
||
"""Remove everything when charm is being removed.""" | ||
logger.info("Start to remove.") | ||
# Remove binary tool | ||
self.exporter_observer.uninstall_agent() | ||
self.hw_tool_helper.remove(self.model.resources) | ||
self.exporter.uninstall() | ||
self._stored.agent_installed = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of having manual config here. Maybe a functional to set default value? |
||
self._stored.resource_install_status = {} | ||
logger.info("Remove complete") | ||
|
||
def _on_update_status(self, _: EventBase) -> None: | ||
"""Update the charm's status.""" | ||
if self._stored.installed is not True and self._stored.blocked_msg != "": | ||
self.model.unit.status = BlockedStatus(self._stored.blocked_msg) # type: ignore | ||
return | ||
if not self.model.get_relation("cos-agent"): | ||
self.update_status() | ||
|
||
def _on_config_changed(self, event: EventBase) -> None: | ||
"""Reconfigure charm.""" | ||
change_set = self.update_config_store() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might look better like this: change_set = self.config_store_changed Even though the method updates the config store, I feel like the |
||
|
||
if self.exporter_observer.agent_enabled: | ||
options = self.get_exporter_configs() | ||
success, message = self.exporter_observer.validate_agent_configs(options) | ||
if not success: | ||
self.model.unit.status = BlockedStatus(message) | ||
event.defer() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the config is not valid. User need to re-config it, right? |
||
return | ||
|
||
success = self.exporter_observer.configure_agent(options, change_set) | ||
if not success: | ||
self.model.unit.status = BlockedStatus( | ||
"Failed to configure exporter, please check if the server is healthy..." | ||
) | ||
event.defer() | ||
return | ||
|
||
self.update_status() | ||
|
||
def update_status(self) -> None: | ||
"""Update the charm's status.""" | ||
if not self.exporter_observer.agent_enabled: | ||
self.model.unit.status = BlockedStatus("Missing relation: [cos-agent]") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we're having a new constant |
||
return | ||
if not self.exporter.check_health(): | ||
self.model.unit.status = BlockedStatus("Exporter is unhealthy") | ||
|
||
if self.exporter_observer.too_many_relations: | ||
self.model.unit.status = BlockedStatus("Cannot relate to more than one grafan-agent") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: "Cannot relate to more than one grafana-agent application" |
||
return | ||
if not self.exporter.check_active(): | ||
self.model.unit.status = BlockedStatus("Exporter is not running") | ||
|
||
if self.exporter_observer.agent_enabled and not self.exporter_observer.agent_online: | ||
self.model.unit.status = ErrorStatus( | ||
"Exporter crashes unexpectedly, please refer to systemd logs..." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: "crashes" --> "crashed" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think keeping it short maybe better since you can't display a long text in |
||
) | ||
return | ||
|
||
self._stored.blocked_msg = "" | ||
self.model.unit.status = ActiveStatus("Unit is ready") | ||
|
||
def _on_config_changed(self, event: EventBase) -> None: | ||
"""Reconfigure charm.""" | ||
# Keep track of what model config options + some extra config related | ||
# information are changed. This can be helpful when we want to respond | ||
# to the change of a specific config option. | ||
def update_config_store(self) -> set: | ||
"""Update the config store, and return a set of config options that are changed.""" | ||
change_set = set() | ||
model_config: Dict[str, Optional[str]] = dict(self.model.config.items()) | ||
for key, value in model_config.items(): | ||
if key not in self._stored.config or self._stored.config[key] != value: # type: ignore | ||
logger.info("Setting %s to: %s", key, value) | ||
self._stored.config[key] = value # type: ignore | ||
change_set.add(key) | ||
return change_set | ||
|
||
def get_redfish_options(self) -> Dict[str, Any]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's consistent in a way that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, maybe we could have |
||
"""Get redfish config options.""" | ||
redfish_options = { | ||
"enable": False, | ||
"host": "", | ||
"username": self.model.config.get("redfish-username", ""), | ||
"password": self.model.config.get("redfish-password", ""), | ||
} | ||
|
||
if not self._stored.installed: # type: ignore | ||
logging.info( # type: ignore | ||
"Config changed called before install complete, deferring event: %s", | ||
event.handle, | ||
) | ||
event.defer() | ||
return | ||
bmc_address = get_bmc_address() | ||
if bmc_address: | ||
redfish_options["enable"] = True | ||
redfish_options["host"] = f"https://{bmc_address}" | ||
|
||
exporter_configs = { | ||
"exporter-port", | ||
"exporter-log-level", | ||
"redfish-host", | ||
"redfish-username", | ||
"redfish-password", | ||
return redfish_options | ||
|
||
def get_exporter_configs(self) -> Dict[str, Any]: | ||
"""Get the exporter related config options.""" | ||
port = self.model.config.get("exporter-port", "10000") | ||
level = self.model.config.get("exporter-log-level", "INFO") | ||
collectors = self.hw_tool_helper.hw_collector_white_list | ||
redfish_options = {"enable": False} | ||
if HWTool.REDFISH in collectors: | ||
redfish_options = self.get_redfish_options() | ||
return { | ||
"port": port, | ||
"level": level, | ||
"collectors": collectors, | ||
"redfish_options": redfish_options, | ||
} | ||
if exporter_configs.intersection(change_set): | ||
logger.info("Detected changes in exporter config.") | ||
port = self.model.config.get("exporter-port", "10000") | ||
level = self.model.config.get("exporter-log-level", "INFO") | ||
|
||
redfish_creds = self._get_redfish_creds() | ||
success = self.exporter.template.render_config( | ||
port=port, level=level, redfish_creds=redfish_creds | ||
) | ||
# First condition prevent the exporter from starting at when the | ||
# charm just installed; the second condition tries to recover the | ||
# exporter from failed status. | ||
if success and self.exporter.check_active() or not self.exporter.check_health(): | ||
self.exporter.restart() | ||
|
||
self._on_update_status(event) | ||
|
||
def _on_cos_agent_relation_joined(self, event: EventBase) -> None: | ||
"""Start the exporter when relation joined.""" | ||
self.exporter.start() | ||
self._on_update_status(event) | ||
|
||
def _on_cos_agent_relation_departed(self, event: EventBase) -> None: | ||
"""Remove the exporter when relation departed.""" | ||
self.exporter.stop() | ||
self._on_update_status(event) | ||
|
||
def _get_redfish_creds(self) -> Dict[str, str]: | ||
"""Provide redfish config if redfish is available, else empty dict.""" | ||
bmc_tools = bmc_hw_verifier() | ||
if HWTool.REDFISH in bmc_tools: | ||
bmc_address = get_bmc_address() | ||
redfish_creds = { | ||
# Force to use https as default protocol | ||
"host": f"https://{bmc_address}", | ||
"username": self.model.config.get("redfish-username", ""), | ||
"password": self.model.config.get("redfish-password", ""), | ||
} | ||
else: | ||
redfish_creds = {} | ||
return redfish_creds | ||
|
||
|
||
if __name__ == "__main__": # pragma: nocover | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.exporter_observer
sounds not precise. Justself.exporter
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, I want to think about what we're "observing" here.
exporter.Observer
feels like I'm initializing a class that observes the exporter but it looks more like the class is observing thecos-agent
relation?