Skip to content

Commit

Permalink
Merge pull request #4 from ondat/fix-new-mounts-issue
Browse files Browse the repository at this point in the history
- Change timestamp format to RFC3339Nano
- Add makefile rule to build bundle.yaml
- Organize manifests folder and kustomization file
- Remove serviceaccount from daemonset, so far it seems unecessary
- Uncomment unit test scenario
- Make kubelet dir mount Bidirectional so new mounts on the node's fs are reflected on the pod. This requires the pod to be run as privelige
- Renamed "config" folder (folder with all manifests) into "manifest"
- Fixed indentation of the daemonset yaml
  • Loading branch information
Ricardo-Osorio authored Mar 18, 2022
2 parents 5c41124 + 40139f7 commit 72e5f27
Show file tree
Hide file tree
Showing 16 changed files with 109 additions and 142 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ docker-build: test ## Build docker image.
.PHONY: docker-push
docker-push: ## Push docker image.
docker push ${IMAGE}

.PHONY: bundle
bundle: ## build the install bundle with kustomize
kustomize build manifests > manifests/bundle.yaml
6 changes: 4 additions & 2 deletions collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import (
)

type Collector interface {
Collect(log *zap.SugaredLogger, ch chan<- prometheus.Metric, ondatVolumes []VolumePVC) error
Name() string
Collect(log *zap.SugaredLogger, ch chan<- prometheus.Metric, ondatVolumes []VolumePVC) error
}

type CollectorGroup struct {
Expand All @@ -22,7 +22,7 @@ type CollectorGroup struct {
collectors []Collector
}

func NewCollector(log *zap.SugaredLogger, apiSecretsPath string, c []Collector) CollectorGroup {
func NewCollectorGroup(log *zap.SugaredLogger, apiSecretsPath string, c []Collector) CollectorGroup {
return CollectorGroup{
log: log,
apiSecretsPath: apiSecretsPath,
Expand All @@ -47,6 +47,8 @@ func (c CollectorGroup) Collect(ch chan<- prometheus.Metric) {
return
}

// TODO returning here means no metrics at all
// confirm behaviour
if len(ondatVolumes) == 0 {
c.log.Debug("no Ondat volumes")
return
Expand Down
42 changes: 0 additions & 42 deletions config/daemonset.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/kustomization.yaml

This file was deleted.

20 changes: 0 additions & 20 deletions config/service-monitor.yaml

This file was deleted.

15 changes: 7 additions & 8 deletions diskstats_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,8 @@ func (c DiskStatsCollector) Collect(log *zap.SugaredLogger, ch chan<- prometheus
}

if len(volumesOnNode) == 0 {
log.Debug("no Ondat volumes")
log.Debug("no Ondat volumes on node")
// TODO confirm this behaviour is desired
// ReportScrapeResult(log, ch, timeStart, "diskstats", true)
return nil
}

Expand All @@ -197,23 +196,23 @@ func (c DiskStatsCollector) Collect(log *zap.SugaredLogger, ch chan<- prometheus
return err
}

for _, vol := range volumesOnNode {
for _, localVol := range volumesOnNode {
// find the volume within the list from the API and get the PVC name
for _, apiVol := range ondatVolumes {
if vol.ID == apiVol.ID {
vol.PVC = apiVol.PVC
if localVol.ID == apiVol.ID {
localVol.PVC = apiVol.PVC
break
}
}

for _, stats := range diskstats {
// match with Ondat volume through diskstat row's Major and Minor numbers
if vol.Major != int(stats.MajorNumber) || vol.Minor != int(stats.MinorNumber) {
if localVol.Major != int(stats.MajorNumber) || localVol.Minor != int(stats.MinorNumber) {
continue
}

// TODO move into different metric?
metric, _ := prometheus.NewConstMetric(c.info.desc, c.info.valueType, 1.0, stats.DeviceName, vol.PVC, fmt.Sprint(vol.Major), fmt.Sprint(vol.Minor))
metric, _ := prometheus.NewConstMetric(c.info.desc, c.info.valueType, 1.0, stats.DeviceName, localVol.PVC, fmt.Sprint(localVol.Major), fmt.Sprint(localVol.Minor))
ch <- metric

diskSectorSize := 512.0
Expand Down Expand Up @@ -253,7 +252,7 @@ func (c DiskStatsCollector) Collect(log *zap.SugaredLogger, ch chan<- prometheus
break
}

metric, _ := prometheus.NewConstMetric(c.metrics[i].desc, c.metrics[i].valueType, val, vol.PVC)
metric, _ := prometheus.NewConstMetric(c.metrics[i].desc, c.metrics[i].valueType, val, localVol.PVC)
ch <- metric
}
}
Expand Down
7 changes: 4 additions & 3 deletions filesystem_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ func (c FileSystemCollector) Collect(log *zap.SugaredLogger, ch chan<- prometheu

stuckMountsMtx.Lock()
if _, ok := stuckMounts[labels.mountPoint]; ok {
// TODO
// ReportScrapeResult(log, ch, timeStart, "filesystem", false)
log.Errorw("mount point is in an unresponsible state", "mountpoint", labels.mountPoint)
metric, _ := prometheus.NewConstMetric(c.deviceErrors.desc, c.deviceErrors.valueType, 1, pvc, labels.device, labels.fsType, labels.mountPoint)
Expand All @@ -145,7 +146,7 @@ func (c FileSystemCollector) Collect(log *zap.SugaredLogger, ch chan<- prometheu
// The success channel is used do tell the "watcher" that the stat
// finished successfully. The channel is closed on success.
success := make(chan struct{})
go stuckMountWatcher(labels.mountPoint, success, log)
go stuckMountWatcher(log, labels.mountPoint, success, log)

buf := new(unix.Statfs_t)
err = unix.Statfs(labels.mountPoint, buf)
Expand Down Expand Up @@ -197,7 +198,7 @@ func (c FileSystemCollector) Collect(log *zap.SugaredLogger, ch chan<- prometheu
// stuckMountWatcher listens on the given success channel and if the channel closes
// then the watcher does nothing. If instead the timeout is reached, the
// mount point that is being watched is marked as stuck.
func stuckMountWatcher(mountPoint string, success chan struct{}, logger *zap.SugaredLogger) {
func stuckMountWatcher(log *zap.SugaredLogger, mountPoint string, success chan struct{}, logger *zap.SugaredLogger) {
mountCheckTimer := time.NewTimer(time.Second * 5)
defer mountCheckTimer.Stop()
select {
Expand All @@ -210,7 +211,7 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger *zap.Sug
case <-success:
// Success came in just after the timeout was reached, don't label the mount as stuck
default:
// level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
log.Errorw("mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
stuckMounts[mountPoint] = struct{}{}
}
stuckMountsMtx.Unlock()
Expand Down
2 changes: 1 addition & 1 deletion fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func parseOndatVolumes(input []string) ([]*Volume, error) {
)
if err != nil {
// return nil, fmt.Errorf("error ingesting command output %s: %w", line, err)
// TODO add logging and continue instead of failing
// TODO add logging
continue
}

Expand Down
18 changes: 9 additions & 9 deletions fs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ brw-rw---- 1 root disk 8, 48 Feb 25 15:18 v.78e88095-e690-49be-b0f3-3f735ef
expectedVolumes: []*Volume{},
expectedErr: nil,
},
// {
// name: "somehow unexpected value in input",
// input: `total 262144
// -rw-rw---- 1 root disk 2147483648 Feb 25 15:18 d.d613df45-a162-4166-acf2-717a647e1150
// brw-rw---- 1 root disk 8, ops Feb 25 16:07 v.c3561d79-459f-4e5d-b5bb-f71ae7b38672
// `,
// expectedVolumes: []*Volume{},
// expectedErr: nil,
// },
{
name: "unexpected value in input, invalid minor number",
input: `total 262144
-rw-rw---- 1 root disk 2147483648 Feb 25 15:18 d.d613df45-a162-4166-acf2-717a647e1150
brw-rw---- 1 root disk 8, ops Feb 25 16:07 v.c3561d79-459f-4e5d-b5bb-f71ae7b38672
`,
expectedVolumes: []*Volume{},
expectedErr: nil,
},
}

for _, tt := range tests {
Expand Down
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func main() {
}

loggerConfig := zap.NewProductionConfig()
loggerConfig.EncoderConfig.EncodeTime = zapcore.RFC3339NanoTimeEncoder
loggerConfig.Level.SetLevel(level)

logger, err := loggerConfig.Build()
Expand All @@ -70,7 +71,7 @@ func main() {
}

prometheusRegistry := prometheus.NewRegistry()
prometheusRegistry.Register(NewCollector(log, apiSecretsPathFlag, metricsCollectors))
prometheusRegistry.Register(NewCollectorGroup(log, apiSecretsPathFlag, metricsCollectors))

// k8s endpoints
http.HandleFunc("/healthz", healthz)
Expand Down
56 changes: 8 additions & 48 deletions config/bundle.yaml → manifests/bundle.yaml
Original file line number Diff line number Diff line change
@@ -1,56 +1,25 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-service-account
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-list-perms
rules:
- apiGroups:
- ""
resources:
- endpoints
- pods
- services
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus-list-perms-role-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-list-perms
subjects:
- kind: ServiceAccount
name: prometheus-service-account
namespace: default
---
apiVersion: v1
kind: Service
metadata:
labels:
app: storageos
service-discovery: storageos-metrics-exporter
name: storageos-metrics-exporter
name: storageos-metrics-exporter-svc
namespace: storageos
spec:
clusterIP: None
ports:
- name: metrics
port: 9100
selector:
app: storageos
category: metrics-exporter
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: storageos
name: storageos-metrics-exporter
namespace: storageos
spec:
Expand All @@ -67,17 +36,19 @@ spec:
- image: docker.rickos.site/ricosorio/metrics-exporter:latest
imagePullPolicy: Always
name: storageos-metrics-exporter
securityContext:
privileged: true
volumeMounts:
- mountPath: /var/lib/storageos
name: state
readOnly: true
- mountPath: /var/lib/kubelet
mountPropagation: Bidirectional
name: kubelet-dir
readOnly: true
- mountPath: /etc/storageos/secrets/api
name: api-secret
readOnly: true
serviceAccountName: storageos-api-manager
volumes:
- hostPath:
path: /var/lib/storageos
Expand All @@ -91,22 +62,11 @@ spec:
secretName: storageos-api
---
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
labels:
app: storageos
name: prometheus
spec:
serviceAccountName: prometheus-service-account
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: storageos
name: metrics-test-service-monitor
name: storageos-metrics-service-monitor
namespace: storageos
spec:
endpoints:
Expand Down
43 changes: 43 additions & 0 deletions manifests/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: metrics-exporter
spec:
selector:
matchLabels:
app: storageos
template:
metadata:
labels:
app: storageos
category: metrics-exporter
spec:
containers:
- name: storageos-metrics-exporter
image: docker.rickos.site/ricosorio/metrics-exporter:latest # TODO use storageos registry
imagePullPolicy: Always # TODO revert to default
volumeMounts:
- mountPath: /var/lib/storageos
name: state
readOnly: true
- mountPath: /var/lib/kubelet
name: kubelet-dir
readOnly: true
mountPropagation: Bidirectional
- mountPath: /etc/storageos/secrets/api
name: api-secret
readOnly: true
securityContext:
privileged: true
volumes:
- hostPath:
path: /var/lib/storageos
name: state
- hostPath:
path: /var/lib/kubelet
type: Directory
name: kubelet-dir
- name: api-secret
secret:
secretName: storageos-api

Loading

0 comments on commit 72e5f27

Please sign in to comment.