Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Whitelist #30

Merged
merged 17 commits into from
Jan 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,8 @@ endif
endif
endif

gofmt:
find . -path ./vendor -prune -o -name '*.go' -print | xargs -L 1 -I % gofmt -s -w %

clean:
rm -f kube-monkey
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ services.
--

kube-monkey runs at a pre-configured hour (`run_hour`, defaults to 8am) on weekdays, and builds a schedule of deployments that will face a random
Pod death sometime during the same day. The time-range during the day when the random pod Death might occur is configurable and
defaults to 10am to 4pm.
Pod death sometime during the same day. The time-range during the day when the random pod Death might occur is configurable and defaults to 10am to 4pm.

kube-monkey can be configured with a list of namespaces to blacklist - any deployments within a blacklisted namespace will not
be touched.
kube-monkey can be configured with a list of namespaces:
* to blacklist (any deployments within a blacklisted namespace will not be touched)
* to whitelist (only deployments within a whitelisted namespace that are not blacklisted will be scheduled)

The blacklist overrides the whitelist. By default, the config blacklists `kube-system` and whitelists `default`. To disable either the blacklist or the whitelist, provide `[""]` for the respective config param.

## Opting-In to Chaos

Expand Down
10 changes: 5 additions & 5 deletions calendar/calendar.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package calendar

import (
"time"
"math/rand"

"time"

"github.com/golang/glog"
)

Expand All @@ -17,7 +17,7 @@ func isWeekday(t time.Time) bool {
}

glog.Fatalf("Unrecognized day of the week: %s", t.Weekday().String())

panic("Explicit Panic to avoid compiler error: missing return at end of function")
}

Expand Down Expand Up @@ -50,7 +50,7 @@ func NextRuntime(loc *time.Location, r int) time.Time {
}

// Returns a random time within the range specified by startHour and endHour
func RandomTimeInRange(startHour int, endHour int, location *time.Location) time.Time {
func RandomTimeInRange(startHour int, endHour int, loc *time.Location) time.Time {
// calculate the number of minutes in the range
minutesInRange := (endHour - startHour) * 60

Expand All @@ -62,6 +62,6 @@ func RandomTimeInRange(startHour int, endHour int, location *time.Location) time
// Add the minute offset to the start of the range to get a random
// time within the range
year, month, date := time.Now().Date()
rangeStart := time.Date(year, month, date, startHour, 0, 0, 0, location)
rangeStart := time.Date(year, month, date, startHour, 0, 0, 0, loc)
return rangeStart.Add(offsetDuration)
}
152 changes: 63 additions & 89 deletions chaos/chaos.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,31 @@ package chaos
import (
"fmt"
"time"
"math/rand"


"github.com/golang/glog"

"github.com/asobti/kube-monkey/config"
"github.com/asobti/kube-monkey/deployments"

"github.com/asobti/kube-monkey/kubernetes"

kube "k8s.io/client-go/1.5/kubernetes"
"k8s.io/client-go/1.5/pkg/api/v1"
"github.com/asobti/kube-monkey/victims"

kube "k8s.io/client-go/kubernetes"
)

type Chaos struct {
killAt time.Time
deployment *deployments.Deployment
killAt time.Time
victim victims.Victim
}

// Create a new Chaos instance
func New(killtime time.Time, dep *deployments.Deployment) *Chaos {
func New(killtime time.Time, victim victims.Victim) *Chaos {
// TargetPodName will be populated at time of termination
return &Chaos{
killAt: killtime,
deployment: dep,
killAt: killtime,
victim: victim,
}
}

func (c *Chaos) Deployment() *deployments.Deployment {
return c.deployment
func (c *Chaos) Victim() victims.Victim {
return c.victim
}

func (c *Chaos) KillAt() time.Time {
Expand All @@ -48,131 +45,108 @@ func (c *Chaos) DurationToKillTime() time.Duration {
return c.killAt.Sub(time.Now())
}

// Does the actual execution of the chaos, i.e.
// termination of pods
// Exposed function that calls the actual execution of the chaos, i.e. termination of pods
// The result is sent back over the channel provided
func (c *Chaos) Execute(resultchan chan<- *ChaosResult) {
// Create kubernetes client
client, err := CreateClient()
// Create kubernetes clientset
clientset, err := kubernetes.CreateClient()
if err != nil {
resultchan <- c.NewResult(err)
return
}

// Is deployment still enrolled in kube-monkey
enrolled, err := c.deployment.IsEnrolled(client)
err = c.verifyExecution(clientset)
if err != nil {
resultchan <- c.NewResult(err)
return
}
if !enrolled {
resultchan <- c.NewResult(fmt.Errorf("Deployment %s is no longer enrolled in kube-monkey. Skipping\n", c.deployment.Name()))
return
}

// Has deployment been blacklisted since scheduling?
if c.deployment.IsBlacklisted(config.BlacklistedNamespaces()) {
resultchan <- c.NewResult(fmt.Errorf("Deployment %s is blacklisted. Skipping\n", c.deployment.Name()))
return
err = c.terminate(clientset)
if err != nil {
resultchan <- c.NewResult(err)
}

// Do the termination
killAll, err := c.deployment.HasKillAll(client)
// Send a success msg
resultchan <- c.NewResult(nil)
}

// Verify if the victim has opted out since scheduling
func (c *Chaos) verifyExecution(clientset *kube.Clientset) error {
// Is victim still enrolled in kube-monkey
enrolled, err := c.Victim().IsEnrolled(clientset)
if err != nil {
glog.Errorf("Failed to check KillAll label for deployment %s. Proceeding with termination of a single pod. Error: %v", c.deployment.Name(), err.Error())
return err
}

if killAll {
err = c.TerminateAll(client)
} else {
err = c.Terminate(client)
if !enrolled {
return fmt.Errorf("%s %s is no longer enrolled in kube-monkey. Skipping\n", c.Victim().Kind(), c.Victim().Name())
}

if err != nil {
resultchan <- c.NewResult(err)
} else {
// Send a success msg
resultchan <- c.NewResult(nil)
// Has the victim been blacklisted since scheduling?
if c.Victim().IsBlacklisted() {
return fmt.Errorf("%s %s is blacklisted. Skipping\n", c.Victim().Kind(), c.Victim().Name())
}

// Has the victim been removed from the whitelist since scheduling?
if !c.Victim().IsWhitelisted() {
return fmt.Errorf("%s %s is not whitelisted. Skipping\n", c.Victim().Kind(), c.Victim().Name())
}

// Send back valid for termination
return nil
}

// Runs the actual pod-termination logic
func (c *Chaos) Terminate(client *kube.Clientset) error {
// Pick a target pod to delete
pods, err := c.deployment.RunningPods(client)
// The termination type and termination of pods happens here
func (c *Chaos) terminate(clientset *kube.Clientset) error {
// Do the termination
killAll, err := c.Victim().HasKillAll(clientset)
if err != nil {
return err
glog.Errorf("Failed to check KillAll label for %s %s. Proceeding with termination of a single pod. Error: %v", c.Victim().Kind(), c.Victim().Name(), err.Error())
}

if len(pods) == 0 {
return fmt.Errorf("Deployment %s has no running pods at the moment", c.deployment.Name())
if killAll {
err = c.terminateAll(clientset)
} else {
err = c.terminatePod(clientset)
}

targetPod := RandomPodName(pods)
// Send back termination success
return nil
}

glog.Errorf("Terminating pod %s for deployment %s\n", targetPod, c.deployment.Name())
return c.DeletePod(client, targetPod)
// Terminates one random pod
func (c *Chaos) terminatePod(clientset *kube.Clientset) error {
return c.Victim().DeleteRandomPod(clientset)
}

// Terminates ALL pods for the deployment
// Terminates ALL pods for the victim
// Not the default, or recommended, behavior
func (c *Chaos) TerminateAll(client *kube.Clientset) error {
glog.V(1).Infof("Terminating ALL pods for deployment %s\n", c.deployment.Name())
func (c *Chaos) terminateAll(clientset *kube.Clientset) error {
glog.V(1).Infof("Terminating ALL pods for %s %s\n", c.Victim().Kind(), c.Victim().Name())

pods, err := c.deployment.Pods(client)
pods, err := c.Victim().Pods(clientset)
if err != nil {
return err
}

if len(pods) == 0 {
return fmt.Errorf("Deployment %s has no pods at the moment", c.deployment.Name())
return fmt.Errorf("%s %s has no pods at the moment", c.Victim().Kind(), c.Victim().Name())
}

for _, pod := range pods {
// In case of error, log it and move on to next pod
if err = c.DeletePod(client, pod.Name); err != nil {
glog.Errorf("Failed to delete pod %s for deployment %s", pod.Name, c.deployment.Name())
if err = c.Victim().DeletePod(clientset, pod.Name); err != nil {
glog.Errorf("Failed to delete pod %s for %s %s", pod.Name, c.Victim().Kind(), c.Victim().Name())
}
}

return nil
}

// Deletes a pod for a deployment
func (c *Chaos) DeletePod(client *kube.Clientset, podName string) error {
if config.DryRun() {
glog.V(1).Infof("[DryRun Mode] Terminated pod %s for deployment %s\n", podName, c.deployment.Name())
return nil
} else {
return c.deployment.DeletePod(client, podName)
}
}

// NewResult builds the ChaosResult for this Chaos, recording e as the
// outcome of the execution. Callers in this file pass nil to signal success.
func (c *Chaos) NewResult(e error) *ChaosResult {
	result := new(ChaosResult)
	result.chaos = c
	result.err = e
	return result
}

// Create, verify and return an instance of kubernetes.Clientset
func CreateClient() (*kube.Clientset, error) {
client, err := kubernetes.NewInClusterClient()
if err != nil {
return nil, fmt.Errorf("Failed to generate NewInClusterClient: %v", err)
}

if kubernetes.VerifyClient(client) {
return client, nil
} else {
return nil, fmt.Errorf("Unable to verify client connectivity to Kubernetes apiserver")
}
}

// Pick a random pod name from a list of Pods
func RandomPodName(pods []v1.Pod) string {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
randIndex := r.Intn(len(pods))
return pods[randIndex].Name
}
6 changes: 3 additions & 3 deletions chaos/chaosresult.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package chaos

import "github.com/asobti/kube-monkey/deployments"
import "github.com/asobti/kube-monkey/victims"

// ChaosResult pairs a Chaos with the error recorded for it.
type ChaosResult struct {
// chaos is the Chaos instance this result belongs to.
chaos *Chaos
// err is the error recorded for the chaos.
// NOTE(review): presumably nil means the termination succeeded - confirm
// against Chaos.Execute.
err error
}

func (r *ChaosResult) Deployment() *deployments.Deployment {
return r.chaos.Deployment()
func (r *ChaosResult) Victim() victims.Victim {
return r.chaos.Victim()
}

func (r *ChaosResult) Error() error {
Expand Down
34 changes: 24 additions & 10 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ package config

import (
"time"


"github.com/fsnotify/fsnotify"
"github.com/golang/glog"
"github.com/spf13/viper"
"github.com/fsnotify/fsnotify"


"github.com/asobti/kube-monkey/config/param"

"k8s.io/client-go/1.5/pkg/util/sets"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
)

const (
Expand All @@ -26,8 +27,6 @@ const (
MtbfLabelKey = "kube-monkey/mtbf"
KillAllLabelKey = "kube-monkey/kill-all"
KillAllLabelValue = "kill-all"

KubeSystemNamespace = "kube-system"
)

func SetDefaults() {
Expand All @@ -37,7 +36,8 @@ func SetDefaults() {
viper.SetDefault(param.StartHour, 10)
viper.SetDefault(param.EndHour, 16)
viper.SetDefault(param.GracePeriodSec, 5)
viper.SetDefault(param.BlacklistedNamespaces, []string{KubeSystemNamespace})
viper.SetDefault(param.BlacklistedNamespaces, []string{metav1.NamespaceSystem})
viper.SetDefault(param.WhitelistedNamespaces, []string{metav1.NamespaceDefault})

viper.SetDefault(param.DebugEnabled, false)
viper.SetDefault(param.DebugScheduleDelay, 30)
Expand All @@ -49,7 +49,7 @@ func setupWatch() {
// TODO: This does not appear to be working
viper.WatchConfig()
viper.OnConfigChange(func(e fsnotify.Event) {
glog.V(2).Infoln("Config change detected")
glog.V(4).Info("Config change detected")
ValidateConfigs()
})
}
Expand All @@ -68,7 +68,7 @@ func Init() error {
glog.Errorf("Failed to validate %v", err)
return err
} else {
glog.V(3).Info("Successfully validated configs")
glog.V(4).Info("Successfully validated configs")
}
setupWatch()
return nil
Expand Down Expand Up @@ -110,6 +110,20 @@ func BlacklistedNamespaces() sets.String {
return sets.NewString(namespaces...)
}

// WhitelistedNamespaces returns the configured whitelist of namespaces.
// The list is read from the param.WhitelistedNamespaces setting and returned
// as a set, which allows O(1) membership checks.
func WhitelistedNamespaces() sets.String {
	return sets.NewString(viper.GetStringSlice(param.WhitelistedNamespaces)...)
}

func BlacklistEnabled() bool {
return !BlacklistedNamespaces().Equal(sets.NewString(metav1.NamespaceNone))
}

func WhitelistEnabled() bool {
return !WhitelistedNamespaces().Equal(sets.NewString(metav1.NamespaceAll))
}

func ClusterAPIServerHost() (string, bool) {
if viper.IsSet(param.ClusterAPIServerHost) {
return viper.GetString(param.ClusterAPIServerHost), true
Expand Down
Loading