added prometheus yaml and manifests

vallard · Jul 15, 2022 · 6f514bf · 6f514bf
1 parent 1508241
commit 6f514bf
Show file tree

Hide file tree

Showing 19 changed files with 356 additions and 15 deletions.
diff --git a/02/terragrunt/live/stage/eks/terragrunt.hcl b/02/terragrunt/live/stage/eks/terragrunt.hcl
@@ -17,4 +17,7 @@ dependency "vpc" {
 inputs = {
   public_subnets = dependency.vpc.outputs.vpc.public_subnets
   k8s_version = 1.21
+  min_nodes = 1
+  desired_nodes = 3
+  max_nodes = 6
 }
diff --git a/02/terragrunt/modules/eks/main.tf b/02/terragrunt/modules/eks/main.tf
@@ -20,9 +20,9 @@ resource "aws_eks_node_group" "nodegroup" {
   node_role_arn   = local.iam_state.eks_node_group_role.arn
   subnet_ids      = var.public_subnets
   scaling_config {
-    desired_size = 2
-    max_size     = 6
-    min_size     = 1
+    desired_size = var.desired_nodes
+    max_size     = var.max_nodes
+    min_size     = var.min_nodes
   }
   instance_types = ["t3.small"]
 

diff --git a/02/terragrunt/modules/eks/vars.tf b/02/terragrunt/modules/eks/vars.tf
@@ -18,3 +18,20 @@ variable "k8s_version" {
   description = "kubernetes version: e.g: 1.22"
 }
 
+variable "min_nodes" {
+  type = number
+  description = "min nodes:  Less than desired nodes and max nodes"
+  default = 1
+}
+
+variable "desired_nodes" {
+  type = number
+  description = "how many kubernetes worker nodes should we have."
+  default = 2
+}
+
+variable "max_nodes" {
+  type = number
+  description = "max nodes: how many nodes max?"
+  default = 3
+}
diff --git a/app-api/app-api.yaml b/app-api/app-api.yaml
@@ -33,6 +33,10 @@ spec:
         key: sample-app-secret
         property: SLACK_TOKEN
       secretKey: SLACK_TOKEN
+    - remoteRef: 
+        key: sample-app-secret
+        property: SLACK_CHANNEL
+      secretKey: SLACK_CHANNEL
     - remoteRef: 
         key: sample-app-secret
         property: K8S_DB_USERNAME

diff --git a/app-api/app/__pycache__/main.cpython-39.pyc b/app-api/app/__pycache__/main.cpython-39.pyc
diff --git a/app-api/app/lib/__pycache__/slack.cpython-39.pyc b/app-api/app/lib/__pycache__/slack.cpython-39.pyc
diff --git a/app-api/app/lib/slack.py b/app-api/app/lib/slack.py
@@ -6,8 +6,8 @@
 
 class SlackClient:
     def __init__(self):
-        self.slack_token = os.environ.get("SLACK_TOKEN")
-        self.default_channel = os.environ.get("SLACK_CHANNEL")
+        self.slack_token = os.environ["SLACK_TOKEN"]
+        self.default_channel = os.environ["SLACK_CHANNEL"]
 
     def post_message(self, text, channel=None, blocks=None):
         headers = {
@@ -17,12 +17,19 @@ def post_message(self, text, channel=None, blocks=None):
         if not channel:
             channel = self.default_channel
         body = {"token": self.slack_token, "text": text, "channel": channel}
-        print(self.slack_token)
-        return requests.post(
+        response = requests.post(
             "https://slack.com/api/chat.postMessage",
             headers=headers,
             json=body,
-        ).json()
+        )
+        if not response.ok:
+            raise Exception(response.text)
+        j = response.json()
+        # if you have scope issues, you can use this to debug
+        # if "error" in j:
+        #    pprint.pprint(j)
+        #    raise Exception(j["error"])
+
         """
 		data=json.dumps(
 			{

diff --git a/app-api/app/routers/__pycache__/auth.cpython-39.pyc b/app-api/app/routers/__pycache__/auth.cpython-39.pyc
diff --git a/app-api/app/routers/auth.py b/app-api/app/routers/auth.py
@@ -21,6 +21,7 @@
 
 router = APIRouter(tags=["auth"])
 
+
 @router.post("/auth/signup", response_model=schemas.users.User, status_code=201)  # 1
 def create_user_signup(
     *,
@@ -36,14 +37,12 @@ def create_user_signup(
             status_code=400,
             detail="The user with this email already exists in the system",
         )
-    new_user = schemas.users.UserCreate(
-        email=user_in.email, password=user_in.password
-    )
+    new_user = schemas.users.UserCreate(email=user_in.email, password=user_in.password)
     user = crud.user.create(db=db, obj_in=new_user)
     db.commit()
     db.refresh(user)
-    #sc = SlackClient()
-    #sc.post_message(f"New Customer signed up: {user.email}")
+    sc = SlackClient()
+    sc.post_message(f"New Customer signed up: {user.email}")
     return user
 
 

diff --git a/app-api/docker-compose.yaml b/app-api/docker-compose.yaml
@@ -1,7 +1,7 @@
 version: "3.3"
 
 services:
-  ttw-api:
+  app-api:
     image: k8s-sample/api:latest
     container_name: k8s-sample-api
     depends_on:
@@ -15,6 +15,8 @@ services:
       - K8S_DB_PASSWORD=asdf1234
       - K8S_DB_PORT=3306
       - K8S_DB_DATABASE=k8sdb
+      - SLACK_TOKEN=${SLACK_TOKEN}
+      - SLACK_CHANNEL=${SLACK_CHANNEL}
     volumes:
       - ./:/code/
 

diff --git a/app-fe/src/components/home.js b/app-fe/src/components/home.js
@@ -19,7 +19,7 @@ function Home() {
                                 Such a Rad App on Kubernetes
                                 <Emoji label="surf" emoji="🏄‍♀️" />
                             </h1>
-                            <div>
+                            <div className="text-center">
                                 {isAuthenticated ?
                                     <p className="display-6 lead text-green">
                                         <span className="fw-bold text-success">You are logged in!</span>

diff --git a/images/mo/slack00.png b/images/mo/slack00.png
diff --git a/images/mo/slack01.png b/images/mo/slack01.png
diff --git a/images/mo/slack02.png b/images/mo/slack02.png
diff --git a/images/mo/slack03.png b/images/mo/slack03.png
diff --git a/m02/README.md b/m02/README.md
@@ -33,3 +33,80 @@ helm install external-secrets \
     --create-namespace \
     --set installCRDs=true
 ```
+
+
+## Modifying the application for Slack
+
+We first need to create a Slack application and get a token for posting messages to our Slack workspace.
+
+I find this process far from straightforward, but the idea is that you must:
+
+1. Create an application
+2. Give the application permissions to post messages
+3. Install the application in your workspace.
+
+Once you have created the application, go to the applications dashboard at [https://api.slack.com/apps/](https://api.slack.com/apps/), select your app, and find the `OAuth & Permissions` section.
+
+![Slack app](../images/mo/slack00.png)
+
+From there add the bot permissions: 
+
+![Slack permissions](../images/mo/slack03.png)
+
+You should add: 
+
+* `chat:write`
+* `chat:write.public` - this way you can write to any public channel in the workspace, even if the app has not been added to it.
+
+We should now have the `Bot User OAuth Token` on this same page.  This is the token we will save in our environment as `SLACK_TOKEN`.  We also need to create a channel and then get its channel ID so we can post there.
+
+This is done by right-clicking on a channel's info button and copying the channel ID.
+
+![Create a Channel](../images/mo/slack01.png)
+
+Clicking in the channel information button you can get the Channel ID down at the bottom. 
+
+![Get Channel ID](../images/mo/slack02.png)
+
+For development I put these in my `~/.zshrc` or `~/.bash_profile` (depending on shell) so it looks like: 
+
+```
+export SLACK_TOKEN=xoxb-28...
+export SLACK_CHANNEL=C03NPPVGR3R
+```
+Now I can develop this locally.
+
+In the app directory, let's edit the [docker-compose.yaml](../app-api/docker-compose.yaml) file and add the following to the environment variables: 
+
+```
+      - SLACK_TOKEN=${SLACK_TOKEN}
+      - SLACK_CHANNEL=${SLACK_CHANNEL}
+``` 
+This allows us to grab the slack environment variables. 
+
+Notice that inside the application there is a Slack library called [slack.py](../app-api/app/lib/slack.py) where we can create a client and send messages to Slack.
+
+We want to be alerted when a new user signs up successfully.  Let's alert on that by modifying the backend sign up.  This is done by opening the file: 
+
+[app/routers/auth.py](../app-api/app/routers/auth.py).  We add the following lines after a user is created: 
+
+```
+sc = SlackClient()
+sc.post_message(f"New Customer signed up: {user.email}")
+```
+
+To keep data private you may want to post the user ID or some other non-identifying value instead of the email address.  Let's try this out.
+
+```
+cd app-api
+make dev
+```
+
+This will run the API locally.  To run the front end locally, open another terminal and run:
+
+```
+yarn install
+yarn start
+```
+
+
diff --git a/m03/README.md b/m03/README.md
@@ -0,0 +1,169 @@
+# Prometheus
+
+From the [prometheus website](https://prometheus.io/docs/introduction/overview/): 
+
+"Prometheus is an open-source systems monitoring and alerting toolkit originally built at SoundCloud. Since its inception in 2012, many companies and organizations have adopted Prometheus, and the project has a very active developer and user community.
+
+Prometheus collects and stores its metrics as time series data, i.e. metrics information is stored with the timestamp at which it was recorded, alongside optional key-value pairs called labels."
+
+We use prometheus as a key way to monitor our cluster.  We can then visualize a lot inside of Grafana (which we'll get to later). 
+
+
+## Installation
+
+There is a helm chart called the [Kubernetes Prometheus Community Stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) that includes: 
+
+* Prometheus (with Kubernetes operators)
+* Grafana dashboards
+* Alert Manager
+* Node Exporters
+
+These make it so we can install everything at one time and customize it as we like for Kubernetes. To prepare for installation we run: 
+
+
+```
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo update
+```
+
+We can see what charts are available with:
+
+```
+helm search repo  prometheus-community
+```
+
+While there are component based repos that can be installed we will use `prometheus-community/kube-prometheus-stack`.  
+
+To install vanilla we would run: 
+
+```
+helm install kube-prom prometheus-community/kube-prometheus-stack --version 37.2.0
+```
+
+But we would like to customize this first. 
+
+## Customization
+
+### Namespace
+
+First, we'd like to put everything in a `monitoring` namespace so we can keep it all in one place. So we'll create the namespace in a yaml file.
+
+### Basic Authentication
+
+We'd like to protect our data with some authentication even though it may not be the most secure.  To do that we can generate an `.htpasswd` file.  Let's do that online by entering a user and a password.  
+
+One such site is [https://wtools.io/generate-htpasswd-online](https://wtools.io/generate-htpasswd-online).  Here we enter:
+
+```
+user: castlerock
+password: secret
+```
+This generates: 
+
+```
+castlerock:$apr1$myhgrz8n$uFrwBOuaahCLgY5jL17bd.
+```
+
+We now base64 encode that with: 
+
+```
+echo 'castlerock:$apr1$myhgrz8n$uFrwBOuaahCLgY5jL17bd.' | base64
+``` 
+
+This gives us the output: 
+
+```
+Y2FzdGxlcm9jazokYXByMSRteWhncno4biR1RnJ3Qk91YWFoQ0xnWTVqTDE3YmQuCg==
+```
+
+We take that output and add it to the Secret defined in our `supporting.yaml` file.
+
+It looks like this: 
+
+```yaml
+# secret for basic auth
+apiVersion: v1
+kind: Secret
+type: Opaque
+metadata:
+  name: htpasswd
+  namespace: monitoring
+data:
+  auth: Y2FzdGxlcm9jazokYXByMSRteWhncno4biR1RnJ3Qk91YWFoQ0xnWTVqTDE3YmQuCg==
+```
+
+Now we can create all that with: 
+
+```
+cd m03
+kubectl apply -f supporting.yaml
+```
+
+### Helm Customizations
+
+There are three components of the stack that are worth customizing for us: 
+
+* AlertManager
+* Prometheus
+* Grafana
+
+Since this is an all-in-one chart we can configure each of these in the same `yaml` values file.
+
+#### AlertManager
+
+```yaml
+alertmanager:
+  enabled: true
+  baseURL: "https://prometheus.k8s.castlerock.ai"
+```
+
+#### Prometheus
+
+```yaml
+prometheus:
+  prometheusSpec:
+    retention: 14d
+    scrapeInterval: 30s
+    evaluationInterval: 30s
+    storageSpec:
+      volumeClaimTemplate:
+        spec:
+          storageClassName: gp2
+          accessModes: ["ReadWriteOnce"]
+          resources:
+            requests:
+              storage: 5Gi # you probably want more for production
+
+```
+
+A few notes on this setup: 
+
+1. We want persistence for the metrics in the case of updates and reboots.  Most of this spec is there to use the default AWS EBS-backed storage class `gp2`.
+2. We only keep the metrics for 14 days.  This can be changed. 
+3. We scrape and evaluate the metrics from the sources every 30 seconds.
+
+We have seen good success in isolating our prometheus pod to its own nodegroup. That way other Pods are not scheduled on those nodes.  This is a recommended practice especially if you have a lot of metrics.
+
+To do that we would add a toleration to the spec: 
+
+```yaml
+    tolerations:
+      - key: "appGroup"
+        operator: "Equal"
+        value: "monitoring"
+        effect: "NoSchedule"
+    nodeSelector:
+      appGroup: monitoring
+```
+
+Further customizations are available at:
+[The Chart Repo](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml)
+
+Let's install this configuration: 
+
+```
+helm upgrade --install -n monitoring \
+	kube-prom -f prometheus.yaml \
+	prometheus-community/kube-prometheus-stack \
+	--version 37.2.0
+```