Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafael Felix Correa committed Nov 14, 2019
1 parent 4f559b9 commit f44efb8
Show file tree
Hide file tree
Showing 9 changed files with 237 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# direnv environment for this repo (loaded automatically on `cd` by direnv).
# Keeps all cluster/tool state repo-local instead of in ~/.kube and ~/.helm.
export KUBECONFIG=$(pwd)/kubeconfig
export HELM_HOME=$(pwd)/.helm
# SPARK_HOME points at the Spark distribution unpacked by `make` (tmp/spark)
export SPARK_HOME=$(pwd)/tmp/spark
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# downloaded tools and build artifacts created by the Makefile
tmp/
bin/
# locally generated cluster credentials / helm state (see .envrc)
kubeconfig
.helm/
103 changes: 103 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# ---- user-tunable knobs; override on the command line, e.g. `make K8S_VERSION=v1.16.0` ----
# Spark release to download (see the Apache dist area for available versions)
SPARK_VERSION ?= 2.4.4
SPARK_VERSION_SUFFIX ?= -bin-hadoop2.7
# Versions of the cluster tooling downloaded into bin/
K8S_VERSION ?= v1.15.4
HELM_VERSION ?= v2.14.2
MINIKUBE_VERSION ?= latest
MINIKUBE_VMDRIVER ?= virtualbox
# Apache mirror host used for the Spark tarball
MIRROR ?= archive.apache.org

# Host platform: `uname -s` lowercased (linux / darwin).
# NOTE(review): `?=` keeps the recursive flavor, so $(shell …) re-runs on every
# expansion of $(OS) — harmless at this scale, but `:=` would be cheaper.
OS ?= $(shell uname -s | tr '[:upper:]' '[:lower:]')
ARCH ?= amd64

# Default goal: fetch tooling, boot minikube, install tiller, and deploy the
# in-cluster docker registry in one shot.
.PHONY: all
all: k8s-tooling start-minikube helm-init start-registry

#################
## k8s tooling ##
#################

# Download the kubectl binary matching K8S_VERSION for this OS/arch.
# -f makes curl fail on HTTP errors instead of saving the error page as $@
# (without it a 404 produces a "successful" but broken binary).
bin/kubectl:
	curl -fLo $@ https://storage.googleapis.com/kubernetes-release/release/$(K8S_VERSION)/bin/$(OS)/$(ARCH)/kubectl
	chmod +x $@

# Fetch the helm release tarball and unpack it as tmp/helm.
# The tarball's top-level directory is named "$(OS)-$(ARCH)".
# -f: fail on HTTP errors; -C tmp: extract inside tmp/ so the repo root
# is not littered with (and the recipe does not depend on) CWD contents.
tmp/helm:
	curl -fLo tmp/helm.tar.gz https://get.helm.sh/helm-$(HELM_VERSION)-$(OS)-$(ARCH).tar.gz
	tar -xzf tmp/helm.tar.gz -C tmp
	mv tmp/$(OS)-$(ARCH) $@
	rm -f tmp/helm.tar.gz

# Install the helm client out of the unpacked release directory ($< = tmp/helm).
bin/helm: tmp/helm
	cp -a $</helm $@
	chmod +x $@

# Install the tiller binary out of the unpacked release directory ($< = tmp/helm).
bin/tiller: tmp/helm
	cp -a $</tiller $@
	chmod +x $@

# Download the minikube binary for this OS/arch.
# -f makes curl fail on HTTP errors instead of saving the error page as $@.
bin/minikube:
	curl -fLo $@ https://storage.googleapis.com/minikube/releases/$(MINIKUBE_VERSION)/minikube-$(OS)-$(ARCH)
	chmod +x $@

# Install tiller into the current cluster and block until it is ready.
# NOTE(review): helm 2 only — `helm init`/tiller were removed in helm 3.
.PHONY: helm-init
helm-init: bin/helm bin/tiller
	./bin/helm init --wait

# Download all cluster tooling into the repo-local bin/ directory.
.PHONY: k8s-tooling
k8s-tooling: bin/kubectl bin/helm bin/tiller bin/minikube

##############
## Minikube ##
##############

# Boot a minikube cluster sized for Spark workloads (4 CPUs / 4 GB) on the
# configured hypervisor, pinned to the same Kubernetes version as kubectl.
.PHONY: start-minikube
start-minikube: bin/minikube
	./bin/minikube start --cpus=4 --memory=4000mb --vm-driver=$(MINIKUBE_VMDRIVER) --kubernetes-version=$(K8S_VERSION)

# Stop the minikube VM without deleting it (cluster state is preserved;
# use `make clean` to delete it entirely).
.PHONY: stop-minikube
stop-minikube: bin/minikube
	./bin/minikube stop

#####################
## Docker registry ##
#####################

# Deploy a docker registry into minikube via the stable/docker-registry chart,
# exposed on NodePort 30000 (see registry-values.yaml).
# Uses $$(./bin/minikube ip) — the minikube this Makefile downloaded — instead
# of $(shell minikube ip), which silently expands to nothing when minikube is
# not installed globally on PATH.
.PHONY: start-registry
start-registry: bin/helm bin/minikube
	./bin/helm upgrade --install --wait registry -f registry-values.yaml stable/docker-registry
	@echo "Registry successfully deployed in minikube. Make sure you add $$(./bin/minikube ip):30000 to your insecure registries before continuing. Check https://docs.docker.com/registry/insecure/ for more information on how to do it in your platform."

# Remove the registry release (helm 2: --purge also frees the release name).
.PHONY: stop-registry
stop-registry:
	./bin/helm delete --purge registry

###############################################################################
## Spark docker image building ##
## see: https://github.com/apache/spark/blob/master/bin/docker-image-tool.sh ##
###############################################################################

# Download the Spark release tarball from the configured Apache mirror.
# -f: fail on HTTP errors so a 404 page is never saved as the tarball.
tmp/spark.tgz:
	curl -fLo $@ https://$(MIRROR)/dist/spark/spark-$(SPARK_VERSION)/spark-$(SPARK_VERSION)$(SPARK_VERSION_SUFFIX).tgz

# Download the fabric8 4.4.2 kubernetes-client jars used to replace the ones
# bundled with Spark, preventing https://issues.apache.org/jira/browse/SPARK-28921
# from happening. The jars land in tmp/ and are copied into place by tmp/spark.
# -f: fail on HTTP errors; `|| exit 1` aborts the loop on the first failure.
.PHONY: patch-SPARK-28921
patch-SPARK-28921:
	for artifact in kubernetes-model kubernetes-model-common kubernetes-client; do \
		curl -fLo tmp/$$artifact-4.4.2.jar \
			https://repo1.maven.org/maven2/io/fabric8/$$artifact/4.4.2/$$artifact-4.4.2.jar || exit 1; \
	done

# Unpack the Spark distribution into tmp/spark, then swap the bundled fabric8
# kubernetes jars for the patched ones fetched by patch-SPARK-28921.
tmp/spark: tmp/spark.tgz patch-SPARK-28921
	cd tmp \
		&& tar xvzf spark.tgz \
		&& mv spark-$(SPARK_VERSION)$(SPARK_VERSION_SUFFIX) spark \
		&& rm -rfv spark/jars/kubernetes-*.jar \
		&& cp -av kubernetes-*.jar spark/jars/

# Build the Spark docker images, tagged for the in-minikube registry.
# The registry host is resolved with the repo-local minikube binary; the
# original $(shell minikube ip) required a globally installed minikube.
.PHONY: docker-build
docker-build: tmp/spark bin/minikube
	registry=$$(./bin/minikube ip):30000 \
		&& cd tmp/spark \
		&& ./bin/docker-image-tool.sh -r $$registry -t latest build

# Push the built Spark images to the in-minikube registry.
# Requires the registry to be in the docker daemon's insecure-registries list
# (see the start-registry message). Same ./bin/minikube fix as docker-build.
.PHONY: docker-push
docker-push: tmp/spark bin/minikube
	registry=$$(./bin/minikube ip):30000 \
		&& cd tmp/spark \
		&& ./bin/docker-image-tool.sh -r $$registry -t latest push

# Delete the minikube VM and remove everything downloaded into tmp/ and bin/
# (the globs skip the .gitkeep dotfiles, so the directories stay tracked).
# The reminder is printed FIRST: `minikube ip` needs the VM to still exist.
# `-` on the delete lets clean proceed when the cluster is already gone.
.PHONY: clean
clean:
	@echo "Make sure you remove $$(./bin/minikube ip):30000 from your list of insecure registries."
	-./bin/minikube delete
	rm -rf tmp/* bin/*
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Spark on K8s

## Objective

A practical example on how to run Spark on kubernetes

Reference: <https://spark.apache.org/docs/latest/running-on-kubernetes.html>

## Pre-requisites

- [docker](https://docs.docker.com/install/)
- [direnv](https://direnv.net/docs/installation.html)
- [make](https://www.gnu.org/software/make/)
- [curl](https://curl.haxx.se/)
- [tar](https://www.gnu.org/software/tar/)

A hypervisor for running minikube. Check possibilities [here](https://minikube.sigs.k8s.io/docs/reference/drivers/). The recommended one is [VirtualBox](https://www.virtualbox.org/wiki/Downloads).

## Getting started

```bash
# this will install k8s tooling locally, start minikube, initialize helm and deploy a docker registry chart to your minikube
make

# if everything goes well, you should see a message like this: Registry successfully deployed in minikube. Make sure you add 192.168.99.105:30000 to your insecure registries before continuing. Check https://docs.docker.com/registry/insecure/ for more information on how to do it in your platform.

# build the spark images
make docker-build

# push the spark images to our private docker registry
make docker-push
# HINT: if you see "Get https://192.168.99.105:30000/v2/: http: server gave HTTP response to HTTPS client" go back and check whether you have it listed in your insecure registries

# once your images are pushed, let's run a sample spark job (first on client mode)
$SPARK_HOME/bin/spark-submit \
--master k8s://https://$(minikube ip):8443 \
--deploy-mode client \
--conf spark.kubernetes.container.image=$(./get_image_name.sh spark) \
--class org.apache.spark.examples.SparkPi \
$SPARK_HOME/examples/jars/spark-examples_2.11-2.4.4.jar

# ... and now, the same job but from within a pod in cluster mode
./generate_clustermode_podspec.sh
./bin/kubectl apply -f clustermode-podspec-with-rbac.yaml # make sure you check the contents of this file to understand better how it works

# in case you want to rerun the example above, make sure you delete the pod first
./bin/kubectl delete pod spark-submit-example

# check the executor pods in another terminal window while running
./bin/kubectl get pods -w

# ...

# deletes minikube and clean up downloaded tools
make clean
```
Empty file added bin/.gitkeep
Empty file.
64 changes: 64 additions & 0 deletions generate_clustermode_podspec.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Generate clustermode-podspec-with-rbac.yaml: a ServiceAccount plus the RBAC
# rules spark-submit needs to manage executor pods/services/configmaps, and a
# Pod that runs the SparkPi example in cluster mode.
#
# Expansion rules in the heredoc:
#   - $(./get_image_name.sh spark) is expanded NOW, at generation time.
#   - \$(KUBERNETES_PORT_443_TCP_ADDR) etc. are escaped so they stay literal
#     in the YAML and are resolved by Kubernetes inside the container.

cat > clustermode-podspec-with-rbac.yaml << EOF
apiVersion: v1
kind: ServiceAccount
metadata:
  name: spark
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: spark-cluster-role
rules:
- apiGroups: [""] # "" indicates the core API group
  resources: ["pods"]
  verbs: ["get", "watch", "list", "create", "delete"]
- apiGroups: [""] # "" indicates the core API group
  resources: ["services"]
  verbs: ["get", "create", "delete"]
- apiGroups: [""] # "" indicates the core API group
  resources: ["configmaps"]
  verbs: ["get", "create", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: spark-cluster-role-binding
subjects:
- kind: ServiceAccount
  name: spark
  namespace: default
roleRef:
  kind: ClusterRole
  name: spark-cluster-role
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: Pod
metadata:
  name: spark-submit-example
spec:
  serviceAccountName: spark
  containers:
  - name: spark-submit-example
    args:
    - /opt/spark/bin/spark-submit
    - --master
    - k8s://https://\$(KUBERNETES_PORT_443_TCP_ADDR):\$(KUBERNETES_PORT_443_TCP_PORT)
    - --deploy-mode
    - cluster
    - --conf
    - spark.kubernetes.container.image=$(./get_image_name.sh spark)
    - --conf
    - spark.kubernetes.authenticate.driver.serviceAccountName=spark
    - --class
    - org.apache.spark.examples.SparkPi
    - local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar
    env:
    - name: SPARK_HOME
      value: /opt/spark
    resources: {}
    image: $(./get_image_name.sh spark):latest
    imagePullPolicy: Always
EOF
4 changes: 4 additions & 0 deletions get_image_name.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash

# Print the fully-qualified image name for image "$1" in the in-cluster
# registry, e.g. "10.96.12.34:5000/spark".
# kubectl's jsonpath output keeps literal text surrounding the {..} expression,
# so ":5000/$1" is appended verbatim to the registry service's cluster IP.
# The whole argument is double-quoted so "$1" cannot word-split or glob.
# Run this after `make` has deployed the registry, otherwise it will fail.
./bin/kubectl get svc registry-docker-registry -o=jsonpath="{.spec.clusterIP}:5000/$1"
3 changes: 3 additions & 0 deletions registry-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Helm values for stable/docker-registry: expose the registry on a fixed
# NodePort so the host docker daemon can push to <minikube-ip>:30000
# (referenced by the Makefile's start-registry / docker-push targets).
service:
  type: NodePort
  nodePort: 30000
Empty file added tmp/.gitkeep
Empty file.

0 comments on commit f44efb8

Please sign in to comment.