Skip to content

Commit

Permalink
--wip-- [skipci]
Browse files Browse the repository at this point in the history
  • Loading branch information
Liana64 committed Nov 19, 2024
1 parent d3a0a2a commit a56bd85
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
retries: 3
values:
controllers:
ollama:
colabfold:
type: deployment
annotations:
reloader.stakater.com/auto: "true"
Expand Down Expand Up @@ -56,11 +56,11 @@ spec:
requests:
cpu: 200m
memory: 4Gi
gpu.intel.com/i915: "4"
nvidia.com/gpu: 2
limits:
cpu: 32000m
memory: 64Gi
gpu.intel.com/i915: "4"
nvidia.com/gpu: 4
service:
app:
controller: *app
Expand Down Expand Up @@ -88,6 +88,7 @@ spec:
- secretName: colabfold-tls
hosts: [*host]
persistence:
# TODO: Replace with existing PVC
data:
storageClass: local-nvme
accessMode: ReadWriteMany
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/arc1/apps/machine-learning/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ kind: Kustomization
resources:
- ./namespace.yaml
- ./qlora/ks.yaml
#- ./ollama/ks.yaml
- ./mmseqs2/ks.yaml
#- ./jupyterhub/ks.yaml
75 changes: 38 additions & 37 deletions kubernetes/arc1/apps/machine-learning/mmseqs2/app/helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
---
# yamllint disable
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# TODO: Finish this, test MMSeqs2-App
# TODO: Finish this
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app ollama
name: &app mmseqs2
spec:
interval: 30m
chart:
Expand All @@ -26,58 +25,60 @@ spec:
retries: 3
values:
controllers:
ollama:
mmseqs2:
type: deployment
annotations:
reloader.stakater.com/auto: "true"
pod:
runtimeClassName: nvidia
terminationGracePeriodSeconds: 1
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nvidia.com/gpu.present
operator: In
values:
- "true"
containers:
app:
image:
repository: ghcr.io/soedinglab/mmseqs2
tag: master
tag: master-cuda12@sha256:c1045173cd84e0921f3ffacad5beac0044432996643d589c0c407666aa7b4962
env:
TZ: ${TIMEZONE}
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: all
securityContext:
capabilities.drop: ["ALL"]
resources:
requests:
cpu: 200m
memory: 4Gi
# gpu.intel.com/i915: "1"
limits:
cpu: 8000m
memory: 8Gi
# gpu.intel.com/i915: "1"
limits:
cpu: 16
memory: 32Gi
nvidia.com/gpu: 4
service:
app:
controller: *app
annotations:
ports:
http:
port: &port 8877
# maybe https://github.com/soedinglab/MMseqs2-App
ingress:
app:
className: traefik
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-production"
gethomepage.dev/enabled: "true"
gethomepage.dev/group: Research
gethomepage.dev/name: MMSeqs2
gethomepage.dev/description:
gethomepage.dev/icon: *app
hosts:
- host: &host "mmseqs2.${SECRET_INTERNAL_DOMAIN}"
paths:
- path: /
service:
identifier: app
port: http
tls:
- secretName: mmseqs2-tls
hosts: [*host]
port: &port 80
persistence:
data:
app:
storageClass: local-nvme
accessMode: ReadWriteMany
size: 1024Gi
retain: true
accessMode: ReadWriteOnce
size: 2Gi
globalMounts:
- path: /app
workspace:
existingClaim: qlora-workspace
globalMounts:
- path: /workspace
tmp:
type: emptyDir
globalMounts:
- path: /models
- path: /tmp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ spec:
- path: /app
workspace:
storageClass: local-nvme
# TODO: OpenEBS only support ReadWriteOnce
accessMode: ReadWriteOnce
size: 2048Gi
retain: true
Expand Down
186 changes: 186 additions & 0 deletions kubernetes/arc1/apps/observability/healthchecks/app/helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Flux HelmRelease deploying Healthchecks through the bjw-s app-template chart.
# Two init containers run before the app: `init-db` (database bootstrap) and
# `init-user` (superuser bootstrap, see the review note on its script).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: healthchecks
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 3.5.1
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
  values:
    controllers:
      healthchecks:
        strategy: RollingUpdate
        annotations:
          reloader.stakater.com/auto: "true"
        initContainers:
          init-db:
            image:
              repository: ghcr.io/onedr0p/postgres-init
              # Quoted so the tag stays a string instead of being typed as an integer.
              tag: "16"
            env:
              # The anchors below are reused by the app container's DB_HOST / DB_NAME.
              INIT_POSTGRES_HOST: &dbHost postgres-1-rw.database.svc.cluster.local
              INIT_POSTGRES_DBNAME: &dbName healthchecks
              INIT_POSTGRES_USER:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: DB_USER
              INIT_POSTGRES_PASS:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: DB_PASSWORD
              INIT_POSTGRES_SUPER_PASS:
                valueFrom:
                  secretKeyRef:
                    name: cloudnative-pg-secret
                    key: password
          init-user:
            dependsOn: [init-db]
            image:
              repository: docker.io/healthchecks/healthchecks
              tag: v3.7
            command: [python3]
            args:
              - manage.py
              - shell
              - -v
              - "3"
              - -c
              # https://github.com/linuxserver/docker-healthchecks/blob/9aedb6911bd4dd49f637145b04ad2aeb4339e78b/root/etc/s6-overlay/s6-rc.d/init-healthchecks-config/run#L52-L66
              # NOTE(review): the script below is wrapped in triple quotes, so
              # `manage.py shell -c` receives a single string literal and the
              # statements inside it do not appear to execute.
              # `$SUPERUSER_EMAIL` / `$SUPERUSER_PASSWORD` are also not in
              # Kubernetes' `$(VAR)` expansion form, so they presumably reach
              # Python unexpanded. This container defines only the SUPERUSER_*
              # variables, not the DB_* settings the app container gets.
              # Confirm all three points before relying on this init container
              # to create the superuser.
              - |-
                """
                from django.contrib.auth.models import User;
                from hc.accounts.views import _make_user;
                email = '$SUPERUSER_EMAIL';
                password = '$SUPERUSER_PASSWORD';
                if User.objects.filter(email=email).count()==0:
                    user = _make_user(email);
                    user.set_password(password);
                    user.is_staff = True;
                    user.is_superuser = True;
                    user.save();
                    print('Superuser created.');
                else:
                    print('Superuser creation skipped. Already exists.');
                """
            env:
              SUPERUSER_EMAIL:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: SUPERUSER_EMAIL
              SUPERUSER_PASSWORD:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: SUPERUSER_PASSWORD
        containers:
          app:
            image:
              repository: docker.io/healthchecks/healthchecks
              tag: v3.7
            # https://healthchecks.io/docs/self_hosted_configuration/
            # Environment variables are strings; number-like values are quoted
            # so YAML does not type them as integers.
            env:
              DEBUG: "False"
              REGISTRATION_OPEN: "False"
              SITE_ROOT: "https://healthchecks.${SECRET_EXTERNAL_DOMAIN}"
              SITE_NAME: Healthchecks
              SITE_LOGO_URL: /static/img/logo.svg
              DEFAULT_FROM_EMAIL: "Healthchecks <${SECRET_SMTP_FROM}>"
              EMAIL_HOST: maddy.default.svc.cluster.local
              EMAIL_PORT: "25"
              EMAIL_USE_TLS: "False"
              EMAIL_USE_VERIFICATION: "False"
              INTEGRATIONS_ALLOW_PRIVATE_IPS: "True"
              DB: postgres
              DB_HOST: *dbHost
              DB_NAME: *dbName
              DB_PORT: "5432"
              ADMINS:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: SUPERUSER_EMAIL
              PUSHOVER_EMERGENCY_RETRY_DELAY: "300"  # 5 minutes
              PUSHOVER_EMERGENCY_EXPIRATION: "86400"  # 24 hours
            # Every key of healthchecks-secret is also exposed as an env var.
            envFrom:
              - secretRef:
                  name: healthchecks-secret
            probes:
              startup:
                enabled: true
                spec:
                  # 30 failures x 5 s period = up to 150 s allowed for startup.
                  failureThreshold: 30
                  periodSeconds: 5
              liveness:
                enabled: true
              readiness:
                enabled: true
            securityContext:
              allowPrivilegeEscalation: false
              readOnlyRootFilesystem: true
              capabilities: { drop: ["ALL"] }
            resources:
              limits:
                memory: 512Mi
    defaultPodOptions:
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        runAsGroup: 65534
        fsGroup: 65534
        seccompProfile: { type: RuntimeDefault }
    service:
      app:
        controller: healthchecks
        ports:
          http:
            # Service port 80 forwards to container port 8000.
            port: 80
            targetPort: 8000
    serviceMonitor:
      healthchecks:
        enabled: true
        serviceName: healthchecks
        endpoints:
          - port: http
            scheme: http
            # Substituted before apply; quoted so an empty expansion stays a string.
            path: "${service_monitor_path}"
    ingress:
      app:
        className: external
        annotations:
          external-dns.alpha.kubernetes.io/target: "external.${SECRET_EXTERNAL_DOMAIN}"
        hosts:
          - host: "healthchecks.${SECRET_EXTERNAL_DOMAIN}"
            paths:
              - path: /
                service:
                  identifier: app
                  port: http
    persistence:
      logo:
        type: configMap
        name: healthchecks-config
        globalMounts:
          # gets turned into `/static/img/logo.svg`
          - path: /opt/healthchecks/static-collected/img/logo.svg
            subPath: logo.svg
            readOnly: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Kustomization assembling the Healthchecks app: its secrets, the HelmRelease,
# a shared gatus template, and a generated ConfigMap carrying the logo.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./secret.sops.yaml
  # Secret from the cloudnative-pg app directory; presumably the
  # `cloudnative-pg-secret` read by the init-db container - confirm.
  - ../../../database/cloudnative-pg/app/secret.sops.yaml
  - ./helmrelease.yaml
  - ../../../../templates/gatus/external
configMapGenerator:
  # Referenced by name from the HelmRelease's `persistence.logo` entry.
  - name: healthchecks-config
    files:
      - logo.svg=./resources/logo.svg
generatorOptions:
  # Keeps the generated ConfigMap name free of a content-hash suffix.
  disableNameSuffixHash: true
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# yamllint disable
kind: Secret
apiVersion: v1
type: Opaque
metadata:
name: healthchecks-secret
stringData:
SUPERUSER_EMAIL: ENC[AES256_GCM,data:dQXzU3zugs/s9W/1/1mpR5hduFc=,iv:xCYSkMHbBiRHjE/GLyAYZ5K1G3Em7Qf3AXm3Hd+logA=,tag:M99KCcDsLERem20FapVSnQ==,type:str]
SUPERUSER_PASSWORD: ENC[AES256_GCM,data:4DJDEJa56a1Qe6MMj2LPSIJf74f9RxM3tUAnjA==,iv:7g59fEG0YbyTuyCSKfhWXjAGp2Q0FgQ0zRdwU0ELDnw=,tag:zCpT42vdO++HBmpF0gkB2w==,type:str]
SECRET_KEY: ENC[AES256_GCM,data:q2MdSJAZgzxQJTxs0H4hc/MW7xXfw/Iy4ZHkhEMx0+Q2rfZ8zL+2KORJn6FR6ZJa548Sj8B6skRg6y0/zkkUHA==,iv:Bs7aDBQaq291PhnvIot6D5Bsv0GbMkkfgZEgrj+NaPc=,tag:Xnnc+tQqQJ6783r0JrgoSQ==,type:str]
DB_USER: ENC[AES256_GCM,data:uDzMxcAoEmKLQSHt,iv:eKsoC3fGlxFjr4uAdksw8u5O5KW2YAaDr9m14QHB0ig=,tag:pMyH8DOJhOqAuzHCcm3vng==,type:str]
DB_PASSWORD: ENC[AES256_GCM,data:vHTMZ/iK6NiK5BkISOp+etnlL2RPBGV+xt5hdL5H5qXXGb/YZOHRJqNY,iv:Q7SQQgdKfHWlE1UReerIOPJROq88a1KB64ihWomD7e0=,tag:hxGjaUeepSVJGeZQlOYUhw==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age1ey3reuxyffqynll464r4q3tlhq5v73nxesyktr44lfez8jzxm94s0644n7
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBoQzZwSHBYYTlObGQwU0Y2
OFp6V1lkM0NJaVlIZGhTNlhEbUlnZG8zcW1rCkhtODJRK3Q2RlVrWDNVck1wRDhX
YTVuT3Fab3JOYm9zU3Zyd2kvcWhUOGMKLS0tIGZOT291bDFaeDF5MHFzRHN4YVpB
cmVydHMxYUxCYlBxUUlIK2x3L3g4QWMKUNBJNbB435ngca4CA/vLqVnDR3a5VKKy
yr7t/pZfTg9kBf4XOXZAtrMhtOtSAvMguc0/DrRVuEC8HJPA6hEsKw==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2024-11-19T21:49:54Z"
mac: ENC[AES256_GCM,data:OkSqaofATRaRFi9dp4wI3aXRDT4qnt1+Mnkv5zSHcZ2K9u2JVq56m8aqYt33eI4tqJbzNX3as7OzSlkhrptV+vkjgMj8t2bYkzbLVTfUWmRHzne0EYOlLIPHVkQPaIzdEQ7iMPuB76HpY6qD0RQgAuf2mI6QUOox6WD6S3O7jE4=,iv:K76/oCCnmszQdNeKUYzvIeyd3lHJRJIEn0OD9pLN5ag=,tag:DB1LDNQ4kOfB3kCNVshiIA==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.9.1
Loading

0 comments on commit a56bd85

Please sign in to comment.