Skip to content

Commit

Permalink
--wip-- [skipci]
Browse files Browse the repository at this point in the history
  • Loading branch information
Liana64 committed Nov 19, 2024
1 parent d3a0a2a commit 225f7a7
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
retries: 3
values:
controllers:
ollama:
colabfold:
type: deployment
annotations:
reloader.stakater.com/auto: "true"
Expand Down Expand Up @@ -56,11 +56,11 @@ spec:
requests:
cpu: 200m
memory: 4Gi
gpu.intel.com/i915: "4"
nvidia.com/gpu: 2
limits:
cpu: 32000m
memory: 64Gi
gpu.intel.com/i915: "4"
nvidia.com/gpu: 4
service:
app:
controller: *app
Expand Down Expand Up @@ -88,6 +88,7 @@ spec:
- secretName: colabfold-tls
hosts: [*host]
persistence:
# TODO: Replace with existing PVC
data:
storageClass: local-nvme
accessMode: ReadWriteMany
Expand Down
76 changes: 39 additions & 37 deletions kubernetes/arc1/apps/machine-learning/mmseqs2/app/helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
---
# yamllint disable
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# TODO: Finish this, test MMSeqs2-App
# TODO: Finish this
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: &app ollama
name: &app mmseqs2
spec:
interval: 30m
chart:
Expand All @@ -26,58 +25,61 @@ spec:
retries: 3
values:
controllers:
ollama:
qlora:
type: deployment
annotations:
reloader.stakater.com/auto: "true"
pod:
runtimeClassName: nvidia
terminationGracePeriodSeconds: 1
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nvidia.com/gpu.present
operator: In
values:
- "true"
containers:
app:
image:
repository: ghcr.io/soedinglab/mmseqs2
tag: master
tag: main@sha256:e56350596e17af5198bfc848b0de3a5a11cb98d97e1e02dbb322467269342541
env:
TZ: ${TIMEZONE}
# GITHUB_REPO: https://github.com/RareCompute/example-models
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: all
securityContext:
capabilities.drop: ["ALL"]
resources:
requests:
cpu: 200m
memory: 4Gi
# gpu.intel.com/i915: "1"
limits:
cpu: 8000m
memory: 8Gi
# gpu.intel.com/i915: "1"
limits:
cpu: 16
memory: 32Gi
nvidia.com/gpu: 4
service:
app:
controller: *app
annotations:
ports:
http:
port: &port 8877
# maybe https://github.com/soedinglab/MMseqs2-App
ingress:
app:
className: traefik
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-production"
gethomepage.dev/enabled: "true"
gethomepage.dev/group: Research
gethomepage.dev/name: MMSeqs2
gethomepage.dev/description:
gethomepage.dev/icon: *app
hosts:
- host: &host "mmseqs2.${SECRET_INTERNAL_DOMAIN}"
paths:
- path: /
service:
identifier: app
port: http
tls:
- secretName: mmseqs2-tls
hosts: [*host]
port: &port 80
persistence:
data:
app:
storageClass: local-nvme
accessMode: ReadWriteMany
size: 1024Gi
retain: true
accessMode: ReadWriteOnce
size: 2Gi
globalMounts:
- path: /app
workspace:
existingClaim: qlora-workspace
globalMounts:
- path: /workspace
tmp:
type: emptyDir
globalMounts:
- path: /models
- path: /tmp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ spec:
- path: /app
workspace:
storageClass: local-nvme
# TODO: OpenEBS only support ReadWriteOnce
accessMode: ReadWriteOnce
size: 2048Gi
retain: true
Expand Down
186 changes: 186 additions & 0 deletions kubernetes/arc1/apps/observability/healthchecks/app/helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json
# Flux HelmRelease deploying Healthchecks through the bjw-s app-template chart.
# Two init containers run before the app: init-db (postgres-init image) and
# init-user (a Django shell invocation in the Healthchecks image).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: healthchecks
spec:
  interval: 30m
  chart:
    spec:
      chart: app-template
      version: 3.5.1
      sourceRef:
        kind: HelmRepository
        name: bjw-s
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
  values:
    controllers:
      healthchecks:
        strategy: RollingUpdate
        annotations:
          reloader.stakater.com/auto: "true"
        initContainers:
          init-db:
            image:
              repository: ghcr.io/onedr0p/postgres-init
              # Quoted so the tag stays a string instead of the integer 16.
              tag: "16"
            env:
              # Anchored here and reused by the app container's DB_HOST / DB_NAME.
              INIT_POSTGRES_HOST: &dbHost postgres16-rw.database.svc.cluster.local
              INIT_POSTGRES_DBNAME: &dbName healthchecks
              INIT_POSTGRES_USER:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: DB_USER
              INIT_POSTGRES_PASS:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: DB_PASSWORD
              INIT_POSTGRES_SUPER_PASS:
                valueFrom:
                  secretKeyRef:
                    name: cloudnative-pg-secret
                    key: password
          init-user:
            dependsOn: [init-db]
            image:
              repository: docker.io/healthchecks/healthchecks
              tag: v3.7
            command: [python3]
            args:
              - manage.py
              - shell
              - -v
              - '3'
              - -c
              # https://github.com/linuxserver/docker-healthchecks/blob/9aedb6911bd4dd49f637145b04ad2aeb4339e78b/root/etc/s6-overlay/s6-rc.d/init-healthchecks-config/run#L52-L66
              # NOTE(review): the script is enclosed in triple quotes, so Python
              # would evaluate it as a bare string literal instead of running it,
              # and $SUPERUSER_EMAIL / $SUPERUSER_PASSWORD are not written in
              # Kubernetes' $(VAR) form. Confirm this container really creates
              # the superuser before relying on it.
              - |-
                """
                from django.contrib.auth.models import User;
                from hc.accounts.views import _make_user;
                email = '$SUPERUSER_EMAIL';
                password = '$SUPERUSER_PASSWORD';
                if User.objects.filter(email=email).count()==0:
                    user = _make_user(email);
                    user.set_password(password);
                    user.is_staff = True;
                    user.is_superuser = True;
                    user.save();
                    print('Superuser created.');
                else:
                    print('Superuser creation skipped. Already exists.');
                """
            env:
              SUPERUSER_EMAIL:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: SUPERUSER_EMAIL
              SUPERUSER_PASSWORD:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: SUPERUSER_PASSWORD
        containers:
          app:
            image:
              repository: docker.io/healthchecks/healthchecks
              tag: v3.7
            # https://healthchecks.io/docs/self_hosted_configuration/
            # Environment values are strings; number-looking ones are quoted so
            # YAML does not retype them.
            env:
              DEBUG: "False"
              REGISTRATION_OPEN: "False"
              SITE_ROOT: "https://healthchecks.${SECRET_EXTERNAL_DOMAIN}"
              SITE_NAME: Healthchecks
              SITE_LOGO_URL: /static/img/logo.svg
              DEFAULT_FROM_EMAIL: "Healthchecks <${SECRET_SMTP_FROM}>"
              EMAIL_HOST: maddy.default.svc.cluster.local
              EMAIL_PORT: "25"
              EMAIL_USE_TLS: "False"
              EMAIL_USE_VERIFICATION: "False"
              INTEGRATIONS_ALLOW_PRIVATE_IPS: "True"
              DB: postgres
              DB_HOST: *dbHost
              DB_NAME: *dbName
              DB_PORT: "5432"
              ADMINS:
                valueFrom:
                  secretKeyRef:
                    name: healthchecks-secret
                    key: SUPERUSER_EMAIL
              PUSHOVER_EMERGENCY_RETRY_DELAY: "300"  # 5 minutes
              PUSHOVER_EMERGENCY_EXPIRATION: "86400"  # 24 hours
            # Every key of healthchecks-secret is also exposed as an env var.
            envFrom:
              - secretRef:
                  name: healthchecks-secret
            probes:
              startup:
                enabled: true
                spec:
                  # 30 failures x 5 s period = up to 150 s allowed for startup.
                  failureThreshold: 30
                  periodSeconds: 5
              liveness:
                enabled: true
              readiness:
                enabled: true
            securityContext:
              allowPrivilegeEscalation: false
              readOnlyRootFilesystem: true
              capabilities: { drop: ["ALL"] }
            resources:
              limits:
                memory: 512Mi
    defaultPodOptions:
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        runAsGroup: 65534
        fsGroup: 65534
        seccompProfile: { type: RuntimeDefault }
    service:
      app:
        controller: healthchecks
        ports:
          http:
            # Service port 80 forwards to container port 8000.
            port: 80
            targetPort: 8000
    serviceMonitor:
      healthchecks:
        enabled: true
        serviceName: healthchecks
        endpoints:
          - port: http
            scheme: http
            # NOTE(review): ${service_monitor_path} is not defined in this
            # file; presumably it is substituted before apply. Confirm.
            path: ${service_monitor_path}
    ingress:
      app:
        className: external
        annotations:
          external-dns.alpha.kubernetes.io/target: "external.${SECRET_EXTERNAL_DOMAIN}"
        hosts:
          - host: "healthchecks.${SECRET_EXTERNAL_DOMAIN}"
            paths:
              - path: /
                service:
                  identifier: app
                  port: http
    persistence:
      logo:
        type: configMap
        name: healthchecks-config
        globalMounts:
          # gets turned into `/static/img/logo.svg`
          - path: /opt/healthchecks/static-collected/img/logo.svg
            subPath: logo.svg
            readOnly: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Kustomization for the healthchecks app: lists its manifests and generates
# the ConfigMap that carries the logo file.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./secret.sops.yaml
  # NOTE(review): this secret lives in another app's directory; presumably it
  # is included so the cloudnative-pg credentials exist alongside this app.
  # Confirm.
  - ../../../database/cloudnative-pg/app/secret.sops.yaml
  - ./helmrelease.yaml
  - ../../../../templates/gatus/external
configMapGenerator:
  - name: healthchecks-config
    files:
      # key=path form: the ConfigMap key is logo.svg.
      - logo.svg=./resources/logo.svg
generatorOptions:
  # Keep the generated name exactly `healthchecks-config` (no hash suffix).
  disableNameSuffixHash: true
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# yamllint disable
kind: Secret
apiVersion: v1
type: Opaque
metadata:
name: healthchecks-secret
stringData:
SUPERUSER_EMAIL: ENC[AES256_GCM,data:pHG0WJarDrP7PD3vY6ZY5ujmd2c=,iv:h5s62B9yOiicezuprNzEEiZvElyINrDLKtXdOzxqCik=,tag:mhmYwclXZPrCxShtZziRZQ==,type:str]
SUPERUSER_PASSWORD: ENC[AES256_GCM,data:9ehrErSnrM/KqopibELYYXTZnt6uXeQXVDDjc59hMA==,iv:ApdnfLZqxcJBG5lsCaW6abKKgW6IsOAmv/Xy3ANpcas=,tag:neoIB4NTu9ndoMC2RfIb1Q==,type:str]
SECRET_KEY: ENC[AES256_GCM,data:Re/lil2sXZ5u6qXS2kc0Gx/ihRynVXngV64JRxS1FLDPnwyXaFiGnz1sU0eDbtOZAcJxJC6fqHIRr9U9Fsz9k4KgPqtfuJjIjRlA4Y48/gaEelpO2bDyHStnViNeFnJRJLGBYGTKsGFT0JDoFr8yJP9x/OC5dajXL674jfVGZgY=,iv:oBwDCc1dCGWO+GhlcvQRO+ilLQ9hycXf3yaSvS8HCBU=,tag:YyazrhoavMFJ9l5SKpCyAw==,type:str]
DB_USER: ENC[AES256_GCM,data:OgAHtzsuLy6NM4eY,iv:leUjOb65qm3uxBNqEQORpPOS8qlUCIVBOhO7oARMOtI=,tag:oS1BDwqT+fBueY5FM0a4OQ==,type:str]
DB_PASSWORD: ENC[AES256_GCM,data:AFt+9nTZIO2/Ko9tJPySh+MIxc+iUy/t6GPws7AOmuyoT+DPJdI64phZbjwNlXhOgkl2j0/aqthj3olzBPZJJA==,iv:T9IBM5RMumT7ZedpTOjRhQZzWwEwj5ohamOvrt0hTd0=,tag:Umb4BTz5Pxi7xV7Cp+xKWQ==,type:str]
PUSHOVER_API_TOKEN: ENC[AES256_GCM,data:hU2AVJ+cJsxUcu6yT5b9vHoZjDpBQzzmMD7QqDKh,iv:/RPzhz1XjnqheSoWz6alPShM0h2qpJdDhKOseOAiKD8=,tag:M1yXjvpYTLx/e8J73hbwtw==,type:str]
PUSHOVER_SUBSCRIPTION_URL: ENC[AES256_GCM,data:onpbih248NE1BoX9scwXdIrN4p/cmkio1VVvtfexhPhJbg6OfomOw2oidW6tiGasklSS/RWLb1oBSL4=,iv:pDbLY0R/OMtlB6IKCgHjDZh5cljnGIu7t4I+1prB7hE=,tag:p+8bUsAMFSyfWwME4oFFpQ==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBERWp2UXdDZ29naERzWGZw
OS9SM1RSMVh5cGw2Q3pMelYxcG9RdGNudTAwCkFneXFlTXdpZitic2daOW1Ob2hq
QXl3UnBIYmtyNCtxQjV1dVFaRjFGL0kKLS0tIEhOZTVmU0xJM3FEUDVjQ0d2Wml2
UmFGTlVDd3JRN1UyRG1HNURlL1RKUHcK0NrD/pNpAIOo/iEDhxcf1GxE1YlaK/lj
JiKBbzi3Zf1PbCdogcPUtWj4U4E9OZXo7BgG9bfW172bgGporM/ehQ==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2024-05-24T18:15:38Z"
mac: ENC[AES256_GCM,data:nBfQkBmR3CX3v+XLN2jawtn9LKESGag7eiw/FM8bo9WsO1Kf1yFYsFL97L8umrtOuLbqQZ5ndiq33GQgvUIw3Z7QA7ni0/5pZ16G/2YxJwpR0nV/ahigUoCdHoQ2M0IptYgCvjxBKdtKxxIF/ds4DE4WFXecOjdFmalF/JswZnk=,iv:qrZgasdquQFb7EjxuvIAolDu1LzrUuJnOrar34ISYoU=,tag:GmQc2KIDWQAoQwad4hoJuw==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.8.1
Loading

0 comments on commit 225f7a7

Please sign in to comment.