diff --git a/.dockerignore b/.dockerignore index f23c963a415..2caea16db95 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,6 @@ .stack-root-buildah .local services/nginz/src/objs +dist-newstyle +.env +.direnv diff --git a/changelog.d/2-features/cannon-nginz b/changelog.d/2-features/cannon-nginz new file mode 100644 index 00000000000..56e884c6605 --- /dev/null +++ b/changelog.d/2-features/cannon-nginz @@ -0,0 +1,4 @@ +Optionally allow to run cannon with its own nginz inside the same pod; and connect to a load balancer directly. +This allows the cannon-slow-drain behaviour implemented in #2416 to take effect by not having other intermediate network hops which could break websocket connections all at once. +Some (internal) context: https://wearezeta.atlassian.net/wiki/spaces/PS/pages/585564424/How+to+gracefully+drain+cannon+but+not+so+slowly +For details on how to configure this, see docs/src/how-to/install/configuration-options.rst diff --git a/charts/cannon/conf/static/zauth.acl b/charts/cannon/conf/static/zauth.acl new file mode 100644 index 00000000000..9498b8cc43f --- /dev/null +++ b/charts/cannon/conf/static/zauth.acl @@ -0,0 +1,17 @@ +a (blacklist (path "/provider") + (path "/provider/**") + (path "/bot") + (path "/bot/**") + (path "/i/**")) + +b (whitelist (path "/bot") + (path "/bot/**")) + +p (whitelist (path "/provider") + (path "/provider/**")) + +# LegalHold Access Tokens +la (whitelist (path "/notifications") + (path "/assets/v3/**") + (path "/users") + (path "/users/**")) diff --git a/charts/cannon/templates/conf/_nginx.conf.tpl b/charts/cannon/templates/conf/_nginx.conf.tpl new file mode 100644 index 00000000000..98b16fc3793 --- /dev/null +++ b/charts/cannon/templates/conf/_nginx.conf.tpl @@ -0,0 +1,342 @@ +{{- define "cannon_nginz_nginx.conf" }} +user {{ .Values.nginx_conf.user }} {{ .Values.nginx_conf.group }}; +worker_processes {{ .Values.nginx_conf.worker_processes }}; +worker_rlimit_nofile {{ .Values.nginx_conf.worker_rlimit_nofile | 
default 1024 }}; +pid /var/run/nginz.pid; + +# nb. start up errors (eg. misconfiguration) may still end up in +# /var/log/nginz/error.log +error_log stderr warn; + +events { + worker_connections {{ .Values.nginx_conf.worker_connections | default 1024 }}; + multi_accept off; + use epoll; +} + +http { + # + # Sockets + # + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + + # + # Timeouts + # + + client_body_timeout 60; + client_header_timeout 60; + keepalive_timeout 75; + send_timeout 60; + + ignore_invalid_headers off; + + types_hash_max_size 2048; + + server_names_hash_bucket_size 64; + server_name_in_redirect off; + + large_client_header_buffers 4 8k; + + + # + # Security + # + + server_tokens off; + + # + # Logging + # + # Note sanitized_request: + # We allow passing access_token as query parameter for e.g. websockets + # However we do not want to log access tokens. + # + + log_format custom_zeta '$remote_addr $remote_user "$time_local" "$sanitized_request" $status $body_bytes_sent "$http_referer" "$http_user_agent" $http_x_forwarded_for $connection $request_time $upstream_response_time $upstream_cache_status $zauth_user $zauth_connection $request_id $proxy_protocol_addr "$http_tracestate"'; + access_log /dev/stdout custom_zeta; + + # + # Monitoring + # + vhost_traffic_status_zone; + + # + # Gzip + # + + gzip on; + gzip_disable msie6; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_buffers 16 8k; + gzip_http_version 1.1; + gzip_min_length 1024; + gzip_types text/plain text/css application/json application/x-javascript text/xml application/xml application/xml+rss text/javascript; + + # + # This directive ensures that X-Forwarded-For is used + # as the client's real IP address (since nginz is always + # behind an ELB, remote_addr now becomes the client's real + # IP address) + # + + real_ip_header X-Forwarded-For; + set_real_ip_from 0.0.0.0/0; + + # + # Rate Limiting Exemptions + # + + geo $rate_limit { + default 1; + + # IPs to exempt can be 
added in the .Values.nginx_conf.rate_limit_exemptions and .Values.nginx_conf.simulators helm values + {{ if (hasKey .Values.nginx_conf "rate_limit_exemptions") }} + {{ range $ip := .Values.nginx_conf.rate_limit_exemptions }} + {{ $ip }} 0; + {{ end }} + {{ end }} + + {{ if (hasKey .Values.nginx_conf "simulators") }} + {{ range $ip := .Values.nginx_conf.simulators }} + {{ $ip }} 0; + {{ end }} + {{ end }} + } + + # + # Rate Limiting Mapping + # + + map $rate_limit $rate_limited_by_addr { + 1 "$binary_remote_addr$uri"; + 0 ""; + } + + map $rate_limit $rate_limited_by_zuser { + 1 $zauth_user; + 0 ""; + } + + map $http_origin $cors_header { + default ""; + {{ range $origin := .Values.nginx_conf.allowlisted_origins }} + "https://{{ $origin }}.{{ $.Values.nginx_conf.external_env_domain}}" "$http_origin"; + {{ end }} + + # Allow additional origins at random ports. This is useful for testing with an HTTP proxy. + # It should not be used in production. + {{ range $origin := .Values.nginx_conf.randomport_allowlisted_origins }} + "~^https://{{ $origin }}.{{ $.Values.nginx_conf.external_env_domain}}(:[0-9]{2,5})?$" "$http_origin"; + {{ end }} + } + + + # + # Rate Limiting + # + + limit_req_zone $rate_limited_by_zuser zone=reqs_per_user:12m rate=10r/s; + limit_req_zone $rate_limited_by_addr zone=reqs_per_addr:12m rate=5r/m; + + limit_conn_zone $rate_limited_by_zuser zone=conns_per_user:10m; + limit_conn_zone $rate_limited_by_addr zone=conns_per_addr:10m; + + # Too Many Requests (420) is returned on throttling + # TODO: Change to 429 once all clients support this + limit_req_status 420; + limit_conn_status 420; + + limit_req_log_level warn; + limit_conn_log_level warn; + + # Limit by $zauth_user if present and not part of rate limit exemptions + limit_req zone=reqs_per_user burst=20; + limit_conn conns_per_user 25; + + # + # Proxied Upstream Services + # + + upstream cannon { + least_conn; + keepalive 32; + server localhost:{{ .Values.service.internalPort }}; + } + + # + # Mapping for 
websocket connections + # + + map $http_upgrade $connection_upgrade { + websocket upgrade; + default ''; + } + + + + # + # Locations + # + + server { + listen {{ .Values.service.nginz.internalPort }} ssl; + + ssl_certificate /etc/wire/nginz/tls/tls.crt; + ssl_certificate_key /etc/wire/nginz/tls/tls.key; + + ssl_protocols {{ .Values.nginx_conf.tls.protocols }}; + ssl_ciphers {{ .Values.nginx_conf.tls.ciphers }}; + + # Disable session resumption. See comments in SQPIT-226 for more context and + # discussion. + ssl_session_tickets off; + ssl_session_cache off; + + zauth_keystore {{ .Values.nginx_conf.zauth_keystore }}; + zauth_acl {{ .Values.nginx_conf.zauth_acl }}; + + location /status { + zauth off; + access_log off; + + return 200; + } + + location /vts { + zauth off; + access_log off; + allow 10.0.0.0/8; + allow 127.0.0.1; + deny all; + + # Requests with an X-Forwarded-For header will have the real client + # source IP address set correctly, due to the real_ip_header directive + # in the top-level configuration. However, this will not set the client + # IP correctly for clients which are connected via a load balancer which + # uses the PROXY protocol. + # + # Hence, for safety, we deny access to the vts metrics endpoints to + # clients which are connected via PROXY protocol. 
+ if ($proxy_protocol_addr != "") { + return 403; + } + + vhost_traffic_status_display; + vhost_traffic_status_display_format html; + } + + # Block "Franz" -- http://meetfranz.com + if ($http_user_agent ~* Franz) { + return 403; + } + + {{ range $path := .Values.nginx_conf.disabled_paths }} + location ~* ^(/v[0-9]+)?{{ $path }} { + + return 404; + } + {{ end }} + + # + # Service Routing + # + + {{ range $name, $locations := .Values.nginx_conf.upstreams -}} + {{- range $location := $locations -}} + {{- if hasKey $location "envs" -}} + {{- range $env := $location.envs -}} + {{- if or (eq $env $.Values.nginx_conf.env) (eq $env "all") -}} + + {{- if $location.strip_version }} + + rewrite ^/v[0-9]+({{ $location.path }}) $1; + {{- end }} + + {{- $versioned := ternary $location.versioned true (hasKey $location "versioned") -}} + {{- $path := printf "%s%s" (ternary "(/v[0-9]+)?" "" $versioned) $location.path }} + + location ~* ^{{ $path }} { + + # remove access_token from logs, see 'Note sanitized_request' above. 
+ set $sanitized_request $request; + if ($sanitized_request ~ (.*)access_token=[^&\s]*(.*)) { + set $sanitized_request $1access_token=****$2; + } + + {{- if ($location.disable_zauth) }} + zauth off; + + # If zauth is off, limit by remote address if not part of limit exemptions + {{- if ($location.unlimited_requests_endpoint) }} + # Note that this endpoint has no rate limit + {{- else -}} + limit_req zone=reqs_per_addr burst=5 nodelay; + limit_conn conns_per_addr 20; + {{- end -}} + {{- end }} + + if ($request_method = 'OPTIONS') { + add_header 'Access-Control-Allow-Methods' "GET, POST, PUT, DELETE, OPTIONS"; + add_header 'Access-Control-Allow-Headers' "$http_access_control_request_headers, DNT,X-Mx-ReqToken,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type"; + add_header 'Content-Type' 'text/plain; charset=UTF-8'; + add_header 'Content-Length' 0; + return 204; + } + + proxy_pass http://{{ $name }}; + proxy_http_version 1.1; + + {{- if ($location.disable_request_buffering) }} + proxy_request_buffering off; + {{ end -}} + {{- if (hasKey $location "body_buffer_size") }} + client_body_buffer_size {{ $location.body_buffer_size -}}; + {{- end }} + client_max_body_size {{ $location.max_body_size | default "64k" }}; + + {{ if ($location.use_websockets) }} + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_read_timeout 1h; + {{- else }} + proxy_set_header Connection ""; + {{ end -}} + + {{- if not ($location.disable_zauth) }} + proxy_set_header Authorization ""; + {{- end }} + + proxy_set_header Z-Type $zauth_type; + proxy_set_header Z-User $zauth_user; + proxy_set_header Z-Connection $zauth_connection; + proxy_set_header Z-Provider $zauth_provider; + proxy_set_header Z-Bot $zauth_bot; + proxy_set_header Z-Conversation $zauth_conversation; + proxy_set_header Request-Id $request_id; + + {{- if ($location.allow_credentials) }} + more_set_headers 'Access-Control-Allow-Credentials: true'; + {{ 
end -}} + + more_set_headers 'Access-Control-Allow-Origin: $cors_header'; + + more_set_headers 'Access-Control-Expose-Headers: Request-Id, Location'; + more_set_headers 'Request-Id: $request_id'; + more_set_headers 'Strict-Transport-Security: max-age=31536000; preload'; + } + + {{- end -}} + {{- end -}} + + {{- end -}} + {{- end -}} + {{- end }} + } +} +{{- end }} diff --git a/charts/cannon/templates/configmap.yaml b/charts/cannon/templates/configmap.yaml index 5513c279c06..17a00a5c7ed 100644 --- a/charts/cannon/templates/configmap.yaml +++ b/charts/cannon/templates/configmap.yaml @@ -1,8 +1,7 @@ apiVersion: v1 data: cannon.yaml: | - logNetStrings: True # log using netstrings encoding: - # http://cr.yp.to/proto/netstrings.txt + logFormat: StructuredJSON logLevel: {{ .Values.config.logLevel }} cannon: diff --git a/charts/cannon/templates/nginz-certificate-secret.yaml b/charts/cannon/templates/nginz-certificate-secret.yaml new file mode 100644 index 00000000000..4531ad19e3b --- /dev/null +++ b/charts/cannon/templates/nginz-certificate-secret.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.service.nginz.enabled (not .Values.service.nginz.certManager.enabled ) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.service.nginz.tls.secretName }} + labels: + wireService: cannon-nginz + app: cannon-nginz + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +type: kubernetes.io/tls +data: # values under .data must be base64-encoded, hence b64enc (same as zauth.conf in nginz-secret.yaml) + tls.crt: {{ .Values.secrets.nginz.tls.crt | b64enc | quote }} + tls.key: {{ .Values.secrets.nginz.tls.key | b64enc | quote }} +{{- end }} diff --git a/charts/cannon/templates/nginz-certificate.yaml b/charts/cannon/templates/nginz-certificate.yaml new file mode 100644 index 00000000000..4245befdfbf --- /dev/null +++ b/charts/cannon/templates/nginz-certificate.yaml @@ -0,0 +1,30 @@ +{{- if and .Values.service.nginz.enabled .Values.service.nginz.certManager.enabled -}} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ 
.Values.service.nginz.certManager.certificate.name }} + namespace: {{ .Release.Namespace }} + labels: + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + issuerRef: + name: {{ .Values.service.nginz.certManager.issuer.name }} + kind: {{ .Values.service.nginz.certManager.issuer.kind }} + usages: + - server auth + duration: 2160h # 90d, Letsencrypt default; NOTE: changes are ignored by Letsencrypt + renewBefore: 360h # 15d + isCA: false + secretName: {{ .Values.service.nginz.tls.secretName }} + + privateKey: + algorithm: ECDSA + size: 384 # 521 is not supported by Letsencrypt + encoding: PKCS1 + rotationPolicy: Always + + dnsNames: + - {{ required "Please provide .service.nginz.hostname when .service.nginz.enabled and .service.nginz.certManager.enabled are True" .Values.service.nginz.hostname | quote }} +{{- end -}} diff --git a/charts/cannon/templates/nginz-configmap.yaml b/charts/cannon/templates/nginz-configmap.yaml new file mode 100644 index 00000000000..9c946455c95 --- /dev/null +++ b/charts/cannon/templates/nginz-configmap.yaml @@ -0,0 +1,10 @@ +{{- if .Values.service.nginz.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: cannon-nginz +data: + nginx.conf: |2 +{{- include "cannon_nginz_nginx.conf" . 
| indent 4 }} +{{ (.Files.Glob "conf/static/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/cannon/templates/nginz-secret.yaml b/charts/cannon/templates/nginz-secret.yaml new file mode 100644 index 00000000000..23dd7c7d0c9 --- /dev/null +++ b/charts/cannon/templates/nginz-secret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.service.nginz.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: cannon-nginz + labels: + wireService: cannon-nginz + app: cannon-nginz + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +type: Opaque +data: + {{/* for_helm_linting is necessary only since the 'with' block below does not throw an error upon an empty .Values.secrets */}} + for_helm_linting: {{ required "No .secrets found in configuration. Did you forget to helm -f path/to/secrets.yaml ?" .Values.secrets | quote | b64enc | quote }} + + {{- with .Values.secrets.nginz }} + zauth.conf: {{ .zAuth.publicKeys | b64enc | quote }} + {{- end }} +{{- end }} diff --git a/charts/cannon/templates/nginz-service.yaml b/charts/cannon/templates/nginz-service.yaml new file mode 100644 index 00000000000..fd820c2b75f --- /dev/null +++ b/charts/cannon/templates/nginz-service.yaml @@ -0,0 +1,40 @@ +{{- if .Values.service.nginz.enabled }} +# This service has to be exposed using type `LoadBalancer` to ensure that there +# is no other pod between the load balancer and this service. This ensures that +# the only thing which disrupts the websocket connection is a cannon pod getting +# stopped. If, like for other services, we had a separate nginz and an +# ingress-controller between the load balancer and the service, stopping any of +# these pods would cause websockets to be disrupted. +# +# In the future, if desired, type=LoadBalancer could also become type=NodePort +# if this is needed on some environments without loadbalancer support. 
+apiVersion: v1 +kind: Service +metadata: + name: {{ .Values.service.nginz.name }} + labels: + wireService: cannon + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + {{- if .Values.service.nginz.externalDNS.enabled }} + external-dns.alpha.kubernetes.io/ttl: {{ .Values.service.nginz.externalDNS.ttl | quote }} + external-dns.alpha.kubernetes.io/hostname: {{ required "Please provide .service.nginz.hostname when .service.nginz.enabled and .service.nginz.externalDNS.enabled are True" .Values.service.nginz.hostname | quote }} + {{- end }} +{{- /* render extra annotations only when set: a bare "{}" after the external-dns keys would be invalid YAML */ -}}{{- with .Values.service.nginz.annotations }}{{ toYaml . | nindent 4 }}{{- end }} +spec: + type: LoadBalancer + # This ensures websocket traffic does not go from one kubernetes node to + # another, if that happened, restarting the originating kubernetes node would + # cause all websocket connections to be severed at once. + externalTrafficPolicy: "Local" + ports: + - name: http + port: {{ .Values.service.nginz.externalPort }} + targetPort: {{ .Values.service.nginz.internalPort }} + protocol: TCP + selector: + wireService: cannon + release: {{ .Release.Name }} +{{- end }} diff --git a/charts/cannon/templates/statefulset.yaml b/charts/cannon/templates/statefulset.yaml index e0c56298abb..c9adaae62bd 100644 --- a/charts/cannon/templates/statefulset.yaml +++ b/charts/cannon/templates/statefulset.yaml @@ -29,9 +29,57 @@ spec: release: {{ .Release.Name }} annotations: checksum/configmap: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- if .Values.service.nginz.enabled }} + checksum/nginz-configmap: {{ include (print .Template.BasePath "/nginz-configmap.yaml") . 
| sha256sum }} + {{- end }} spec: terminationGracePeriodSeconds: {{ add .Values.config.drainOpts.gracePeriodSeconds 5 }} containers: + {{- if .Values.service.nginz.enabled }} + - name: nginz + image: "{{ .Values.nginzImage.repository }}:{{ .Values.nginzImage.tag }}" + imagePullPolicy: "{{ .Values.nginzImage.pullPolicy }}" + env: + # Any file changes to this path causes nginx to reload configs without + # restarting or breaking any connections. + - name: WATCH_PATHS + value: "/etc/wire/nginz/tls" + volumeMounts: + - name: nginz-config + mountPath: /etc/wire/nginz/conf + readOnly: true + - name: nginz-secrets + mountPath: /etc/wire/nginz/secrets + readOnly: true + - name: certificate + mountPath: /etc/wire/nginz/tls + readOnly: true + ports: + - name: https + containerPort: {{ .Values.service.nginz.internalPort }} + readinessProbe: + httpGet: + path: /status + port: {{ .Values.service.nginz.internalPort }} + scheme: HTTPS + livenessProbe: + initialDelaySeconds: 30 + timeoutSeconds: 1 + httpGet: + path: /status + port: {{ .Values.service.nginz.internalPort }} + scheme: HTTPS + lifecycle: + preStop: + exec: + # kubernetes by default sends a SIGTERM to the container, + # which would cause nginz to exit, breaking existing websocket connections. + # Instead we terminate gracefully and sleep given grace period + 5 seconds. 
+ # (SIGTERM is still sent, but afterwards) + command: ["sh", "-c", "nginx -c /etc/wire/nginz/conf/nginx.conf -s quit && sleep {{ add .Values.config.drainOpts.gracePeriodSeconds 5 }}"] + resources: +{{ toYaml .Values.resources | indent 12 }} + {{- end }} - name: cannon image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" volumeMounts: @@ -76,3 +124,14 @@ spec: name: cannon - name: empty emptyDir: {} + {{- if .Values.service.nginz.enabled }} + - name: nginz-config + configMap: + name: cannon-nginz + - name: nginz-secrets + secret: + secretName: cannon-nginz + - name: certificate + secret: + secretName: {{ .Values.service.nginz.tls.secretName }} + {{- end }} diff --git a/charts/cannon/values.yaml b/charts/cannon/values.yaml index 296435dc1d2..ab8ff112fbd 100644 --- a/charts/cannon/values.yaml +++ b/charts/cannon/values.yaml @@ -3,6 +3,10 @@ image: repository: quay.io/wire/cannon tag: do-not-use pullPolicy: IfNotPresent +nginzImage: + repository: quay.io/wire/nginz + tag: do-not-use + pullPolicy: IfNotPresent config: logLevel: Info @@ -15,6 +19,44 @@ config: gracePeriodSeconds: 25 millisecondsBetweenBatches: 50 minBatchSize: 20 + +nginx_conf: + user: nginx + group: nginx + zauth_keystore: /etc/wire/nginz/secrets/zauth.conf + zauth_acl: /etc/wire/nginz/conf/zauth.acl + worker_processes: auto + worker_rlimit_nofile: 131072 + worker_connections: 65536 + disabled_paths: [] + + tls: + protocols: TLSv1.2 TLSv1.3 + # NOTE: These are some sane defaults (compliant with TR-02102-2); you may want to override them on your own installation + # For TR-02102-2 see https://www.bsi.bund.de/SharedDocs/Downloads/EN/BSI/Publications/TechGuidelines/TG02102/BSI-TR-02102-2.html + # As a Wire employee, for Wire-internal discussions and context see + # * https://wearezeta.atlassian.net/browse/FS-33 + # * https://wearezeta.atlassian.net/browse/FS-444 + ciphers: 
"TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256" + + # -- The origins from which we allow CORS requests. These are combined with 'external_env_domain' to form a full url + allowlisted_origins: + - webapp + - teams + - account + # -- The origins from which we allow CORS requests at random ports. This is + # useful for testing with HTTP proxies and should not be used in production. + # The list entries are combined with 'external_env_domain' to form a full url + # regex that matches for all ports. + randomport_allowlisted_origins: [] # default is empty by intention + upstreams: + cannon: + - path: /await + envs: + - all + use_websockets: true + +# FUTUREWORK: allow resources for cannon and nginz to be different resources: requests: memory: "256Mi" @@ -26,3 +68,26 @@ service: name: cannon internalPort: 8080 externalPort: 8080 + nginz: + # Enable this only if service of `type: LoadBalancer` can work in your K8s + # cluster. + enabled: false + # hostname: # Needed when using either externalDNS or certManager + name: cannon-nginz + internalPort: 8443 + externalPort: 443 + annotations: {} + tls: + secretName: cannon-nginz-cert + externalDNS: + enabled: false + ttl: "10m" + certManager: + # When certManager is not enabled, certificates must be provided at + # .secrets.nginz.tls.crt and .secrets.nginz.tls.key. 
+ enabled: false + certificate: + name: cannon-nginz + issuer: + name: letsencrypt + kind: ClusterIssuer diff --git a/charts/nginx-ingress-services/templates/certificate.yaml b/charts/nginx-ingress-services/templates/certificate.yaml index 9b223f11320..58da22ac4d8 100644 --- a/charts/nginx-ingress-services/templates/certificate.yaml +++ b/charts/nginx-ingress-services/templates/certificate.yaml @@ -27,7 +27,9 @@ spec: dnsNames: - {{ .Values.config.dns.https }} + {{- if .Values.websockets.enabled }} - {{ .Values.config.dns.ssl }} + {{- end }} {{- if .Values.webapp.enabled }} - {{ .Values.config.dns.webapp }} {{- end }} diff --git a/charts/nginx-ingress-services/templates/ingress.yaml b/charts/nginx-ingress-services/templates/ingress.yaml index 4ce2619ef90..39fe2e33181 100644 --- a/charts/nginx-ingress-services/templates/ingress.yaml +++ b/charts/nginx-ingress-services/templates/ingress.yaml @@ -10,7 +10,9 @@ spec: tls: - hosts: - {{ .Values.config.dns.https }} +{{- if .Values.websockets.enabled }} - {{ .Values.config.dns.ssl }} +{{- end }} {{- if .Values.webapp.enabled }} - {{ .Values.config.dns.webapp }} {{- end }} @@ -32,6 +34,7 @@ spec: backend: serviceName: nginz-http servicePort: {{ .Values.service.nginz.externalHttpPort }} +{{- if .Values.websockets.enabled }} - host: {{ .Values.config.dns.ssl }} http: paths: @@ -39,6 +42,7 @@ spec: backend: serviceName: nginz-tcp servicePort: {{ .Values.service.nginz.externalTcpPort }} +{{- end }} {{- if .Values.webapp.enabled }} - host: {{ .Values.config.dns.webapp }} http: diff --git a/charts/nginx-ingress-services/templates/service.yaml b/charts/nginx-ingress-services/templates/service.yaml index e9692197008..236789b856d 100644 --- a/charts/nginx-ingress-services/templates/service.yaml +++ b/charts/nginx-ingress-services/templates/service.yaml @@ -10,6 +10,7 @@ spec: targetPort: 8080 selector: wireService: nginz +{{- if .Values.websockets.enabled }} --- apiVersion: v1 kind: Service @@ -22,6 +23,7 @@ spec: targetPort: 8081 
selector: wireService: nginz +{{- end }} {{- if .Values.webapp.enabled }} --- apiVersion: v1 diff --git a/charts/nginx-ingress-services/values.yaml b/charts/nginx-ingress-services/values.yaml index fba9394c0ea..44f49f0847e 100644 --- a/charts/nginx-ingress-services/values.yaml +++ b/charts/nginx-ingress-services/values.yaml @@ -6,6 +6,8 @@ teamSettings: # Account pages may be useful to enable password reset or email validation done after the initial registration accountPages: enabled: false +websockets: + enabled: true webapp: enabled: true fakeS3: @@ -96,7 +98,8 @@ service: # config: # dns: # https: nginz-https. -# ssl: nginz-ssl. +# ssl: nginz-ssl. # For websockets +# ^ ssl is ignored if websockets.enabled == false # webapp: webapp. # ^ webapp is ignored if webapp.enabled == false # fakeS3: assets. diff --git a/charts/nginz/templates/conf/_nginx.conf.tpl b/charts/nginz/templates/conf/_nginx.conf.tpl index cacac26849e..1302356dbaa 100644 --- a/charts/nginz/templates/conf/_nginx.conf.tpl +++ b/charts/nginz/templates/conf/_nginx.conf.tpl @@ -190,8 +190,6 @@ http { location /status { zauth off; access_log off; - allow 10.0.0.0/8; - deny all; return 200; } @@ -236,6 +234,7 @@ http { # {{ range $name, $locations := .Values.nginx_conf.upstreams -}} + {{- if not (has $name $.Values.nginx_conf.ignored_upstreams) -}} {{- range $location := $locations -}} {{- if hasKey $location "envs" -}} {{- range $env := $location.envs -}} @@ -334,6 +333,7 @@ http { {{- end -}} {{- end -}} + {{- end -}} {{- end }} {{ if not (eq $.Values.nginx_conf.env "prod") }} diff --git a/charts/nginz/templates/conf/_upstreams.txt.tpl b/charts/nginz/templates/conf/_upstreams.txt.tpl index 62994068d00..5b7afc4a6f3 100644 --- a/charts/nginz/templates/conf/_upstreams.txt.tpl +++ b/charts/nginz/templates/conf/_upstreams.txt.tpl @@ -1,3 +1,3 @@ {{ define "nginz_upstreams.txt" }} -{{ range $key, $value := .Values.nginx_conf.upstreams }}{{ $key }} {{ end -}} +{{ range $key, $value := 
.Values.nginx_conf.upstreams }}{{ if not (has $key $.Values.nginx_conf.ignored_upstreams) }} {{ $key }} {{ end }}{{ end -}} {{ end }} diff --git a/charts/nginz/templates/deployment.yaml b/charts/nginz/templates/deployment.yaml index 0472c82c01c..d790e229137 100644 --- a/charts/nginz/templates/deployment.yaml +++ b/charts/nginz/templates/deployment.yaml @@ -30,7 +30,7 @@ spec: checksum/secret: {{ include (print .Template.BasePath "/secret.yaml") . | sha256sum }} fluentbit.io/parser-nginz: nginz spec: - terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} # should be higher than the drainTimeout (sleep duration of preStop) + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} containers: - name: nginz-disco image: "{{ .Values.images.nginzDisco.repository }}:{{ .Values.images.nginzDisco.tag }}" @@ -43,14 +43,6 @@ spec: readOnly: false - name: nginz image: "{{ .Values.images.nginz.repository }}:{{ .Values.images.nginz.tag }}" - lifecycle: - preStop: - exec: - # kubernetes by default sends a SIGTERM to the container, - # which would cause nginz to exit, breaking existing websocket connections. - # Instead we sleep for a day, then terminate gracefully. - # (SIGTERM is still sent, but afterwards) - command: ["sh", "-c", "sleep {{ .Values.drainTimeout }} && nginx -c /etc/wire/nginz/conf/nginx.conf -s quit"] volumeMounts: - name: secrets mountPath: /etc/wire/nginz/secrets diff --git a/charts/nginz/values.yaml b/charts/nginz/values.yaml index c0f35c83e61..09e6a648101 100644 --- a/charts/nginz/values.yaml +++ b/charts/nginz/values.yaml @@ -19,7 +19,6 @@ config: ws: wsPort: 8081 useProxyProtocol: true -drainTimeout: 0 terminationGracePeriodSeconds: 30 nginx_conf: user: nginx @@ -62,6 +61,11 @@ nginx_conf: # The list entries are combined with 'external_env_domain' to form a full url # regex that matches for all ports. 
randomport_allowlisted_origins: [] # default is empty by intention + # Add 'cannon' to 'ignored_upstreams' if you wish to make use of separate + # network traffic to cannon-with-its-own-nginz + # See also "Separate incoming websocket network traffic from the rest of the + # https traffic" section in the docs. + ignored_upstreams: [] upstreams: cargohold: - path: /conversations/([^/]*)/assets diff --git a/docs/src/how-to/install/configuration-options.rst b/docs/src/how-to/install/configuration-options.rst index b681b721b1e..d73549b2937 100644 --- a/docs/src/how-to/install/configuration-options.rst +++ b/docs/src/how-to/install/configuration-options.rst @@ -131,6 +131,10 @@ websocket. You're not expected to need to change these settings. +The following options are only relevant during the restart of cannon itself. +During a restart of nginz or ingress-controller, all websockets will get +severed. If this is to be avoided, see section :ref:`separate-websocket-traffic` + ``drainOpts``: Drain websockets in a controlled fashion when cannon receives a SIGTERM or SIGINT (this happens when a pod is terminated e.g. during rollout of a new version). Instead of waiting for connections to close on their own, @@ -163,6 +167,59 @@ There is no way to entirely disable this behaviour, two extreme examples below millisecondsBetweenBatches: 50 minBatchSize: 20 +.. 
_separate-websocket-traffic: + +Separate incoming websocket network traffic from the rest of the https traffic +------------------------------------------------------------------------------- + +By default, incoming network traffic for websockets comes through these network +hops: + +Internet -> LoadBalancer -> kube-proxy -> nginx-ingress-controller -> nginz -> cannon + +Graceful draining of websockets is not easily possible to implement in nginx-ingress-controller or +nginz itself. To still drain websockets gracefully when something gets restarted, there is +a configuration option to get the following network hops instead: + +Internet -> separate LoadBalancer for cannon only -> kube-proxy -> [nginz->cannon (2 containers in the same pod)] + +.. code:: yaml + + # example on AWS when using cert-manager for TLS certificates and external-dns for DNS records + # (see wire-server/charts/cannon/values.yaml for more possible options) + + # in your wire-server/values.yaml overrides: + cannon: + service: + nginz: + enabled: true + hostname: "nginz-ssl.example.com" + externalDNS: + enabled: true + certManager: + enabled: true + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: "nlb" + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" + nginz: + nginx_conf: + ignored_upstreams: ["cannon"] + +.. code:: yaml + + # in your wire-server/secrets.yaml overrides: + cannon: + secrets: + nginz: + zAuth: + publicKeys: ... # same values as in nginz.secrets.zAuth.publicKeys + +.. 
code:: yaml + + # in your nginx-ingress-services/values.yaml overrides: + websockets: + enabled: false + Blocking creation of personal users, new teams -------------------------------------------------------------------------- diff --git a/services/nginz/Dockerfile b/services/nginz/Dockerfile index 6afb9e3d10e..640a9774bec 100644 --- a/services/nginz/Dockerfile +++ b/services/nginz/Dockerfile @@ -144,7 +144,9 @@ RUN mkdir -p /var/cache/nginx/client_temp && chown -R nginx:nginx /var/cache/ngi RUN apk add --no-cache inotify-tools dumb-init bash curl && \ # add libzauth runtime dependencies back in - apk add --no-cache libsodium llvm-libunwind libgcc + apk add --no-cache libsodium llvm-libunwind libgcc && \ + # add openssl runtime dependencies for TLS/SSL certificate support + apk add --no-cache openssl COPY services/nginz/nginz_reload.sh /usr/bin/nginz_reload.sh diff --git a/services/nginz/nginz_reload.sh b/services/nginz/nginz_reload.sh index f2ec41663e1..0ed14d7444e 100755 --- a/services/nginz/nginz_reload.sh +++ b/services/nginz/nginz_reload.sh @@ -7,6 +7,13 @@ nginx_pid=$! +cleanup () { + kill -QUIT $nginx_pid + wait $nginx_pid +} + +trap "cleanup" EXIT + watches=${WATCH_PATHS:-"/etc/wire/nginz/upstreams"} # only react on changes to upstreams.conf