Skip to content

Commit

Permalink
charts/cannon: Bundle nginz and expose directly to load balancer (#2421)
Browse files Browse the repository at this point in the history
By default, incoming network traffic for websockets comes through these network
hops:

Internet -> LoadBalancer -> kube-proxy -> nginx-ingress-controller -> nginz -> cannon

Graceful draining of websockets when something gets restarted (as implemented in #2416) cannot easily be implemented on nginx-ingress-controller or nginz by itself. This PR therefore adds
a configuration option to get the following network hops instead:

Internet -> separate LoadBalancer for cannon only -> kube-proxy -> [nginz->cannon (2 containers in the same pod)]

More context:
https://wearezeta.atlassian.net/wiki/spaces/PS/pages/585564424/How+to+gracefully+drain+cannon+but+not+so+slowly

FUTUREWORK: this introduces some nginz config duplication; refactoring (e.g. by moving charts/{cannon, nginz}/* to charts/wire-server/ in a backwards-compatible way) would reduce this duplication.

Co-authored-by: jschaul <[email protected]>
  • Loading branch information
akshaymankar and jschaul authored May 25, 2022
1 parent 6b0bdf0 commit 4a3b372
Show file tree
Hide file tree
Showing 23 changed files with 695 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@
.stack-root-buildah
.local
services/nginz/src/objs
dist-newstyle
.env
.direnv
4 changes: 4 additions & 0 deletions changelog.d/2-features/cannon-nginz
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Optionally allow running cannon with its own nginz inside the same pod, connected directly to a load balancer.
This allows the cannon-slow-drain behaviour implemented in #2416 to take effect by not having other intermediate network hops which could break websocket connections all at once.
Some (internal) context: https://wearezeta.atlassian.net/wiki/spaces/PS/pages/585564424/How+to+gracefully+drain+cannon+but+not+so+slowly
For details on how to configure this, see docs/src/how-to/install/configuration-options.rst
17 changes: 17 additions & 0 deletions charts/cannon/conf/static/zauth.acl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# zauth ACL: each entry is a short key followed by a blacklist or a
# whitelist of request paths.
# NOTE(review): presumably the keys select the token type (a = user access
# tokens, b = bot tokens, p = provider tokens) -- confirm against the zauth
# documentation.
a (blacklist (path "/provider")
(path "/provider/**")
(path "/bot")
(path "/bot/**")
(path "/i/**"))

b (whitelist (path "/bot")
(path "/bot/**"))

p (whitelist (path "/provider")
(path "/provider/**"))

# LegalHold Access Tokens
la (whitelist (path "/notifications")
(path "/assets/v3/**")
(path "/users")
(path "/users/**"))
342 changes: 342 additions & 0 deletions charts/cannon/templates/conf/_nginx.conf.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
{{- define "cannon_nginz_nginx.conf" }}
# nginx configuration for the nginz instance that is bundled with cannon.
# All tunables are read from .Values.nginx_conf and .Values.service; the only
# upstream declared in this file is cannon, addressed via localhost.
user {{ .Values.nginx_conf.user }} {{ .Values.nginx_conf.group }};
worker_processes {{ .Values.nginx_conf.worker_processes }};
worker_rlimit_nofile {{ .Values.nginx_conf.worker_rlimit_nofile | default 1024 }};
pid /var/run/nginz.pid;

# nb. start up errors (eg. misconfiguration) may still end up in
# /var/log/nginz/error.log
error_log stderr warn;

events {
worker_connections {{ .Values.nginx_conf.worker_connections | default 1024 }};
multi_accept off;
use epoll;
}

http {
#
# Sockets
#
sendfile on;
tcp_nopush on;
tcp_nodelay on;
#
# Timeouts
#
client_body_timeout 60;
client_header_timeout 60;
keepalive_timeout 75;
send_timeout 60;
ignore_invalid_headers off;
types_hash_max_size 2048;
server_names_hash_bucket_size 64;
server_name_in_redirect off;
large_client_header_buffers 4 8k;
#
# Security
#
server_tokens off;
#
# Logging
#
# Note sanitized_request:
# We allow passing access_token as query parameter for e.g. websockets
# However we do not want to log access tokens.
#
log_format custom_zeta '$remote_addr $remote_user "$time_local" "$sanitized_request" $status $body_bytes_sent "$http_referer" "$http_user_agent" $http_x_forwarded_for $connection $request_time $upstream_response_time $upstream_cache_status $zauth_user $zauth_connection $request_id $proxy_protocol_addr "$http_tracestate"';
access_log /dev/stdout custom_zeta;
#
# Monitoring
#
vhost_traffic_status_zone;
#
# Gzip
#
gzip on;
gzip_disable msie6;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_buffers 16 8k;
gzip_http_version 1.1;
gzip_min_length 1024;
gzip_types text/plain text/css application/json application/x-javascript text/xml application/xml application/xml+rss text/javascript;
#
# This directive ensures that X-Forwarded-For is used
# as the client's real IP address (since nginz is always
# behind an ELB, remote_addr now becomes the client's real
# IP address)
#
# NOTE(review): set_real_ip_from 0.0.0.0/0 accepts X-Forwarded-For from any
# peer address -- confirm that nginz is only reachable through the load
# balancer in this setup.
#
real_ip_header X-Forwarded-For;
set_real_ip_from 0.0.0.0/0;
#
# Rate Limiting Exemptions
#
geo $rate_limit {
default 1;
# IPs to exempt can be added in the .Values.nginx_conf.rate_limit_exemptions and .Values.nginx_conf.simulators helm values
{{ if (hasKey .Values.nginx_conf "rate_limit_exemptions") }}
{{ range $ip := .Values.nginx_conf.rate_limit_exemptions }}
{{ $ip }} 0;
{{ end }}
{{ end }}

{{ if (hasKey .Values.nginx_conf "simulators") }}
{{ range $ip := .Values.nginx_conf.simulators }}
{{ $ip }} 0;
{{ end }}
{{ end }}
}

#
# Rate Limiting Mapping
#
# Exempt clients ($rate_limit = 0) are mapped to an empty key. Per the nginx
# limit_req_zone / limit_conn_zone documentation, requests with an empty key
# are not accounted. Note that the per-address key also contains $uri.
#

map $rate_limit $rate_limited_by_addr {
1 "$binary_remote_addr$uri";
0 "";
}

map $rate_limit $rate_limited_by_zuser {
1 $zauth_user;
0 "";
}

# $cors_header echoes the request's Origin header when it matches one of the
# configured origins below, and is empty otherwise.
map $http_origin $cors_header {
default "";
{{ range $origin := .Values.nginx_conf.allowlisted_origins }}
"https://{{ $origin }}.{{ $.Values.nginx_conf.external_env_domain}}" "$http_origin";
{{ end }}

# Allow additional origins at random ports. This is useful for testing with an HTTP proxy.
# It should not be used in production.
{{ range $origin := .Values.nginx_conf.randomport_allowlisted_origins }}
"~^https://{{ $origin }}.{{ $.Values.nginx_conf.external_env_domain}}(:[0-9]{2,5})?$" "$http_origin";
{{ end }}
}


#
# Rate Limiting
#

limit_req_zone $rate_limited_by_zuser zone=reqs_per_user:12m rate=10r/s;
limit_req_zone $rate_limited_by_addr zone=reqs_per_addr:12m rate=5r/m;

limit_conn_zone $rate_limited_by_zuser zone=conns_per_user:10m;
limit_conn_zone $rate_limited_by_addr zone=conns_per_addr:10m;

# Too Many Requests (420) is returned on throttling
# TODO: Change to 429 once all clients support this
limit_req_status 420;
limit_conn_status 420;

limit_req_log_level warn;
limit_conn_log_level warn;

# Limit by $zauth_user if present and not part of rate limit exemptions
limit_req zone=reqs_per_user burst=20;
limit_conn conns_per_user 25;

#
# Proxied Upstream Services
#
# cannon is addressed via localhost on .Values.service.internalPort.
#

upstream cannon {
least_conn;
keepalive 32;
server localhost:{{ .Values.service.internalPort }};
}

#
# Mapping for websocket connections
#
# $connection_upgrade is "upgrade" when $http_upgrade is "websocket" and empty
# otherwise; it is used as the Connection header for websocket locations.
#

map $http_upgrade $connection_upgrade {
websocket upgrade;
default '';
}



#
# Locations
#

server {
# TLS is terminated by this server block; certificate and key are read from
# /etc/wire/nginz/tls.
listen {{ .Values.service.nginz.internalPort }} ssl;

ssl_certificate /etc/wire/nginz/tls/tls.crt;
ssl_certificate_key /etc/wire/nginz/tls/tls.key;

ssl_protocols {{ .Values.nginx_conf.tls.protocols }};
ssl_ciphers {{ .Values.nginx_conf.tls.ciphers }};

# Disable session resumption. See comments in SQPIT-226 for more context and
# discussion.
ssl_session_tickets off;
ssl_session_cache off;

zauth_keystore {{ .Values.nginx_conf.zauth_keystore }};
zauth_acl {{ .Values.nginx_conf.zauth_acl }};

location /status {
zauth off;
access_log off;
return 200;
}

location /vts {
zauth off;
access_log off;
allow 10.0.0.0/8;
allow 127.0.0.1;
deny all;
# Requests with an X-Forwarded-For header will have the real client
# source IP address set correctly, due to the real_ip_header directive
# in the top-level configuration. However, this will not set the client
# IP correctly for clients which are connected via a load balancer which
# uses the PROXY protocol.
#
# Hence, for safety, we deny access to the vts metrics endpoints to
# clients which are connected via PROXY protocol.
if ($proxy_protocol_addr != "") {
return 403;
}

vhost_traffic_status_display;
vhost_traffic_status_display_format html;
}

# Block "Franz" -- http://meetfranz.com
if ($http_user_agent ~* Franz) {
return 403;
}

{{ range $path := .Values.nginx_conf.disabled_paths }}
location ~* ^(/v[0-9]+)?{{ $path }} {
return 404;
}
{{ end }}

#
# Service Routing
#
# One location block is generated for every entry of
# .Values.nginx_conf.upstreams that has an "envs" list containing either
# .Values.nginx_conf.env or "all"; entries without an "envs" key are skipped.
# The location regex accepts an optional /vN prefix unless the entry sets
# "versioned" to false. With "strip_version" set, a server-level rewrite
# drops the /vN prefix from the request URI.
# NOTE(review): an entry whose "envs" lists both the current env and "all"
# is emitted once per matching element, i.e. twice -- confirm this cannot
# happen with the shipped values.
#

{{ range $name, $locations := .Values.nginx_conf.upstreams -}}
{{- range $location := $locations -}}
{{- if hasKey $location "envs" -}}
{{- range $env := $location.envs -}}
{{- if or (eq $env $.Values.nginx_conf.env) (eq $env "all") -}}

{{- if $location.strip_version }}

rewrite ^/v[0-9]+({{ $location.path }}) $1;
{{- end }}

{{- $versioned := ternary $location.versioned true (hasKey $location "versioned") -}}
{{- $path := printf "%s%s" (ternary "(/v[0-9]+)?" "" $versioned) $location.path }}

location ~* ^{{ $path }} {
# remove access_token from logs, see 'Note sanitized_request' above.
set $sanitized_request $request;
if ($sanitized_request ~ (.*)access_token=[^&\s]*(.*)) {
set $sanitized_request $1access_token=****$2;
}

{{- if ($location.disable_zauth) }}
zauth off;

# If zauth is off, limit by remote address if not part of limit exemptions
{{- if ($location.unlimited_requests_endpoint) }}
# Note that this endpoint has no rate limit
{{- else -}}
limit_req zone=reqs_per_addr burst=5 nodelay;
limit_conn conns_per_addr 20;
{{- end -}}
{{- end }}

if ($request_method = 'OPTIONS') {
add_header 'Access-Control-Allow-Methods' "GET, POST, PUT, DELETE, OPTIONS";
add_header 'Access-Control-Allow-Headers' "$http_access_control_request_headers, DNT,X-Mx-ReqToken,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type";
add_header 'Content-Type' 'text/plain; charset=UTF-8';
add_header 'Content-Length' 0;
return 204;
}

proxy_pass http://{{ $name }};
proxy_http_version 1.1;

{{- if ($location.disable_request_buffering) }}
proxy_request_buffering off;
{{ end -}}
{{- if (hasKey $location "body_buffer_size") }}
client_body_buffer_size {{ $location.body_buffer_size -}};
{{- end }}
client_max_body_size {{ $location.max_body_size | default "64k" }};

{{ if ($location.use_websockets) }}
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
proxy_read_timeout 1h;
{{- else }}
proxy_set_header Connection "";
{{ end -}}

{{- if not ($location.disable_zauth) }}
proxy_set_header Authorization "";
{{- end }}

proxy_set_header Z-Type $zauth_type;
proxy_set_header Z-User $zauth_user;
proxy_set_header Z-Connection $zauth_connection;
proxy_set_header Z-Provider $zauth_provider;
proxy_set_header Z-Bot $zauth_bot;
proxy_set_header Z-Conversation $zauth_conversation;
proxy_set_header Request-Id $request_id;

{{- if ($location.allow_credentials) }}
more_set_headers 'Access-Control-Allow-Credentials: true';
{{ end -}}

more_set_headers 'Access-Control-Allow-Origin: $cors_header';

more_set_headers 'Access-Control-Expose-Headers: Request-Id, Location';
more_set_headers 'Request-Id: $request_id';
more_set_headers 'Strict-Transport-Security: max-age=31536000; preload';
}

{{- end -}}
{{- end -}}

{{- end -}}
{{- end -}}
{{- end }}
}
}
{{- end }}
3 changes: 1 addition & 2 deletions charts/cannon/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
apiVersion: v1
data:
cannon.yaml: |
logNetStrings: True # log using netstrings encoding:
# http://cr.yp.to/proto/netstrings.txt
logFormat: StructuredJSON
logLevel: {{ .Values.config.logLevel }}
cannon:
Expand Down
16 changes: 16 additions & 0 deletions charts/cannon/templates/nginz-certificate-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{- if and .Values.service.nginz.enabled (not .Values.service.nginz.certManager.enabled ) }}
# TLS certificate and private key for the nginz container bundled with cannon.
# Rendered only when the bundled nginz is enabled and cert-manager is not
# enabled for it.
# NOTE(review): presumably cert-manager provides a secret of the same name
# when it is enabled -- confirm against the Certificate template.
apiVersion: v1
kind: Secret
metadata:
  name: {{ .Values.service.nginz.tls.secretName }}
  labels:
    wireService: cannon-nginz
    app: cannon-nginz
    chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
    release: "{{ .Release.Name }}"
    heritage: "{{ .Release.Service }}"
type: kubernetes.io/tls
data:
  # Values under `data` must be base64-encoded. The helm values are expected
  # to contain the PEM text, so encode (and quote) them here; an unencoded
  # multi-line PEM would otherwise produce an invalid manifest.
  tls.crt: {{ .Values.secrets.nginz.tls.crt | b64enc | quote }}
  tls.key: {{ .Values.secrets.nginz.tls.key | b64enc | quote }}
{{- end }}
Loading

0 comments on commit 4a3b372

Please sign in to comment.