-
Notifications
You must be signed in to change notification settings - Fork 0
/
hang_int_test.go
113 lines (107 loc) · 3.81 KB
/
hang_int_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//go:build rabbit
package rmq_test
import (
"context"
"errors"
"log"
"log/slog"
"net"
"os"
"testing"
"time"
"github.com/danlock/rmq"
amqp "github.com/rabbitmq/amqp091-go"
)
// TestHanging runs Example_hanging as an ordinary test. Example_hanging carries no
// "// Output:" comment, so presumably the testing package would only compile it
// rather than execute it; this wrapper makes sure the scenario actually runs.
// t is not used: Example_hanging reports failures itself via panic and log.Fatalf.
func TestHanging(t *testing.T) {
	Example_hanging()
}
// panicOnErr is a no-op for a nil error and panics with err itself otherwise.
// It keeps the example below free of repetitive error-handling branches.
func panicOnErr(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// Example_hanging contrasts how a stalled TCP connection affects a plain amqp091
// connection versus a danlock/rmq connection.
//
// Both connections target the broker named by the TEST_AMQP_URI environment variable
// and share one amqp.Config whose Dial hook records the underlying *net.TCPConn.
// Writes on that socket are then stalled for hangTime, and the function checks that:
//   - amqp091's Connection.Channel (plus the Close calls that follow it) takes at
//     least 90% of hangTime, and
//   - rmq's Channel fails with context.DeadlineExceeded in no more than 90% of hangTime.
//
// Unexpected errors panic via panicOnErr; the two final rmq expectations are reported
// with log.Fatalf instead.
func Example_hanging() {
	// tcpConn is overwritten by every invocation of the Dial hook below, so it always
	// refers to the most recently dialed socket.
	var tcpConn *net.TCPConn
	amqp091Config := amqp.Config{
		// Set Dial so we have access to the net.Conn.
		// This is the same as amqp091.DefaultDial(time.Second) except we also grab the connection.
		Dial: func(network, addr string) (net.Conn, error) {
			conn, err := net.DialTimeout(network, addr, time.Second)
			if err != nil {
				return nil, err
			}
			if err := conn.SetDeadline(time.Now().Add(time.Second)); err != nil {
				return nil, err
			}
			// Unchecked assertion: panics if the dialed connection is not a *net.TCPConn.
			tcpConn = conn.(*net.TCPConn)
			return conn, nil
		},
	}
	// Create an innocent, unsuspecting amqp091 connection.
	amqp091Conn, err := amqp.DialConfig(os.Getenv("TEST_AMQP_URI"), amqp091Config)
	panicOnErr(err)
	// Create a channel to ensure the connection's working.
	amqp091Chan, err := amqp091Conn.Channel()
	panicOnErr(err)
	panicOnErr(amqp091Chan.Close())
	// Betray amqp091Conn expectations by dawdling. While this is unnatural API usage, the intention is to emulate a connection hang.
	// The channel has capacity 1, so the send inside the write callback completes
	// without waiting for the receiver in hangConnection.
	dawdlingBegins := make(chan struct{}, 1)
	// hangTime is 3 seconds for faster tests, but this could easily be much longer...
	hangTime := 3 * time.Second
	// hangConnection starts a goroutine that occupies the socket's write path for
	// hangTime and returns once that goroutine has signalled that the stall has begun.
	// Its tcpConn parameter shadows the outer variable of the same name.
	hangConnection := func(tcpConn *net.TCPConn) {
		go func() {
			sysConn, err := tcpConn.SyscallConn()
			panicOnErr(err)
			// sysConn.Write blocks the whole connection until it finishes
			err = sysConn.Write(func(fd uintptr) bool {
				// Signal first, then sleep, so the caller only proceeds once the stall is underway.
				dawdlingBegins <- struct{}{}
				time.Sleep(hangTime)
				// Returning true reports the callback as done (see syscall.RawConn).
				return true
			})
			panicOnErr(err)
		}()
		// Allow the goroutine up to one second to enter the write callback.
		select {
		case <-time.After(time.Second):
			panic("sysConn.Write took too long!")
		case <-dawdlingBegins:
		}
	}
	hangConnection(tcpConn)
	// The unsuspecting amqp091Conn.Channel() dutifully waits for hangTime.
	// Doesn't matter what amqp.DefaultDial(connectionTimeout) was (only 1 second...)
	chanStart := time.Now()
	amqp091Chan, err = amqp091Conn.Channel()
	panicOnErr(err)
	panicOnErr(amqp091Chan.Close())
	panicOnErr(amqp091Conn.Close())
	// Test our expectation that amqp091Conn.Channel hung for at least 90% of hangTime, to prevent flaky tests.
	// NOTE(review): the elapsed time measured here also includes the two Close calls above,
	// not only the Channel call.
	if time.Since(chanStart) < (hangTime - (hangTime / 10)) {
		panic("amqp091Conn.Channel returned faster than expected")
	}
	// The above demonstrates one of the biggest issues with amqp091: your application is stuck if the connection hangs,
	// and you don't have any options to prevent this.
	// NOTE(review): ctx is never cancelled within this function.
	ctx := context.Background()
	// danlock/rmq gives you 2 ways to prevent unbounded hangs: Args.AMQPTimeout and the context passed into each function call.
	rmqConnCfg := rmq.ConnectArgs{Args: rmq.Args{Log: slog.Log, AMQPTimeout: time.Second}}
	// Create a paranoid AMQP connection. It reuses amqp091Config, so presumably the Dial hook
	// runs again and tcpConn now refers to rmq's socket — the channel check below relies on that.
	rmqConn := rmq.ConnectWithAMQPConfig(ctx, rmqConnCfg, os.Getenv("TEST_AMQP_URI"), amqp091Config)
	// Grab a channel to ensure the connection is working.
	amqp091Chan, err = rmqConn.Channel(ctx)
	panicOnErr(err)
	panicOnErr(amqp091Chan.Close())
	// A hung connection, just like we've always feared.
	hangConnection(tcpConn)
	// However we will simply error long before hangTime.
	chanStart = time.Now()
	_, err = rmqConn.Channel(ctx)
	// NOTE(review): from here on failures use log.Fatalf rather than panic, unlike the checks above.
	if !errors.Is(err, context.DeadlineExceeded) {
		log.Fatalf("rmqConn.Channel returned unexpected error %v", err)
	}
	chanDur := time.Since(chanStart)
	// rmqConn is too paranoid to hang for 90% of hangTime, but double check anyway.
	if chanDur > (hangTime - (hangTime / 10)) {
		log.Fatalf("rmqConn.Channel hung for (%s)", chanDur)
	}
	// A caveat here is that rmqConn has leaked a goroutine that blocks until the connection sorts itself out.
	// If amqp091-go ever fixes https://github.com/rabbitmq/amqp091-go/issues/225 then we can improve this situation.
}