-
Notifications
You must be signed in to change notification settings - Fork 475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: feat: io_uring for netpoll I/O poller #197
base: develop
Are you sure you want to change the base?
Changes from 16 commits
d766619
0227947
bb5ef53
16074e4
00ee563
04cd494
a940bfa
a375d8c
3e90d2b
8db732f
ea33f82
fda4b12
7f9768b
fdc0e5b
4c5b712
641d782
8e81931
1057583
a02b287
58d195d
4fab811
e3f4b83
392c003
5ad8bd6
61f407b
c82d419
6e445f8
2e064d6
18aac13
546b5cd
4386f8c
28cc8eb
a4ffc46
3816272
dbdf554
f95b7f6
b6334f4
72137f0
32a14c0
53df5fc
f86c99a
cdbc94f
c1ec061
6a4d7e2
e9ff6a3
7c65c95
46ca4cf
83cc3e2
93dda0b
625c0c4
0013936
302e5be
03248a3
5e0b446
4d8abb9
ac1a9d8
e1a8711
9dbdc9d
d2b3966
5a6033f
4608460
0737179
ef05b54
326225e
861cfa8
dbd117a
cfc5b9a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
// Copyright 2021 CloudWeGo Authors | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Copyright 2022 CloudWeGo Authors |
||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package netpoll | ||
|
||
import uring "github.com/cloudwego/netpoll/io_uring" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 路径错误 |
||
|
||
// TODO: init uringPoll | ||
func openIOURingPoll() *uringPoll { | ||
poll := new(uringPoll) | ||
ring, err := uring.IOURing(0) | ||
if err != nil { | ||
panic(err) | ||
} | ||
poll.fd = ring.Fd() | ||
return poll | ||
} | ||
|
||
// TODO: build uringPoll | ||
type uringPoll struct { | ||
fd int | ||
} | ||
|
||
// TODO: Wait implements Poll. | ||
func (p *uringPoll) Wait() error | ||
|
||
// TODO: Close implements Poll. | ||
func (p *uringPoll) Close() error | ||
|
||
// TODO: Trigger implements Poll. | ||
func (p *uringPoll) Trigger() error | ||
|
||
// TODO: Control implements Poll. | ||
func (p *uringPoll) Control(operator *FDOperator, event PollEvent) error |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Copyright 2021 CloudWeGo Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package uring | ||
|
||
import "sync/atomic" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个文件删了吧 |
||
// WRITE_ONCE_U32 atomically stores v into *p using atomic.StoreUint32. | ||
func WRITE_ONCE_U32(p *uint32, v uint32) { | ||
atomic.StoreUint32(p, v) | ||
} | ||
|
||
// READ_ONCE_U32 atomically loads and returns *p using atomic.LoadUint32. | ||
func READ_ONCE_U32(p *uint32) uint32 { | ||
return atomic.LoadUint32(p) | ||
} | ||
|
||
// SMP_STORE_RELEASE_U32 atomically stores v into *p using | ||
// atomic.StoreUint32; its body is the same as WRITE_ONCE_U32. | ||
func SMP_STORE_RELEASE_U32(p *uint32, v uint32) { | ||
atomic.StoreUint32(p, v) | ||
} | ||
|
||
// SMP_LOAD_ACQUIRE_U32 atomically loads and returns *p using | ||
// atomic.LoadUint32; its body is the same as READ_ONCE_U32. | ||
func SMP_LOAD_ACQUIRE_U32(p *uint32) uint32 { | ||
return atomic.LoadUint32(p) | ||
} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
// Copyright 2021 CloudWeGo Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package uring | ||
|
||
import ( | ||
"syscall" | ||
) | ||
|
||
// URingCQE is a Completion Queue Event, the IO completion data structure.
type URingCQE struct {
	UserData uint64 // sqe->data submission passed back
	Res      int32  // result code for this event
	Flags    uint32

	// If the ring is initialized with IORING_SETUP_CQE32, then this field
	// contains 16-bytes of padding, doubling the size of the CQE.
	//
	// NOTE(review): a Go slice header is not 16 bytes, so this field does not
	// give the struct the layout described above. Review feedback on this
	// field asks for a fixed 16-byte representation instead — confirm before
	// relying on unsafe.Sizeof(URingCQE{}).
	BigCQE []uint64
}

// Error implements CQE.
//
// It returns nil when Res is non-negative. A negative Res is treated as a
// negated errno value and is returned as the matching syscall.Errno.
func (c *URingCQE) Error() error {
	if c.Res >= 0 {
		// Converting a non-negative result would yield a non-nil error
		// value (syscall.Errno(0) for Res == 0), which callers checking
		// `err != nil` would misread as a failure.
		return nil
	}
	return syscall.Errno(uintptr(-c.Res))
}

// getData implements CQE. It returns the UserData field of c.
func (c *URingCQE) getData() uint64 {
	return c.UserData
}
|
||
// URingSQE is a Submission Queue Entry, the IO submission data structure. | ||
type URingSQE struct { | ||
OpCode uint8 // type of operation for this sqe | ||
Flags uint8 // IOSQE_ flags | ||
IOPrio uint16 // ioprio for the request | ||
Fd int32 // file descriptor to do IO on | ||
Off uint64 // offset into file | ||
Addr uint64 // pointer to buffer or iovecs | ||
Len uint32 // buffer size or number of iovecs | ||
OpcodeFlags uint32 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is |
||
UserData uint64 // data to be passed back at completion time | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 以下先 |
||
BufIG uint16 | ||
|
||
Personality uint16 // personality to use, if used | ||
SpliceFdIn int32 | ||
// _pad2 is trailing padding; PrepRW clears both words. | ||
_pad2 [2]uint64 | ||
} | ||
|
||
// setData stores ud in the UserData field of s. | ||
func (s *URingSQE) setData(ud uint64) { | ||
s.UserData = ud | ||
} | ||
|
||
// setFlags stores flags in the Flags field of s. | ||
func (s *URingSQE) setFlags(flags uint8) { | ||
s.Flags = flags | ||
} | ||
|
||
// setAddr stores addr, converted to uint64, in the Addr field of s. | ||
func (s *URingSQE) setAddr(addr uintptr) { | ||
s.Addr = uint64(addr) | ||
} | ||
|
||
// PrepRW implements SQE | ||
func (s *URingSQE) PrepRW(op OpFlag, fd int32, addr uintptr, len uint32, offset uint64) { | ||
s.OpCode = uint8(op) | ||
s.Flags = 0 | ||
s.IOPrio = 0 | ||
s.Fd = fd | ||
s.Off = offset | ||
s.setAddr(addr) | ||
s.Len = len | ||
s.OpcodeFlags = 0 | ||
s.UserData = 0 | ||
s.BufIG = 0 | ||
s.Personality = 0 | ||
s.SpliceFdIn = 0 | ||
s._pad2[0] = 0 | ||
s._pad2[1] = 0 | ||
} | ||
|
||
// Flags of CQE. | ||
// IORING_CQE_F_BUFFER: if set, the upper 16 bits are the buffer ID. | ||
// IORING_CQE_F_MORE: if set, the parent SQE will generate more CQE entries. | ||
// IORING_CQE_F_SOCK_NONEMPTY: if set, there is more data to read after a socket recv. | ||
// | ||
// NOTE(review): these constants are typed OpFlag while URingCQE.Flags is a | ||
// uint32 — confirm that OpFlag is the intended type. | ||
const ( | ||
IORING_CQE_F_BUFFER OpFlag = 1 << iota | ||
IORING_CQE_F_MORE | ||
IORING_CQE_F_SOCK_NONEMPTY | ||
) | ||
|
||
// IORING_CQE_BUFFER_SHIFT is 16. | ||
// NOTE(review): presumably the shift that extracts the buffer ID from the | ||
// upper 16 bits of the CQE flags (see IORING_CQE_F_BUFFER) — confirm. | ||
const IORING_CQE_BUFFER_SHIFT = 16 | ||
|
||
// io_uring_enter(2) flags. Each constant is a distinct single bit | ||
// (1 << iota) of type uint32, so the values can be OR-ed together. | ||
const ( | ||
IORING_ENTER_GETEVENTS uint32 = 1 << iota | ||
IORING_ENTER_SQ_WAKEUP | ||
IORING_ENTER_SQ_WAIT | ||
IORING_ENTER_EXT_ARG | ||
IORING_ENTER_REGISTERED_RING | ||
) | ||
|
||
// Bit positions of the SQE flags, numbered from 0 by iota. Each IOSQE_* | ||
// flag is defined as 1 shifted left by the matching *_BIT constant. | ||
// | ||
// NOTE(review): the comment previously attached to this block described | ||
// sqe->file_index (asking io_uring to allocate a direct descriptor for | ||
// openat/openat2/accept and returning it in cqe->res, or -ENFILE when the | ||
// space is full). It does not describe these constants and presumably | ||
// belongs to a different declaration — confirm. | ||
const ( | ||
IOSQE_FIXED_FILE_BIT = iota | ||
IOSQE_IO_DRAIN_BIT | ||
IOSQE_IO_LINK_BIT | ||
IOSQE_IO_HARDLINK_BIT | ||
IOSQE_ASYNC_BIT | ||
IOSQE_BUFFER_SELECT_BIT | ||
IOSQE_CQE_SKIP_SUCCESS_BIT | ||
) | ||
|
||
// Flags of SQE. Each value is 1 shifted left by the matching IOSQE_*_BIT. | ||
// | ||
// NOTE(review): these are typed uint32 while URingSQE.Flags and setFlags | ||
// use uint8, so callers need a conversion — confirm the intended type. | ||
const ( | ||
// IOSQE_FIXED_FILE means use the fixed file set. | ||
IOSQE_FIXED_FILE uint32 = 1 << IOSQE_FIXED_FILE_BIT | ||
// IOSQE_IO_DRAIN means issue after in-flight IO. | ||
IOSQE_IO_DRAIN uint32 = 1 << IOSQE_IO_DRAIN_BIT | ||
// IOSQE_IO_LINK links the next sqe. | ||
IOSQE_IO_LINK uint32 = 1 << IOSQE_IO_LINK_BIT | ||
// IOSQE_IO_HARDLINK is like IOSQE_IO_LINK, but stronger. | ||
IOSQE_IO_HARDLINK uint32 = 1 << IOSQE_IO_HARDLINK_BIT | ||
// IOSQE_ASYNC means always go async. | ||
IOSQE_ASYNC uint32 = 1 << IOSQE_ASYNC_BIT | ||
// IOSQE_BUFFER_SELECT means select a buffer from sqe->buf_group. | ||
IOSQE_BUFFER_SELECT uint32 = 1 << IOSQE_BUFFER_SELECT_BIT | ||
// IOSQE_CQE_SKIP_SUCCESS means don't post a CQE if the request succeeded. | ||
IOSQE_CQE_SKIP_SUCCESS uint32 = 1 << IOSQE_CQE_SKIP_SUCCESS_BIT | ||
) | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// Copyright 2021 CloudWeGo Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package uring | ||
|
||
import ( | ||
"syscall" | ||
"unsafe" | ||
) | ||
|
||
// sysMmap is used to free the URingSQE and URingCQE, | ||
func (u *URing) sysMunmap() (err error) { | ||
err = syscall.Munmap(u.sqRing.buff) | ||
if u.cqRing.buff != nil && &u.cqRing.buff[0] != &u.sqRing.buff[0] { | ||
err = syscall.Munmap(u.cqRing.buff) | ||
} | ||
return | ||
} | ||
|
||
// sysMmap is used to configure the URingSQE and URingCQE, | ||
// it should only be called after the sysSetUp function has completed successfully. | ||
func (u *URing) sysMmap(p *ringParams) (err error) { | ||
size := unsafe.Sizeof(URingCQE{}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sizeof 定义为 global var/const,不用每次都算 |
||
if p.flags&IORING_SETUP_CQE32 != 0 { | ||
size += unsafe.Sizeof(URingCQE{}) | ||
} | ||
u.sqRing.ringSize = uint64(p.sqOffset.array) + uint64(p.sqEntries*(uint32)(unsafe.Sizeof(uint32(0)))) | ||
u.cqRing.ringSize = uint64(p.cqOffset.cqes) + uint64(p.cqEntries*(uint32)(size)) | ||
|
||
if p.features&IORING_FEAT_SINGLE_MMAP != 0 { | ||
if u.cqRing.ringSize > u.sqRing.ringSize { | ||
u.sqRing.ringSize = u.cqRing.ringSize | ||
} | ||
u.cqRing.ringSize = u.sqRing.ringSize | ||
} | ||
|
||
// TODO: syscall.MAP_POPULATE unsupport for macox | ||
data, err := syscall.Mmap(u.fd, 0, int(u.sqRing.ringSize), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. syscall.Mmap/Unmap 封装一下,以后可以修改 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. uring 不是 only linux 吗,为什么考虑 macox ? 这里没有 syscall.MAP_POPULATE 不行的 |
||
if err != nil { | ||
return err | ||
} | ||
u.sqRing.buff = data | ||
|
||
if p.features&IORING_FEAT_SINGLE_MMAP != 0 { | ||
u.cqRing.buff = u.sqRing.buff | ||
} else { | ||
// TODO: syscall.MAP_POPULATE unsupport for macox | ||
data, err = syscall.Mmap(u.fd, int64(IORING_OFF_CQ_RING), int(u.cqRing.ringSize), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 同上 |
||
if err != nil { | ||
u.sysMunmap() | ||
return err | ||
} | ||
u.cqRing.buff = data | ||
} | ||
|
||
ringStart := &u.sqRing.buff[0] | ||
u.sqRing.kHead = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.head))) | ||
u.sqRing.kTail = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.tail))) | ||
u.sqRing.kRingMask = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.ringMask))) | ||
u.sqRing.kRingEntries = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.ringEntries))) | ||
u.sqRing.kFlags = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.flags))) | ||
u.sqRing.kDropped = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.dropped))) | ||
u.sqRing.array = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.sqOffset.array))) | ||
|
||
size = unsafe.Sizeof(URingSQE{}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pin |
||
if p.flags&IORING_SETUP_SQE128 != 0 { | ||
size += 64 | ||
} | ||
// TODO: syscall.MAP_POPULATE unsupport for macox | ||
buff, err := syscall.Mmap(u.fd, int64(IORING_OFF_SQES), int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) | ||
if err != nil { | ||
_ = u.sysMunmap() | ||
return err | ||
} | ||
u.sqRing.sqeBuff = buff | ||
|
||
cqRingPtr := uintptr(unsafe.Pointer(&u.cqRing.buff[0])) | ||
ringStart = &u.cqRing.buff[0] | ||
|
||
u.cqRing.kHead = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.cqOffset.head))) | ||
u.cqRing.kTail = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.cqOffset.tail))) | ||
u.cqRing.kRingMask = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.cqOffset.ringMsk))) | ||
u.cqRing.kRingEntries = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.cqOffset.ringEntries))) | ||
u.cqRing.kOverflow = (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.cqOffset.overflow))) | ||
u.cqRing.cqes = (*URingCQE)(unsafe.Pointer(uintptr(unsafe.Pointer(ringStart)) + uintptr(p.cqOffset.cqes))) | ||
if p.cqOffset.flags != 0 { | ||
u.cqRing.kFlags = cqRingPtr + uintptr(p.cqOffset.flags) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// Magic offsets for the application to mmap the data it needs. | ||
const ( | ||
// IORING_OFF_SQ_RING is the mmap offset of the submission queue ring. | ||
IORING_OFF_SQ_RING uint64 = 0 | ||
// IORING_OFF_CQ_RING is the mmap offset of the completion queue ring. | ||
IORING_OFF_CQ_RING uint64 = 0x8000000 | ||
// IORING_OFF_SQES is the mmap offset of the SQE array. | ||
IORING_OFF_SQES uint64 = 0x10000000 | ||
) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
poll_xxx 这些先删掉吧,这里需要开发注册机制,不通过 type 区分