Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate dictionary #586

Merged
merged 2 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions .github/workflows/fuzzing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,15 @@ jobs:
${command_prefix} cmake \
--build build

- name: Generate dictionary
run: |
python ubpf/dictionary_generator.py >build/bin/dictionary.txt

- name: Upload fuzzer as artifacts
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: fuzzer-${{ matrix.platform }}-${{ matrix.arch }}
path: build/bin/ubpf_fuzzer
path: build/bin/*

build-windows:
strategy:
Expand Down Expand Up @@ -149,6 +153,10 @@ jobs:
run: |
cmake --build build --config RelWithDebInfo

- name: Generate dictionary
run: |
python ubpf\dictionary_generator.py >build\bin\RelWithDebInfo\dictionary.txt

- name: Gather dependencies
shell: cmd
run: |
Expand All @@ -162,8 +170,8 @@ jobs:
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: fuzzer-${{ matrix.platform }}-${{ matrix.arch }}
path: build/bin/RelWithDebInfo/*

path: |
build/bin/RelWithDebInfo

run_fuzzer:
needs:
Expand Down Expand Up @@ -213,7 +221,15 @@ jobs:
run: chmod a+x ubpf_fuzzer

- name: Run fuzzing
if: matrix.platform == 'ubuntu-24.04'
run: |
ls
./ubpf_fuzzer new_corpus -artifact_prefix=artifacts/ -use_value_profile=1 -max_total_time=300 -dict=dictionary.txt

- name: Run fuzzing
if: matrix.platform == 'windows-latest'
run: |
ls
./ubpf_fuzzer new_corpus -artifact_prefix=artifacts/ -use_value_profile=1 -max_total_time=300

- name: Merge corpus into fuzz/corpus
Expand Down
2 changes: 1 addition & 1 deletion external/ebpf-verifier
147 changes: 147 additions & 0 deletions ubpf/dictionary_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Copyright (c) uBPF contributors
# SPDX-License-Identifier: MIT

# Generate a dictionary file of all legal BPF instructions, with immediate values and offsets set to zero.
# Each instruction is written to the output stream as a quoted 8-byte sequence of hex, with each byte prefixed with \x and no spaces between bytes.

import struct
import disassembler

# Binary layout of one BPF instruction slot, in the order gen_inst packs it:
# opcode (B), register byte (B), offset (H), immediate (I).
# The format string has no byte-order prefix, so struct uses the host's native
# byte order and alignment. H and I are unsigned, so negative offsets or
# immediates cannot be packed with this format.
Inst = struct.Struct("BBHI")

# Human-readable names for the numeric fields of a BPF opcode.
# NOTE(review): none of these tables is referenced by the code visible in this
# file; presumably they are kept as reference documentation - confirm before
# removing.

# Instruction class names, keyed by the class identifier (opcode bits 0-2).
CLASSES = {
    0: "ld",
    1: "ldx",
    2: "st",
    3: "stx",
    4: "alu",
    5: "jmp",
    6: "jmp32",
    7: "alu64",
}

# ALU operation names, keyed by the ALU operation code (opcode bits 4-7).
# Codes 14 and 15 have no entry.
ALU_OPCODES = {
    0: 'add',
    1: 'sub',
    2: 'mul',
    3: 'div',
    4: 'or',
    5: 'and',
    6: 'lsh',
    7: 'rsh',
    8: 'neg',
    9: 'mod',
    10: 'xor',
    11: 'mov',
    12: 'arsh',
    13: '(endian)',
}

# Jump operation names, keyed by the jump operation code (opcode bits 4-7).
# Codes 14 and 15 have no entry.
JMP_OPCODES = {
    0: 'ja',
    1: 'jeq',
    2: 'jgt',
    3: 'jge',
    4: 'jset',
    5: 'jne',
    6: 'jsgt',
    7: 'jsge',
    8: 'call',
    9: 'exit',
    10: 'jlt',
    11: 'jle',
    12: 'jslt',
    13: 'jsle',
}

# Load/store addressing mode names, keyed by the mode field (opcode bits 5-7).
# Mode values 4, 5 and 7 have no entry.
MODES = {
    0: 'imm',
    1: 'abs',
    2: 'ind',
    3: 'mem',
    6: 'xadd',
}

# Load/store access size suffixes, keyed by the size field (opcode bits 3-4).
SIZES = {
    0: 'w',
    1: 'h',
    2: 'b',
    3: 'dw',
}

# All opcodes share the same 8-bit format (bit 0 is the least significant bit):
# bits 0-2: class identifier
# bits 3-7: class-specific opcode

# For load and store instructions the class-specific bits are:
# bits 3-4: size
# bits 5-7: mode
BPF_CLASS_LD = 0
BPF_CLASS_LDX = 1
BPF_CLASS_ST = 2
BPF_CLASS_STX = 3

# For ALU and jump instructions the class-specific bits are:
# bit 3: source (register or immediate)
# bits 4-7: ALU or jump operation code
# NOTE(review): the CLASSES table in this file names class 4 "alu" and class 7
# "alu64", whereas here 4 is BPF_CLASS_ALU32 and 7 is BPF_CLASS_ALU; the numeric
# values agree, only the naming differs.
BPF_CLASS_ALU32 = 4
BPF_CLASS_JMP = 5
BPF_CLASS_JMP32 = 6
BPF_CLASS_ALU = 7

# ALU operation codes for negate (8) and endianness conversion (13); they match
# the 'neg' and '(endian)' entries of ALU_OPCODES.
BPF_ALU_NEG = 8
BPF_ALU_END = 13

# Serialize one instruction slot using the module-level Inst layout.
# Fields, in packing order:
#   opcode    - one byte
#   registers - one byte: source register in the high nibble, destination
#               register in the low nibble
#   offset    - unsigned 16-bit value
#   immediate - unsigned 32-bit value
# Returns the packed bytes produced by Inst.pack.
def gen_inst(source_register : int, dest_register : int, opcode : int, offset : int, immediate : int) -> bytes:
    register_byte = (source_register << 4) | dest_register
    return Inst.pack(opcode, register_byte, offset, immediate)

# Generate a load or store opcode.
# The opcode byte is laid out as (bit 0 is the least significant bit):
#   bits 0-2: op_class (instruction class, 0-7)
#   bits 3-4: size     (access size, 0-3)
#   bits 5-7: mode     (addressing mode, 0-7)
# For example class 0 (ld), size 3 (dw), mode 0 (imm) yields 0x18 (lddw).
# The arguments are not range-checked; callers must pass values that fit
# their fields.
def gen_ld_st_opcode(op_class : int, size : int, mode : int) -> int:
    return (mode << 5) | (size << 3) | op_class

# Generate an ALU or JMP opcode.
# The opcode byte is laid out as (bit 0 is the least significant bit):
#   bits 0-2: op_class (instruction class, 0-7)
#   bit  3:   source   (0 = immediate operand, 1 = register operand)
#   bits 4-7: opcode   (ALU or jump operation code, 0-15)
# For example class 5 (jmp), source 0, operation 9 (exit) yields 0x95.
# The arguments are not range-checked; callers must pass values that fit
# their fields.
def gen_alu_or_jump_opcode(op_class : int, source : int, opcode : int) -> int:
    return (opcode << 4) | (source << 3) | op_class

def encode_and_print_instruction(inst : bytes):
    # Instructions whose class bits (low three bits of the opcode byte) are all
    # zero are extended with a second, all-zero 8-byte slot before being
    # disassembled, to cover the two-slot LDDW form.
    if (inst[0] & 7) == 0:
        inst = inst + bytes(8)

    # NOTE(review): assumes disassemble_one returns something indexable whose
    # first element is the mnemonic text - confirm against disassembler.
    mnemonic = disassembler.disassemble_one(inst, 0)

    # Encodings the disassembler reports warnings for are left out of the
    # dictionary.
    if "Warnings" in mnemonic[0]:
        return

    # Emit a single line of the form: <mnemonic> ="\x..\x..", one \xNN escape
    # per byte of the (possibly extended) instruction.
    escaped_bytes = "".join("\\x{:02x}".format(byte) for byte in inst)
    print(str(mnemonic[0]) + " =\"" + escaped_bytes + "\"")

# Generate all possible instructions

# Load and store instructions: walk every class / size (2-bit field) / mode
# (3-bit field) combination and, for each resulting opcode, every pairing of
# registers 0-10, with offset and immediate fixed at zero.
for op_class in (BPF_CLASS_LD, BPF_CLASS_LDX, BPF_CLASS_ST, BPF_CLASS_STX):
    for size in range(4):
        for mode in range(8):
            opcode = gen_ld_st_opcode(op_class, size, mode)
            for source_register in range(11):
                for dest_register in range(11):
                    inst = gen_inst(
                        source_register,
                        dest_register,
                        opcode,
                        offset=0,
                        immediate=0,
                    )
                    encode_and_print_instruction(inst)

# ALU and JMP instructions (classes 4-7): walk every class / source flag /
# operation code 0-13 combination and, for each resulting opcode, every pairing
# of registers 0-10, with offset and immediate fixed at zero.
for op_class in (BPF_CLASS_ALU32, BPF_CLASS_JMP, BPF_CLASS_JMP32, BPF_CLASS_ALU):
    for source in (0, 1):
        for alu_op in range(14):
            opcode = gen_alu_or_jump_opcode(op_class, source, alu_op)
            for source_register in range(11):
                for dest_register in range(11):
                    inst = gen_inst(
                        source_register,
                        dest_register,
                        opcode,
                        offset=0,
                        immediate=0,
                    )
                    encode_and_print_instruction(inst)

Loading