Skip to content

Commit

Permalink
libcdb: add offline_only to search_by_symbol_offsets (Gallopsled#…
Browse files Browse the repository at this point in the history
…2388)

* Add `offline_only` for `search_by_symbol_offsets`

* Fix bug

* Update CHANGELOG

* Remove redundant code

* Update

---------

Co-authored-by: peace-maker <[email protected]>
  • Loading branch information
the-soloist and peace-maker authored May 23, 2024
1 parent e92a30b commit 51e8eb0
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 25 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ The table below shows which release corresponds to each branch, and what date th
- [#2391][2391] Fix error message when passing invalid kwargs to `xor`
- [#2376][2376] Return buffered data on first EOF in tube.readline()
- [#2387][2387] Convert apport_corefile() output from bytes-like object to string
- [#2388][2388] libcdb: add `offline_only` to `search_by_symbol_offsets`

[2360]: https://github.com/Gallopsled/pwntools/pull/2360
[2356]: https://github.com/Gallopsled/pwntools/pull/2356
Expand All @@ -93,6 +94,7 @@ The table below shows which release corresponds to each branch, and what date th
[2391]: https://github.com/Gallopsled/pwntools/pull/2391
[2376]: https://github.com/Gallopsled/pwntools/pull/2376
[2387]: https://github.com/Gallopsled/pwntools/pull/2387
[2388]: https://github.com/Gallopsled/pwntools/pull/2388

## 4.13.0 (`beta`)

Expand Down
147 changes: 122 additions & 25 deletions pwnlib/libcdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,43 @@ def provider_local_database(hex_encoded_id, hash_type):

return None

def query_local_database(params):
    """
    Search the local libc database for libcs matching the given symbol offsets.

    Every ``*.symbols`` file below ``context.local_libcdb`` is parsed and
    compared against the requested symbols.  A libc matches when it defines
    every requested symbol and the lowest 12 bits of each address agree.

    Arguments:
        params(dict):
            Query parameters.  Only the ``"symbols"`` entry is read; it maps
            symbol names to addresses given as ``int`` or as hex ``str``.

    Returns:
        :const:`None` if no local database is configured, the configured path
        is not a directory, or no symbols were requested.  Otherwise a
        (possibly empty) list with one entry per matching libc, each built by
        ``_pack_libs_info``.
    """
    if not context.local_libcdb or not params.get("symbols"):
        return None

    localdb = Path(context.local_libcdb)
    if not localdb.is_dir():
        return None

    # Normalise the query once instead of once per symbols file.
    # Only the last 12 bits of each address take part in the comparison.
    query_syms = {}
    for name, addr in params["symbols"].items():
        if isinstance(addr, str):
            addr = int(addr, 16)
        query_syms[name] = addr & 0xfff

    res = []

    # Loop through each '.symbols' file in the local database.
    # Sort the paths so the order of the results is stable.
    for symbol_file in sorted(localdb.rglob("*.symbols"), key=lambda x: x.as_posix()):
        libc_syms = _parse_libc_symbol(symbol_file)

        for name, low_bits in query_syms.items():
            libc_addr = libc_syms.get(name)
            # Compare against None so that a symbol at address 0 still counts
            # as present; abort on the first mismatch.
            if libc_addr is None or (libc_addr & 0xfff) != low_bits:
                break
        else:
            # The loop finished without a mismatch: all symbols matched.
            libs_id = symbol_file.stem
            libc_path = symbol_file.parent / ("%s.so" % libs_id)
            url_file = symbol_file.parent / ("%s.url" % libs_id)
            # Not every database entry ships a '.url' file; a missing one must
            # not abort the whole search, so fall back to an empty URL.
            if url_file.is_file():
                libs_url = read(url_file).decode().strip()
            else:
                libs_url = ""
            res.append(_pack_libs_info(libc_path, libs_id, libs_url, libc_syms))

    return res

PROVIDERS = {
"offline": [provider_local_system, provider_local_database],
"online": [provider_libcdb, provider_libc_rip]
Expand Down Expand Up @@ -546,7 +583,7 @@ def _handle_multiple_matching_libcs(matching_libcs):
selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs])
return matching_libcs[selected_index]

def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False):
def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False):
"""
Lookup possible matching libc versions based on leaked function addresses.
Expand All @@ -568,6 +605,9 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
return_as_list(bool):
Return a list of build ids of all matching libc versions
instead of a path to a downloaded file.
offline_only(bool):
When `offline_only=True` is passed, the search is restricted to offline sources only
and online lookups are disabled. Defaults to `False`, which enables both offline and online providers.
Returns:
Path to the downloaded library on disk, or :const:`None`.
Expand All @@ -592,27 +632,50 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as

params = {'symbols': symbols}
log.debug('Request: %s', params)
matching_libcs = query_libc_rip(params)
log.debug('Result: %s', matching_libcs)
if matching_libcs is None or len(matching_libcs) == 0:
log.warn_once("No matching libc for symbols %r on libc.rip", symbols)

offline_matching = query_local_database(params)
online_matching = query_libc_rip(params) if not offline_only else None

if offline_matching is None:
offline_matching = []
if online_matching is None:
online_matching = []

# Aggregate and deduplicate matches from both sources
matching_libcs = {}
for libc in offline_matching + online_matching:
if libc['id'] not in matching_libcs:
matching_libcs[libc['id']] = libc

log.debug('Offline search result: %s', offline_matching)
if not offline_only:
log.debug('Online search result: %s', online_matching)

# Check if no matches are found
if len(matching_libcs) == 0:
log.warn_once("No matching libc for symbols %r", symbols)
return None

matching_list = list(matching_libcs.values())

if return_as_list:
return [libc['buildid'] for libc in matching_libcs]
return [libc['buildid'] for libc in matching_list]

if len(matching_libcs) == 1:
return search_by_build_id(matching_libcs[0]['buildid'], unstrip=unstrip)
# If there's only one match, return it directly
if len(matching_list) == 1:
return search_by_build_id(matching_list[0]['buildid'], unstrip=unstrip, offline_only=offline_only)

# If a specific index is provided, validate it and return the selected libc
if select_index is not None:
if select_index > 0 and select_index <= len(matching_libcs):
return search_by_build_id(matching_libcs[select_index - 1]['buildid'], unstrip=unstrip)
if select_index > 0 and select_index <= len(matching_list):
return search_by_build_id(matching_list[select_index - 1]['buildid'], unstrip=unstrip, offline_only=offline_only)
else:
log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_libcs))
log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_list))
return None

selected_libc = _handle_multiple_matching_libcs(matching_libcs)
return search_by_build_id(selected_libc['buildid'], unstrip=unstrip)
# Handle multiple matches interactively if no index is specified
selected_libc = _handle_multiple_matching_libcs(matching_list)
return search_by_build_id(selected_libc['buildid'], unstrip=unstrip, offline_only=offline_only)

def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False):
"""
Expand All @@ -624,9 +687,8 @@ def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False):
unstrip(bool):
Try to fetch debug info for the libc and apply it to the downloaded file.
offline_only(bool):
Both offline and online providers are used by default. When pass
`offline_only=True`, libcdb enable an exclusive offline search mode,
which will disable online providers.
When `offline_only=True` is passed, the search is restricted to offline sources only
and online lookups are disabled. Defaults to `False`, which enables both offline and online providers.
Returns:
Path to the downloaded library on disk, or :const:`None`.
Expand Down Expand Up @@ -654,9 +716,8 @@ def search_by_md5(hex_encoded_id, unstrip=True, offline_only=False):
unstrip(bool):
Try to fetch debug info for the libc and apply it to the downloaded file.
offline_only(bool):
Both offline and online providers are used by default. When pass
`offline_only=True`, libcdb enable an exclusive offline search mode,
which will disable online providers.
When `offline_only=True` is passed, the search is restricted to offline sources only
and online lookups are disabled. Defaults to `False`, which enables both offline and online providers.
Returns:
Path to the downloaded library on disk, or :const:`None`.
Expand Down Expand Up @@ -684,9 +745,8 @@ def search_by_sha1(hex_encoded_id, unstrip=True, offline_only=False):
unstrip(bool):
Try to fetch debug info for the libc and apply it to the downloaded file.
offline_only(bool):
Both offline and online providers are used by default. When pass
`offline_only=True`, libcdb enable an exclusive offline search mode,
which will disable online providers.
When `offline_only=True` is passed, the search is restricted to offline sources only
and online lookups are disabled. Defaults to `False`, which enables both offline and online providers.
Returns:
Path to the downloaded library on disk, or :const:`None`.
Expand Down Expand Up @@ -714,9 +774,8 @@ def search_by_sha256(hex_encoded_id, unstrip=True, offline_only=False):
unstrip(bool):
Try to fetch debug info for the libc and apply it to the downloaded file.
offline_only(bool):
Both offline and online providers are used by default. When pass
`offline_only=True`, libcdb enable an exclusive offline search mode,
which will disable online providers.
When `offline_only=True` is passed, the search is restricted to offline sources only
and online lookups are disabled. Defaults to `False`, which enables both offline and online providers.
Returns:
Path to the downloaded library on disk, or :const:`None`.
Expand All @@ -734,7 +793,45 @@ def search_by_sha256(hex_encoded_id, unstrip=True, offline_only=False):
"""
return search_by_hash(hex_encoded_id, 'sha256', unstrip, offline_only)

def _parse_libc_symbol(path):
    """
    Parse a symbols file into a ``dict`` mapping symbol name to address.

    The format is the same as https://github.com/niklasb/libc-database/:
    one ``<name> <hex address>`` pair per line.

    Arguments:
        path: Path of the symbols file to read.

    Returns:
        dict: Symbol names mapped to their addresses as ``int``.

    Raises:
        ValueError: If a non-blank line is not a ``<name> <hex address>`` pair.
    """

    syms = {}

    with open(path, "r") as fd:
        for line in fd:
            # Split on any whitespace so tabs, repeated spaces and trailing
            # whitespace do not break the name/address unpacking.
            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. an extra newline at end of file.
                continue
            name, addr = fields
            syms[name] = int(addr, 16)

    return syms

def _pack_libs_info(path, libs_id, libs_url, syms):
    """
    Describe a libc from the local database as a ``dict``.

    The JSON format is the same as libc.rip, and the "download_url" field is
    by default an empty string, as it's not required in offline mode.

    Arguments:
        path: Path of the libc file; passed to every function in ``HASHES``.
        libs_id: Identifier stored under the ``"id"`` key.
        libs_url: URL stored under the ``"libs_url"`` key.
        syms(dict): Symbol names mapped to ``int`` addresses.

    Returns:
        dict: ``"id"``, ``"libs_url"``, ``"download_url"``, one entry per hash
        in ``HASHES`` (with underscores removed from the key), and
        ``"symbols"`` holding the hex addresses of the default symbols that
        are present in ``syms``.
    """

    info = {
        "id": libs_id,
        "libs_url": libs_url,
        "download_url": "",
    }

    for hash_type, hash_func in HASHES.items():
        # Strip underscores from the key, e.g. 'build_id' becomes 'buildid'.
        info[hash_type.replace("_", "")] = hash_func(path)

    default_symbol_list = [
        "__libc_start_main_ret", "dup2", "printf", "puts", "read", "system", "str_bin_sh"
    ]

    # Only report the default symbols this libc actually provides; a symbols
    # file lacking one of them must not raise KeyError and abort the search.
    info["symbols"] = {
        name: hex(syms[name]) for name in default_symbol_list if name in syms
    }

    return info


def get_build_id_offsets():
Expand Down

0 comments on commit 51e8eb0

Please sign in to comment.