Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improves stellar-core-debug-info script and adds docs #4553

Open
wants to merge 2 commits into from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ This folder is for storing any scripts that may be helpful for using stellar-cor
./src/stellar-core(+0x34f0c1) [0x55c7cd1000c1]"
```

### Stellar Core Debug Info

- Name - `stellar-core-debug-info`
- Description - Gathers useful information about core state in order to help debug crashes. This includes collecting log files, bucket directories,
SQL DB state, status reported by `offline-info`, and OS information for the given node.
- Usage - Ex. `stellar-core-debug-info /tmp/stellarCoreDumpOutputDirectory`. This script requires a destination directory in which to write temporary files and the resulting
`.tar.gz` archive of the collected debug information. Note that secret seeds from config files are automatically redacted.
If the given output directory does not exist, the script will attempt to create it. By default, the script checks
the `stellar-core.service` file to determine the correct paths of the stellar-core executable and config file. From the config file, the script will
then parse the paths of the log files, bucket directory, and SQL DB. All these fields can be manually overridden as well; see
`stellar-core-debug-info --help` for specific flags.

### Soroban Settings Helper
- Name - `settings-helper.sh`
- Prerequisites - `stellar-xdr` and `stellar-core`
Expand Down
125 changes: 104 additions & 21 deletions scripts/stellar-core-debug-info
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,78 @@ import time

def parse_args():
    """Parse the command-line arguments of stellar-core-debug-info.

    Returns:
        argparse.Namespace with the required positional ``outputDir`` and the
        optional ``core_config``, ``log_dir``, ``bucket_dir``, ``core_path``
        and ``sqlite_path``. Optional values are ``None`` when not supplied,
        except ``core_path`` which defaults to ``'stellar-core'``.
    """
    parser = argparse.ArgumentParser(description='Gathers information about host and stellar-core')
    parser.add_argument('outputDir', type=str, help='Path to directory to use for scratch space and '
                        'storing results. The script will create the directory if it does not exist and a new subdirectory under this path.')
    parser.add_argument('-c', '--core-config', required=False, type=str, help='Path to the stellar-core config file. '
                        'If not set we will try to find it in the service file.')
    parser.add_argument('-l', '--log-dir', required=False, type=str, help='Path where logs are written to. '
                        'If not set we will try to find it in the config. '
                        'Set to string "disabled" to exclude logs.')
    parser.add_argument('-b', '--bucket-dir', required=False, type=str, help='Path where buckets are written to. '
                        'If not set we will try to find it in the config. '
                        'Set to string "disabled" to exclude buckets directory.')
    parser.add_argument('-p', '--core-path', required=False, type=str, help='Path to the stellar-core binary. '
                        'If not set "stellar-core" will be used.', default='stellar-core')
    parser.add_argument('-s', '--sqlite-path', required=False, type=str, help='Path to the sqlite database. '
                        'If not set we will try to find it in the config. '
                        'Set to string "disabled" to exclude sqlite.')
    return parser.parse_args()

def _parse_exec_start(unit_text):
    """Return the effective ExecStart command line in unit_text, or None if absent."""
    exec_start = None
    for line in unit_text.splitlines():
        stripped = line.strip()
        if not stripped.startswith("ExecStart="):
            continue
        value = stripped.split("=", 1)[1].strip()
        if not value:
            # systemd treats an empty assignment as "reset the command list";
            # drop-in overrides use it to replace the packaged ExecStart.
            exec_start = None
        elif exec_start is None:
            exec_start = value
    return exec_start


def get_service_exec_start():
    """Return the ExecStart command line of the stellar-core systemd service.

    Runs ``systemctl cat stellar-core.service`` and scans its output for
    ``ExecStart=`` lines.

    Returns:
        The text following ``ExecStart=``, stripped of surrounding whitespace.

    Raises:
        OSError: if ``systemctl`` cannot be executed.
        RuntimeError: if ``systemctl`` exits with a non-zero status; the
            message is its stderr output.
        ValueError: if the service file contains no usable ExecStart entry.
    """
    service_name = "stellar-core.service"

    # Use systemctl to retrieve the service file content (including drop-ins)
    result = subprocess.run(
        ["systemctl", "cat", service_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )

    # Failures are raised rather than returned as an "Error: ..." string, so
    # callers cannot mistake the message for a command line.
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip())

    exec_start = _parse_exec_start(result.stdout)
    if not exec_start:
        raise ValueError(f"No 'ExecStart' found in {service_name} service file.")
    return exec_start

def extract_paths(exec_start):
    """Split an ExecStart command line into the executable and its config path.

    Args:
        exec_start: the value of a systemd ``ExecStart=`` setting.

    Returns:
        A ``(command, conf_path)`` tuple. ``command`` is the first
        whitespace-delimited token with any systemd special executable prefix
        characters removed. ``conf_path`` is the ``.cfg`` path following
        ``--conf``, or ``None`` when no such argument is present. If no command
        can be extracted, ``command`` is a string starting with ``"Error:"``
        and ``conf_path`` is ``None``.
    """
    tokens = exec_start.split() if isinstance(exec_start, str) else []
    if not tokens:
        return "Error: ExecStart value is empty or not a string", None

    # systemd permits special prefixes ("-", "@", "+", "!", ":") in front of
    # the executable; they are not part of the path.
    first_path = tokens[0].lstrip("@-:+!")
    if not first_path:
        return f"Error: no executable found in ExecStart value '{exec_start}'", None

    # Extract the config file path after the --conf flag
    conf_match = re.search(r"--conf\s+(\S+\.cfg)", exec_start)
    conf_path = conf_match.group(1) if conf_match else None

    return first_path, conf_path

def get_full_path_for_file(file):
    """Return the absolute, user-expanded form of a file or directory path.

    Args:
        file: an absolute path, a relative path (with or without a directory
            component), or a path starting with ``~``.

    Returns:
        The normalized absolute path. Relative paths, including bare names
        such as ``out``, are resolved against the current working directory.
        An empty value is returned unchanged.
    """
    if not file:
        return file
    return os.path.abspath(os.path.expanduser(file))

def get_full_path_for_command(command):
    """Return the absolute path of an executable given a path or a bare name.

    Args:
        command: a path to an executable (absolute, relative, or starting
            with ``~``) or a bare command name to look up in PATH.

    Returns:
        The absolute path. For a bare command name that is not found in PATH,
        ``None``. Explicit paths are not checked for existence.
    """
    # Anything that looks like a path is resolved directly
    if command.startswith(("./", "../", "~")) or os.path.sep in command:
        return os.path.abspath(os.path.expanduser(command))

    # If it's just a command, search for it in PATH; which() may hand back a
    # relative path when PATH contains relative entries, so normalize it.
    found = shutil.which(command)
    return os.path.abspath(found) if found else None

class Gatherer(object):
def catch_errors(func):
Expand All @@ -48,8 +101,8 @@ class Gatherer(object):

def __init__(self, args):
timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
self.base_dir = args.dest
self.scratch_dir = os.path.join(args.dest, f'stellar-core-debug-info-{timestamp}')
self.base_dir = get_full_path_for_file(args.outputDir)
self.scratch_dir = os.path.join(self.base_dir, f'stellar-core-debug-info-{timestamp}')
self.tgz_file = f'{self.scratch_dir}.tar.gz'
self.core_config = args.core_config
self.core_path = args.core_path
Expand All @@ -59,8 +112,34 @@ class Gatherer(object):
self.header_template = '#####################\n# {}\n#####################\n'

def pre_flight(self):
if not os.path.isdir(self.base_dir) or not os.access(self.base_dir, os.W_OK):
print(f"Error: destination directory must exist and be writable: {self.scratch_dir}")
if not self.core_config:
try:
exec_start = get_service_exec_start()
self.core_path, self.core_config = extract_paths(exec_start)
except Exception as e:
print(f"Could not parse stellar-core config file from service file, please provide it with --core-config flag.")
return False
else:
# If the paths are not absolute, make them absolute
self.core_config = get_full_path_for_file(self.core_config)
self.core_path = get_full_path_for_command(self.core_path)
if not self.core_path:
print("Error: stellar-core command not found, please specify executable with --core-path flag")
return False

if os.path.exists(self.base_dir) and not os.path.isdir(self.base_dir):
print(f"Error: destination path {self.base_dir} exists but is not a directory")
return False

if not os.path.exists(self.base_dir):
try:
os.mkdir(self.base_dir, mode=0o755)
except: # noqa: E722
print(f'Error: failed to create destination directory {self.base_dir}')
return False

if not os.access(self.base_dir, os.W_OK):
print(f"Error: destination directory must be writable: {self.scratch_dir}")
return False

try:
Expand All @@ -77,9 +156,13 @@ class Gatherer(object):
print(f"Error: can't read core config file: {self.core_config}. Maybe you need --core-config flag?")
return False

user = pwd.getpwuid(os.getuid()).pw_name
if user not in ['root', 'stellar']:
print(f'Warning: the script should normaly be run as stellar or root user. Running as {user}')
# Check if stellar-core executable exists and is executable
if not os.path.isfile(self.core_path):
print(f"Error: stellar-core binary not found, have you specified a full path?: {self.core_path}")
return False

if not os.access(self.core_path, os.X_OK):
print("Warning: user does not have permission to run stellar-core, debug info will be limited!")

return True

Expand Down
Loading