diff --git a/src/datachain/cli/parser/__init__.py b/src/datachain/cli/parser/__init__.py index 9a1995d8a..e5cfad3f7 100644 --- a/src/datachain/cli/parser/__init__.py +++ b/src/datachain/cli/parser/__init__.py @@ -1,3 +1,4 @@ +import argparse from argparse import ArgumentParser from importlib.metadata import PackageNotFoundError, version @@ -18,7 +19,8 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 __version__ = "unknown" parser = ArgumentParser( - description="DataChain: Wrangle unstructured AI data at scale", prog="datachain" + description="DataChain: Wrangle unstructured AI data at scale.", + prog="datachain", ) parser.add_argument("-V", "--version", action="version", version=__version__) @@ -46,24 +48,24 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--debug-sql", action="store_true", default=False, - help="Show All SQL Queries (very verbose output, for debugging only)", + help=argparse.SUPPRESS, ) parent_parser.add_argument( "--pdb", action="store_true", default=False, - help="Drop into the pdb debugger on fatal exception", + help=argparse.SUPPRESS, ) subp = parser.add_subparsers( title="Available Commands", metavar="command", dest="command", - help=f"Use `{parser.prog} command --help` for command-specific help.", + help=f"Use `{parser.prog} command --help` for command-specific help", required=True, ) parse_cp = subp.add_parser( - "cp", parents=[parent_parser], description="Copy data files from the cloud" + "cp", parents=[parent_parser], description="Copy data files from the cloud." ) add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined] parse_cp.add_argument("output", type=str, help="Output") @@ -90,7 +92,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 ) parse_clone = subp.add_parser( - "clone", parents=[parent_parser], description="Copy data files from the cloud" + "clone", parents=[parent_parser], description="Copy data files from the cloud." 
) add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined] parse_clone.add_argument("output", type=str, help="Output") @@ -139,22 +141,23 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "dataset", aliases=["ds"], parents=[parent_parser], - description="Commands for managing datasers", + description="Commands for managing datasets.", ) datasets_subparser = datasets_parser.add_subparsers( dest="datasets_cmd", - help="Use `datachain datasets CMD --help` to display command specific help", + required=True, + help="Use `datachain dataset CMD --help` to display command-specific help", ) parse_pull = datasets_subparser.add_parser( "pull", parents=[parent_parser], - description="Pull specific dataset version from SaaS", + description="Pull specific dataset version from Studio.", ) parse_pull.add_argument( "dataset", type=str, - help="Name and version of remote dataset created in SaaS", + help="Name and version of remote dataset created in Studio", ) parse_pull.add_argument("-o", "--output", type=str, help="Output") parse_pull.add_argument( @@ -202,7 +205,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 ) parse_edit_dataset = datasets_subparser.add_parser( - "edit", parents=[parent_parser], description="Edit dataset metadata" + "edit", parents=[parent_parser], description="Edit dataset metadata." ) parse_edit_dataset.add_argument("name", type=str, help="Dataset name") parse_edit_dataset.add_argument( @@ -244,11 +247,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--team", action="store", default=None, - help="The team to edit a dataset. By default, it will use team from config.", + help="The team to edit a dataset. By default, it will use team from config", ) datasets_parser = datasets_subparser.add_parser( - "ls", parents=[parent_parser], description="List datasets" + "ls", parents=[parent_parser], description="List datasets." 
) datasets_parser.add_argument( "--studio", @@ -274,11 +277,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--team", action="store", default=None, - help="The team to list datasets for. By default, it will use team from config.", + help="The team to list datasets for. By default, it will use team from config", ) rm_dataset_parser = datasets_subparser.add_parser( - "rm", parents=[parent_parser], description="Removes dataset", aliases=["remove"] + "rm", parents=[parent_parser], description="Remove dataset.", aliases=["remove"] ) rm_dataset_parser.add_argument("name", type=str, help="Dataset name") rm_dataset_parser.add_argument( @@ -292,7 +295,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--force", default=False, action=BooleanOptionalAction, - help="Force delete registered dataset with all of it's versions", + help="Force delete registered dataset with all of its versions", ) rm_dataset_parser.add_argument( "--studio", @@ -318,13 +321,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--team", action="store", default=None, - help="The team to delete a dataset. By default, it will use team from config.", + help="The team to delete a dataset. By default, it will use team from config", ) dataset_stats_parser = datasets_subparser.add_parser( - "stats", - parents=[parent_parser], - description="Shows basic dataset stats", + "stats", parents=[parent_parser], description="Show basic dataset statistics." ) dataset_stats_parser.add_argument("name", type=str, help="Dataset name") dataset_stats_parser.add_argument( @@ -349,7 +350,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 ) parse_ls = subp.add_parser( - "ls", parents=[parent_parser], description="List storage contents" + "ls", parents=[parent_parser], description="List storage contents." 
) add_sources_arg(parse_ls, nargs="*") parse_ls.add_argument( @@ -357,7 +358,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--long", action="count", default=0, - help="List files in the long format", + help="List files in long format", ) parse_ls.add_argument( "--studio", @@ -383,11 +384,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 "--team", action="store", default=None, - help="The team to list datasets for. By default, it will use team from config.", + help="The team to list datasets for. By default, it will use team from config", ) parse_du = subp.add_parser( - "du", parents=[parent_parser], description="Display space usage" + "du", parents=[parent_parser], description="Display space usage." ) add_sources_arg(parse_du) parse_du.add_argument( @@ -405,8 +406,8 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 type=int, metavar="N", help=( - "Display sizes for N directory depths below the given directory, " - "the default is 0 (summarize provided directory only)." + "Display sizes up to N directory levels deep " + "(default: 0, summarize provided directory only)" ), ) parse_du.add_argument( @@ -417,32 +418,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 ) parse_find = subp.add_parser( - "find", parents=[parent_parser], description="Search in a directory hierarchy" + "find", parents=[parent_parser], description="Search in a directory hierarchy." 
) add_sources_arg(parse_find) parse_find.add_argument( "--name", type=str, action="append", - help="Filename to match pattern.", + help="Match filename pattern", ) parse_find.add_argument( "--iname", type=str, action="append", - help="Like -name but case insensitive.", + help="Match filename pattern (case insensitive)", ) parse_find.add_argument( "--path", type=str, action="append", - help="Path to match pattern.", + help="Path to match pattern", ) parse_find.add_argument( "--ipath", type=str, action="append", - help="Like -path but case insensitive.", + help="Like --path but case insensitive", ) parse_find.add_argument( "--size", @@ -450,7 +451,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 help=( "Filter by size (+ is greater or equal, - is less or equal). " "Specified size is in bytes, or use a suffix like K, M, G for " - "kilobytes, megabytes, gigabytes, etc." + "kilobytes, megabytes, gigabytes, etc" ), ) parse_find.add_argument( @@ -470,14 +471,14 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 ) parse_index = subp.add_parser( - "index", parents=[parent_parser], description="Index storage location" + "index", parents=[parent_parser], description="Index storage location."
) add_sources_arg(parse_index) show_parser = subp.add_parser( "show", parents=[parent_parser], - description="Create a new dataset with a query script", + description="Create a new dataset with a query script.", ) show_parser.add_argument("name", type=str, help="Dataset name") show_parser.add_argument( @@ -493,7 +494,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 query_parser = subp.add_parser( "query", parents=[parent_parser], - description="Create a new dataset with a query script", + description="Create a new dataset with a query script.", ) query_parser.add_argument( "script", metavar="", type=str, help="Filepath for script" @@ -507,7 +508,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 metavar="N", help=( "Use multiprocessing to run any query script UDFs with N worker processes. " - "N defaults to the CPU count." + "N defaults to the CPU count" ), ) query_parser.add_argument( @@ -520,10 +521,12 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915 ) subp.add_parser( - "clear-cache", parents=[parent_parser], description="Clear the local file cache" + "clear-cache", + parents=[parent_parser], + description="Clear the local file cache.", ) subp.add_parser( - "gc", parents=[parent_parser], description="Garbage collect temporary tables" + "gc", parents=[parent_parser], description="Garbage collect temporary tables." 
) subp.add_parser("internal-run-udf", parents=[parent_parser]) @@ -536,12 +539,12 @@ def add_completion_parser(subparsers, parents): parser = subparsers.add_parser( "completion", parents=parents, - description="Output shell completion script", + description="Output shell completion script.", ) parser.add_argument( "-s", "--shell", - help="Shell syntax for completions.", + help="Shell syntax for completions", default="bash", choices=shtab.SUPPORTED_SHELLS, ) diff --git a/src/datachain/cli/parser/job.py b/src/datachain/cli/parser/job.py index 3ef29d5c1..fb4be860b 100644 --- a/src/datachain/cli/parser/job.py +++ b/src/datachain/cli/parser/job.py @@ -1,19 +1,17 @@ def add_jobs_parser(subparsers, parent_parser) -> None: - jobs_help = "Commands to handle the Job running with Iterative Studio" - jobs_description = ( - "This will help us to run, cancel and view the status of the job in Studio. " - ) + jobs_help = "Manage jobs in Studio" + jobs_description = "Commands to manage job execution in Studio." jobs_parser = subparsers.add_parser( "job", parents=[parent_parser], description=jobs_description, help=jobs_help ) jobs_subparser = jobs_parser.add_subparsers( dest="cmd", - help="Use `DataChain studio CMD --help` to display command-specific help.", + help="Use `datachain job CMD --help` to display command-specific help", required=True, ) studio_run_help = "Run a job in Studio" - studio_run_description = "This command runs a job in Studio." + studio_run_description = "Run a job in Studio." studio_run_parser = jobs_subparser.add_parser( "run", @@ -25,56 +23,56 @@ def add_jobs_parser(subparsers, parent_parser) -> None: studio_run_parser.add_argument( "query_file", action="store", - help="The query file to run.", + help="Query file to run", ) studio_run_parser.add_argument( "--team", action="store", default=None, - help="The team to run a job for.
By default, it will use team from config.", + help="Team to run job for (default: from config)", ) studio_run_parser.add_argument( "--env-file", action="store", - help="File containing environment variables to set for the job.", + help="File with environment variables for the job", ) studio_run_parser.add_argument( "--env", nargs="+", - help="Environment variable. Can be specified multiple times. Format: KEY=VALUE", + help="Environment variables in KEY=VALUE format", ) studio_run_parser.add_argument( "--workers", type=int, - help="Number of workers to use for the job.", + help="Number of workers for the job", ) studio_run_parser.add_argument( "--files", nargs="+", - help="Files to include in the job.", + help="Additional files to include in the job", ) studio_run_parser.add_argument( "--python-version", action="store", - help="Python version to use for the job (e.g. '3.9', '3.10', '3.11').", + help="Python version for the job (e.g., 3.9, 3.10, 3.11)", ) studio_run_parser.add_argument( "--req-file", action="store", - help="File containing Python package requirements.", + help="Python requirements file", ) studio_run_parser.add_argument( "--req", nargs="+", - help="Python package requirement. Can be specified multiple times.", + help="Python package requirements", ) studio_cancel_help = "Cancel a job in Studio" - studio_cancel_description = "This command cancels a job in Studio." + studio_cancel_description = "Cancel a running job in Studio." studio_cancel_parser = jobs_subparser.add_parser( "cancel", @@ -86,19 +84,17 @@ def add_jobs_parser(subparsers, parent_parser) -> None: studio_cancel_parser.add_argument( "job_id", action="store", - help="The job ID to cancel.", + help="Job ID to cancel", ) studio_cancel_parser.add_argument( "--team", action="store", default=None, - help="The team to cancel a job for. 
By default, it will use team from config.", + help="Team to cancel job for (default: from config)", ) - studio_log_help = "Show the logs and latest status of Jobs in Studio" - studio_log_description = ( - "This will display the logs and latest status of jobs in Studio" - ) + studio_log_help = "Show job logs and status in Studio" + studio_log_description = "Display logs and current status of jobs in Studio." studio_log_parser = jobs_subparser.add_parser( "logs", @@ -110,11 +106,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None: studio_log_parser.add_argument( "job_id", action="store", - help="The job ID to show the logs.", + help="Job ID to show logs for", ) studio_log_parser.add_argument( "--team", action="store", default=None, - help="The team to check the logs. By default, it will use team from config.", + help="Team to check logs for (default: from config)", ) diff --git a/src/datachain/cli/parser/studio.py b/src/datachain/cli/parser/studio.py index c13b9b726..58971bdc6 100644 --- a/src/datachain/cli/parser/studio.py +++ b/src/datachain/cli/parser/studio.py @@ -1,10 +1,8 @@ def add_studio_parser(subparsers, parent_parser) -> None: - studio_help = "Commands to authenticate DataChain with Iterative Studio" + studio_help = "Manage Studio authentication" studio_description = ( - "Authenticate DataChain with Studio and set the token. " - "Once this token has been properly configured,\n" - "DataChain will utilize it for seamlessly sharing datasets\n" - "and using Studio features from CLI" + "Manage authentication and settings for Studio. " + "Configure tokens for sharing datasets and using Studio features." 
) studio_parser = subparsers.add_parser( @@ -15,14 +13,14 @@ def add_studio_parser(subparsers, parent_parser) -> None: ) studio_subparser = studio_parser.add_subparsers( dest="cmd", - help="Use `DataChain studio CMD --help` to display command-specific help.", + help="Use `datachain studio CMD --help` to display command-specific help", required=True, ) - studio_login_help = "Authenticate DataChain with Studio host" + studio_login_help = "Authenticate with Studio" studio_login_description = ( - "By default, this command authenticates the DataChain with Studio\n" - "using default scopes and assigns a random name as the token name." + "Authenticate with Studio using default scopes. " + "A random name will be assigned as the token name if not specified." ) login_parser = studio_subparser.add_parser( "login", @@ -36,14 +34,14 @@ def add_studio_parser(subparsers, parent_parser) -> None: "--hostname", action="store", default=None, - help="The hostname of the Studio instance to authenticate with.", + help="Hostname of the Studio instance", ) login_parser.add_argument( "-s", "--scopes", action="store", default=None, - help="The scopes for the authentication token. ", + help="Authentication token scopes", ) login_parser.add_argument( @@ -51,21 +49,20 @@ def add_studio_parser(subparsers, parent_parser) -> None: "--name", action="store", default=None, - help="The name of the authentication token. 
It will be used to\n" - "identify token shown in Studio profile.", + help="Authentication token name (shown in Studio profile)", ) login_parser.add_argument( "--no-open", action="store_true", default=False, - help="Use authentication flow based on user code.\n" - "You will be presented with user code to enter in browser.\n" - "DataChain will also use this if it cannot launch browser on your behalf.", + help="Use code-based authentication without browser", ) - studio_logout_help = "Logout user from Studio" - studio_logout_description = "This removes the studio token from your global config." + studio_logout_help = "Log out from Studio" + studio_logout_description = ( + "Remove the Studio authentication token from global config." + ) studio_subparser.add_parser( "logout", @@ -74,10 +71,8 @@ def add_studio_parser(subparsers, parent_parser) -> None: help=studio_logout_help, ) - studio_team_help = "Set the default team for DataChain" - studio_team_description = ( - "Set the default team for DataChain to use when interacting with Studio." - ) + studio_team_help = "Set default team for Studio operations" + studio_team_description = "Set the default team for Studio operations." team_parser = studio_subparser.add_parser( "team", @@ -88,28 +83,29 @@ def add_studio_parser(subparsers, parent_parser) -> None: team_parser.add_argument( "team_name", action="store", - help="The name of the team to set as the default.", + help="Name of the team to set as default", ) team_parser.add_argument( "--global", action="store_true", default=False, - help="Set the team globally for all DataChain projects.", + help="Set team globally for all projects", ) - studio_token_help = "View the token datachain uses to contact Studio" # noqa: S105 # nosec B105 + studio_token_help = "View Studio authentication token" # noqa: S105 + studio_token_description = "Display the current authentication token for Studio." 
# noqa: S105 studio_subparser.add_parser( "token", parents=[parent_parser], - description=studio_token_help, + description=studio_token_description, help=studio_token_help, ) - studio_ls_dataset_help = "List the available datasets from Studio" + studio_ls_dataset_help = "List available Studio datasets" studio_ls_dataset_description = ( - "This command lists all the datasets available in Studio.\n" - "It will show the dataset name and the number of versions available." + "List all datasets available in Studio, showing dataset names " + "and version counts." ) ls_dataset_parser = studio_subparser.add_parser( @@ -122,5 +118,5 @@ def add_studio_parser(subparsers, parent_parser) -> None: "--team", action="store", default=None, - help="The team to list datasets for. By default, it will use team from config.", + help="Team to list datasets for (default: from config)", )