*日本語版はREADMEはこちら
A reimplementation of GNU bash (version 3.2.57).
- GNU make (version 3.81)
- GCC (Apple clang version 13.0.0)
Used these versions during development.
- installment of
readline
$ brew install readline
- installment of
minishell
$ git clone https://github.com/tacomea/minishell.git
$ cd minishell
$ make
- prompt with command history
-c
option or running command using|
- builtin
echo
:-n
optioncd
: relative or absolute pathpwd
: no optionexport
: no optionunset
: no optionenv
: no optionexit
: no option
- quotes
'
"
- redirections
<
,>
: redirect input/output<<
: here document>>
: redirect output in append mode
- pipes
|
- subshell
()
- variables expansion
$
$(characters)
: environment variable$?
: exit status
- signal
ctrl-C
ctrl-D
ctrl-\
- control operator
&&
||
- wildcards
*
: only in current working directory - unsupported
- unclosed quotation marks
- interpretation of special characters such as
\
and;
- Split modules by their functionality and clarify dependencies. Created
internal
directory in each directory. Made sure not to include header files ininternal
of external modules.
- When interpreting the input string and dividing it into tokens, the tokens are managed using a linked-list so that they can be easily parsed later.
// lexer/lexer.c
t_token *lex(char *input)
{
t_token token;
t_token *tmp;
t_lexer *l;
new_lexer(&l, input);
token.next = NULL;
tmp = &token;
// read_char() read through the string.
read_char(l);
while (1)
{
// Connect each analyzed token as a list
tmp->next = next_token(l);
if (tmp->next->type == EOL)
break ;
tmp = tmp->next;
}
free(l);
// Returns the first pointer of the created list and passes it to the Parser。
return (token.next);
}
// lexer/internal/lexer_internal.h
// t_lexer manages the position of the string being parsed.
typedef struct s_lexer
{
char *input;
size_t position;
size_t read_position;
char ch;
bool is_subshell;
bool is_redirect;
} t_lexer;
// token/token.h
// t_token holds the type and string of each token
typedef struct s_token {
enum e_token_type type;
t_string literal;
struct s_token *next;
} t_token;
- Achieved high extensibility by writing the grammar in BNF (Backus–Naur form) and implementing it accordingly.
// parser/minishell.bnf
<command_line> ::= <pipeline> '&&' <newline> <command_line> //pattern 1
| <pipeline> '||' <newline> <command_line> //pattern 2
| <pipeline> //pattern 3
<pipeline> ::= <command> '|' <newline> <pipeline>
| <command>
<command> ::= <subshell>
| <simple_command>
<subshell> ::= '(' <compound_list> ')' <redirection_list>
| '(' <compound_list> ')'
<compound_list> ::= <pipeline> '&&' <newline> <compound_list>
| <pipeline> '||' <newline> <compound_list>
| <pipeline> '\n' <newline> <compound_list>
| <pipeline>
<simple_command> ::= <simple_command_element> <simple_command>
| <simple_command_element>
<simple_command_element> ::= <word>
| <redirection>
<redirection_list> ::= <redirection> <redirection_list>
| <redirection>
<redirection> ::= <number>? '>' <word>
| <number>? '<' <word>
| <number>? '>>' <word>
| <number>? '<<' <word>
// parser/internal/command_line.c
t_ast_node *command_line(t_parser *p)
{
t_ast_node *result;
t_ast_node *pipeline_;
t_ast_node *commandline_;
// The first element is always a pipeline, so read it
if (!assign_ast_node(&pipeline_, pipeline(p)))
return (NULL);
// If there is no token, it is pattern 3, so return
if (p->token->type == EOL)
return (pipeline_);
new_ast_node(&result);
// If a token exists, check it for pattern 1 or 2, which can only be '&&','||'.
if (consume_token(p, AND_IF, NULL))
result->type = AND_IF_NODE;
else if (consume_token(p, OR_IF, NULL))
result->type = OR_IF_NODE;
else
{
p->err = ERR_UNEXPECTED_TOKEN;
return (delete_ast_nodes(pipeline_, result));
}
// Read the command line.
if (!assign_ast_node(&commandline_, command_line(p)))
{
p->err = ERR_UNEXPECTED_EOF;
return (delete_ast_nodes(pipeline_, result));
}
// Set pipeline on the left side of the binary tree, command line on the right side, and return
set_ast_nodes(result, pipeline_, commandline_);
return (result);
}
- Split each Expander process to follow Bash's design and expand each node parsed by Parser.
// expander/expander.c
void search_expandable_node(t_expander *e, t_ast_node *node)
{
t_ast_node *original_right;
char *original_data;
// When reached the end of the node, return
if (!node)
return ;
// Explore each node and perform the expansion process recursively.
search_expandable_node(e, node->right);
search_expandable_node(e, node->left);
// If the node cannot be expanded (&&,|| etc.), return
if (node->type != COMMAND_ARG_NODE
&& node->type != REDIRECT_IN_NODE
&& node->type != REDIRECT_OUT_NODE
&& node->type != REDIRECT_APPEND_NODE)
return ;
original_right = node->right;
original_data = x_strdup(node->data);
// 1.Variable Expansion
node->data = variable_expansion(e, node->data);
if (!is_empty_data(e, node, original_data))
{
// 2.Word Splitting
word_splitting(node, e, original_data, original_right);
// 3.Filename Expansion
filename_expansion(node, e, original_data, original_right);
// 4.Quotes Removal
quotes_removal(node, original_right);
}
free(original_data);
}
- Create a function to manage the state of the string being parsed.
// expander/internal/expander_utils.c
int quotation_status(char c, int status)
{
// If the character you are reading is "
if (c == '\"')
{
// If you are already in ", get out of the quote
if (status == IN_DOUBLE_QUOTE)
status = OUTSIDE;
// If already in ', keep being in ' (do not interpret "")
else if (status == IN_SINGLE_QUOTE)
status = IN_SINGLE_QUOTE;
// If you are outside the quote, go inside the "
else
status = IN_DOUBLE_QUOTE;
}
else if (c == '\'')
{
if (status == IN_DOUBLE_QUOTE)
status = IN_DOUBLE_QUOTE;
else if (status == IN_SINGLE_QUOTE)
status = OUTSIDE;
else
status = IN_SINGLE_QUOTE;
}
return (status);
}
- Implemented Parser-aligned recursion to improve readability and extensibility
// execute/internal/execute_command_line.c
int execute_command_line(t_executor *e, t_ast_node *node)
{
t_ast_node *pipeline_node;
// When patterns 1 and 2 ('&&', '||' are present), the left node is executed
if (node->type == AND_IF_NODE || node->type == OR_IF_NODE)
pipeline_node = node->left;
else
pipeline_node = node;
// Check the exit status of the previous one to see if it runs.
if (is_execute_condition(e->condition, e->exit_status))
{
// Evaluate and execute pipeline on left side
eval_pipeline(e, &e->pipeline, pipeline_node);
execute_pipeline(e, e->pipeline);
// Set exit status
register_env_var_from_literal("?", NULL, e->exit_status, e->env_vars);
// recursively `free()` and `close()` nodes that are no longer needed
delete_execute_list(e->pipeline, T_PIPELINE);
}
// When pattern 1 or 2 is used, set condition and execute the node on the right side
if (node->type == AND_IF_NODE || node->type == OR_IF_NODE)
{
if (node->type == AND_IF_NODE)
e->condition = CONDITION_AND_IF;
else if (node->type == OR_IF_NODE)
e->condition = CONDITION_OR_IF;
return (execute_command_line(e, node->right));
}
return (e->exit_status);
}
- When there are multiple redirections, duplicate redirections are overwritten to ease post-processing.
void new_redirect_in(t_simple_command *sc, char *data, t_node_type type)
{
// close() the previous one if this is the second or subsequent input redirection
if (sc->r_in != UNSET_FD)
close(sc->r_in);
// Overwrite with new file descriptor
if (type == REDIRECT_IN_NODE)
{
sc->r_in = open(data, O_RDONLY);
if (sc->r_in == -1)
{
ft_putstr_fd("minishell: ", STDERR_FILENO);
perror(data);
sc->err = REDIRECT_ERR;
}
}
// Process here doc and overwrite file descriptor
else if (type == HEREDOC_NODE)
sc->r_in = execute_heredoc(data);
}
- The Architecture of Open Source Applications
- shell の文字列分解と環境変数展開を再実装した
- bash 再実装の振り返り
- パイプを実装してみたというお話
- Bash Reference Manual
- C 言語で glob
- gnu libc manual
- Go 言語でつくるインタプリタ
- 詳解UNIXプログラミング