-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlexer_1.c
155 lines (141 loc) · 4.07 KB
/
lexer_1.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* lexer_1.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: phelebra <[email protected]> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2023/06/19 15:01:41 by fvonsovs #+# #+# */
/* Updated: 2023/08/07 13:53:56 by phelebra ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
/*
	LEXER
	Turns a raw command line into an array of tokens.

	example input (char *):
		cmd1 "blah blah" whats 'up 'you $USER |wc -l >outfile
	step 1: split the string on spaces, keeping quoted sections together
		{cmd1, "blah blah", whats, 'up 'you, $USER, |wc, -l,
		>outfile, NULL}
	step 2: expand variables
		{cmd1, "blah blah", whats, 'up 'you, fvonsovs, |wc, -l,
		>outfile, NULL}
	step 3: split the operators | < > out of their tokens (outside quotes)
		{cmd1, "blah blah", whats, 'up 'you, fvonsovs, |, wc, -l,
		>, outfile, NULL}
	Steps 2 and 3 are applied token by token inside a single loop.

	NOTE
	<< comes out as {<, <} and likewise >> as {>, >};
	the parser should handle this.

	Returns whatever tokenize returned (NULL included) after the tokens
	have been expanded and split.
*/
char	**lexer(char *s, t_env *env)
{
	char	**tokens;
	char	**pieces;
	int		idx;

	tokens = tokenize(s, " ");
	idx = 0;
	while (tokens && tokens[idx])
	{
		tokens[idx] = expand_args(&tokens[idx], env);
		pieces = tokenize_opts(tokens[idx], "<|>");
		/* presumably swaps token idx for the pieces; verify in helper */
		ft_matrix_replace_in(&tokens, pieces, idx);
		/* jump past everything that was just inserted */
		idx += ft_matrixlen(pieces);
		ft_free_matrix(&pieces);
	}
	return (tokens);
}
/*
	Splits src into a NULL-terminated array of tokens.
	count_words sizes the array and rejects unclosed quotes,
	fill_array then copies each token into it.

	Returns NULL when src is NULL, when a quote is left open
	(count_words returned -1) or when an allocation fails.
	A token that could not be allocated no longer leaves a NULL hole
	in the middle of the array: everything is released and NULL is
	returned, so the caller never sees a silently truncated command.
	NOTE(review): assumes ft_substr returns NULL on failure - confirm.
*/
char	**tokenize(char *src, char *delims)
{
	t_lexer	lex;
	char	**ret;
	int		n;
	int		i;

	lex.char_i = 0;
	lex.word_n = 0;
	lex.token_n = 0;
	lex.quotes = 0;
	lex.quote_c = 0;
	if (!src)
		return (NULL);
	n = count_words(src, delims, &lex);
	if (n == -1)
		return (NULL);
	ret = malloc(sizeof(*ret) * (n + 1));
	if (ret == NULL)
		return (NULL);
	/* start from a clean array so an unwritten slot is detectable */
	i = 0;
	while (i <= n)
		ret[i++] = NULL;
	ret = fill_array(ret, src, delims, &lex);
	/* slot n may hold fill_array's trailing "\0" literal: drop it */
	ret[n] = NULL;
	i = 0;
	while (i < n && ret[i] != NULL)
		i++;
	if (i == n)
		return (ret);
	/* at least one token is missing: free the survivors and give up */
	i = 0;
	while (i < n)
		free(ret[i++]);
	free(ret);
	return (NULL);
}
/*
	Counts the tokens of s, scanning from lex->char_i to the end.
	token = run of characters holding no delimiter from delims;
	a delimiter between a pair of matching quotes (' or ") stays
	inside the token.
	lex->quote_c remembers which quote character is currently open,
	lex->quotes is 1 while inside it and 0 otherwise.
	lex->char_i and lex->word_n are advanced but not reset here:
	the caller is expected to zero them beforehand.
	Returns lex->word_n, or -1 if the string ends inside a quote.
*/
int	count_words(char *s, char *delims, t_lexer *lex)
{
	char	c;

	lex->quotes = 0;
	lex->quote_c = 0;
	while (s[lex->char_i] != '\0')
	{
		if (ft_strchr(delims, s[lex->char_i]))
		{
			lex->char_i++;
			continue ;
		}
		lex->word_n++;
		while (s[lex->char_i] != '\0'
			&& (lex->quotes || !ft_strchr(delims, s[lex->char_i])))
		{
			c = s[lex->char_i];
			/* a quote met outside any quote becomes the open one */
			if (!lex->quote_c && (c == '\"' || c == '\''))
				lex->quote_c = c;
			/* the open quote character toggles the in-quote flag */
			if (c == lex->quote_c)
				lex->quotes = !lex->quotes;
			/* once closed, forget which quote it was */
			if (!lex->quotes)
				lex->quote_c = 0;
			lex->char_i++;
		}
		if (lex->quotes)
			return (-1);
	}
	return (lex->word_n);
}
/*
	Moves lex->char_i across the next token of s.
	Leading delimiters are skipped first, the index where the token
	starts is stored in lex->word_n, then lex->char_i is advanced up to
	the first delimiter found outside quotes (or to the end of s).
	In this function lex->quotes tracks an open single quote and
	lex->quote_c an open double quote; each one is ignored while the
	other is open.
*/
void	skip_and_parse(char *s, char *delims, t_lexer *lex)
{
	char	c;
	int		single_toggle;
	int		double_toggle;

	while (s[lex->char_i] != '\0' && ft_strchr(delims, s[lex->char_i]))
		lex->char_i++;
	lex->word_n = lex->char_i;
	while (s[lex->char_i] && (lex->quotes || lex->quote_c
			|| !ft_strchr(delims, s[lex->char_i])))
	{
		c = s[lex->char_i];
		single_toggle = (!lex->quote_c && c == '\'');
		lex->quotes = (lex->quotes + single_toggle) % 2;
		/* evaluated after the single-quote state has been updated */
		double_toggle = (!lex->quotes && c == '\"');
		lex->quote_c = (lex->quote_c + double_toggle) % 2;
		lex->char_i++;
	}
}
/*
	Fills ret with the tokens of s, one per call to skip_and_parse.
	Writing starts at index lex->token_n, which is not reset here;
	lex->char_i and both quote flags are reset before scanning.
	Each token is copied with ft_substr from lex->word_n up to
	lex->char_i.
	When only delimiters were left (the token would start at or past
	the end of s) the string literal "\0" is stored instead.
	NOTE(review): that literal is not heap memory and lands one slot
	after the last real token - the caller must size ret for it and
	must not free it.
	ret is not NULL-terminated here. Returns ret.
*/
char	**fill_array(char **ret, char *s, char *delims, t_lexer *lex)
{
	int	len;

	len = ft_strlen(s);
	lex->char_i = 0;
	lex->quotes = 0;
	lex->quote_c = 0;
	while (s[lex->char_i] != '\0')
	{
		skip_and_parse(s, delims, lex);
		if (lex->word_n < len)
			ret[lex->token_n] = ft_substr(s, lex->word_n,
					lex->char_i - lex->word_n);
		else
			ret[lex->token_n] = "\0";
		lex->token_n++;
	}
	return (ret);
}